diff --git a/llvm/include/llvm/CodeGen/TargetRegisterInfo.h b/llvm/include/llvm/CodeGen/TargetRegisterInfo.h --- a/llvm/include/llvm/CodeGen/TargetRegisterInfo.h +++ b/llvm/include/llvm/CodeGen/TargetRegisterInfo.h @@ -714,6 +714,17 @@ return RC; } + /// Returns the register class that should be used to perform a spill of the + /// specified register class to stack. + /// + /// \note A different register class might be required for targets where not + /// all registers can be stored/loaded into memory + virtual const TargetRegisterClass * + getRegClassForStackSpill(const MachineFunction &MF, + const TargetRegisterClass *Class) const { + return Class; + } + /// Returns the largest super class of RC that is legal to use in the current /// sub-target and has the same spill size. /// The returned register class can be used to create virtual registers which diff --git a/llvm/lib/CodeGen/RegAllocFast.cpp b/llvm/lib/CodeGen/RegAllocFast.cpp --- a/llvm/lib/CodeGen/RegAllocFast.cpp +++ b/llvm/lib/CodeGen/RegAllocFast.cpp @@ -208,6 +208,23 @@ return LiveVirtRegs.find(Register::virtReg2Index(VirtReg)); } + bool isAvailable(MCPhysReg PhysReg) const { + // A Phys Register is available if it is free or if itself and all of its + // aliases are disabled + if (PhysRegState[PhysReg] == regFree) + return true; + for (MCRegAliasIterator AliasIt(PhysReg, TRI, /*IncludeSelf=*/true); + AliasIt.isValid(); ++AliasIt) { + if (PhysRegState[*AliasIt] != regDisabled) + return false; + } + return true; + } + MCPhysReg getBestRegForAllocation(const TargetRegisterClass &Class, + Register Hint0, Register Hint1) const; + MCPhysReg getFreeRegForAllocation(MachineInstr &MI, + const TargetRegisterClass &Class, + Register Hint0, Register Hint1); void allocVirtReg(MachineInstr &MI, LiveReg &LR, Register Hint); void allocVirtRegUndef(MachineOperand &MO); MCPhysReg defineVirtReg(MachineInstr &MI, unsigned OpNum, Register VirtReg, @@ -319,8 +336,20 @@ int FI = getStackSpaceFor(VirtReg); LLVM_DEBUG(dbgs() << 
" to stack slot #" << FI << '\n'); - const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg); - TII->storeRegToStackSlot(*MBB, Before, AssignedReg, Kill, FI, &RC, TRI); + const TargetRegisterClass *RC = MRI->getRegClass(VirtReg); + const TargetRegisterClass *SpillRC = + TRI->getRegClassForStackSpill(*MBB->getParent(), RC); + // If current register class cannot be directly spilled, copy to another + // register of the appropriate class before storing. + if (RC != SpillRC) { + MCPhysReg SpillReg = + getFreeRegForAllocation(*Before, *SpillRC, Register(), Register()); + BuildMI(*MBB, Before, Before->getDebugLoc(), TII->get(TargetOpcode::COPY), + SpillReg) + .addReg(AssignedReg, llvm::RegState::Kill); + AssignedReg = SpillReg; + } + TII->storeRegToStackSlot(*MBB, Before, AssignedReg, Kill, FI, SpillRC, TRI); ++NumStores; // If this register is used by DBG_VALUE then insert new DBG_VALUE to @@ -345,9 +374,25 @@ LLVM_DEBUG(dbgs() << "Reloading " << printReg(VirtReg, TRI) << " into " << printReg(PhysReg, TRI) << '\n'); int FI = getStackSpaceFor(VirtReg); - const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg); - TII->loadRegFromStackSlot(*MBB, Before, PhysReg, FI, &RC, TRI); + const TargetRegisterClass *RC = MRI->getRegClass(VirtReg); + const TargetRegisterClass *SpillRC = + TRI->getRegClassForStackSpill(*MBB->getParent(), RC); + // If current register class cannot be directly loaded, get an intermediate + // register of the appropriate class then copy the loaded value + MCPhysReg SpillReg = PhysReg; + if (RC != SpillRC) { + SpillReg = + getFreeRegForAllocation(*Before, *SpillRC, Register(), Register()); + } + + TII->loadRegFromStackSlot(*MBB, Before, SpillReg, FI, SpillRC, TRI); ++NumLoads; + + // Copy back to original register if necessary + if (SpillReg != PhysReg) { + BuildMI(*MBB, Before, Before->getDebugLoc(), TII->get(TargetOpcode::COPY), PhysReg) + .addReg(SpillReg, llvm::RegState::Kill); + } } /// Return true if MO is the only remaining reference to its 
virtual register, @@ -520,7 +565,6 @@ /// allocated. void RegAllocFast::definePhysReg(MachineBasicBlock::iterator MI, MCPhysReg PhysReg, RegState NewState) { - markRegUsedInInstr(PhysReg); switch (Register VirtReg = PhysRegState[PhysReg]) { case regDisabled: break; @@ -616,6 +660,7 @@ assert(PhysReg != 0 && "Trying to assign no register"); LR.PhysReg = PhysReg; setPhysRegState(PhysReg, VirtReg); + markRegUsedInInstr(PhysReg); } static bool isCoalescable(const MachineInstr &MI) { @@ -658,95 +703,74 @@ return Register(); } -/// Allocates a physical register for VirtReg. -void RegAllocFast::allocVirtReg(MachineInstr &MI, LiveReg &LR, Register Hint0) { - const Register VirtReg = LR.VirtReg; - - assert(Register::isVirtualRegister(VirtReg) && - "Can only allocate virtual registers"); - - const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg); - LLVM_DEBUG(dbgs() << "Search register for " << printReg(VirtReg) - << " in class " << TRI->getRegClassName(&RC) - << " with hint " << printReg(Hint0, TRI) << '\n'); - - // Take hint when possible. - if (Hint0.isPhysical() && MRI->isAllocatable(Hint0) && - RC.contains(Hint0)) { - // Ignore the hint if we would have to spill a dirty register. - unsigned Cost = calcSpillCost(Hint0); - if (Cost < spillDirty) { - LLVM_DEBUG(dbgs() << "\tPreferred Register 1: " << printReg(Hint0, TRI) - << '\n'); - if (Cost) - definePhysReg(MI, Hint0, regFree); - assignVirtToPhysReg(LR, Hint0); - return; - } else { - LLVM_DEBUG(dbgs() << "\tPreferred Register 1: " << printReg(Hint0, TRI) - << "occupied\n"); - } - } else { - Hint0 = Register(); - } - - // Try other hint. - Register Hint1 = traceCopies(VirtReg); - if (Hint1.isPhysical() && MRI->isAllocatable(Hint1) && - RC.contains(Hint1) && !isRegUsedInInstr(Hint1)) { - // Ignore the hint if we would have to spill a dirty register. 
- unsigned Cost = calcSpillCost(Hint1); - if (Cost < spillDirty) { - LLVM_DEBUG(dbgs() << "\tPreferred Register 0: " << printReg(Hint1, TRI) - << '\n'); - if (Cost) - definePhysReg(MI, Hint1, regFree); - assignVirtToPhysReg(LR, Hint1); - return; - } else { - LLVM_DEBUG(dbgs() << "\tPreferred Register 0: " << printReg(Hint1, TRI) - << "occupied\n"); - } - } else { - Hint1 = Register(); - } - +/// Finds out which is the best register available to allocate a virtual register +/// of the specified class, taking the hints whenever possible. +MCPhysReg +RegAllocFast::getBestRegForAllocation(const TargetRegisterClass &Class, + Register Hint0, Register Hint1) const { + // Try hints first + if (Hint0.isPhysical() && MRI->isAllocatable(Hint0) && Class.contains(Hint0) && + (calcSpillCost(Hint0) < spillDirty)) + return Hint0; + if (Hint1.isPhysical() && MRI->isAllocatable(Hint1) && Class.contains(Hint1) && + (calcSpillCost(Hint1) < spillDirty)) + return Hint1; + + // Otherwise, get reg with best cost following allocation order MCPhysReg BestReg = 0; unsigned BestCost = spillImpossible; - ArrayRef AllocationOrder = RegClassInfo.getOrder(&RC); + auto AllocationOrder = RegClassInfo.getOrder(&Class); for (MCPhysReg PhysReg : AllocationOrder) { - LLVM_DEBUG(dbgs() << "\tRegister: " << printReg(PhysReg, TRI) << ' '); unsigned Cost = calcSpillCost(PhysReg); - LLVM_DEBUG(dbgs() << "Cost: " << Cost << " BestCost: " << BestCost << '\n'); - // Immediate take a register with cost 0. - if (Cost == 0) { - assignVirtToPhysReg(LR, PhysReg); - return; - } - - if (PhysReg == Hint1 || PhysReg == Hint0) + if (PhysReg == Hint0 || PhysReg == Hint1) Cost -= spillPrefBonus; - if (Cost < BestCost) { BestReg = PhysReg; BestCost = Cost; } + if (Cost == 0) break; } + return BestReg; +} - if (!BestReg) { - // Nothing we can do: Report an error and keep going with an invalid - // allocation. 
+/// Gets an available register of the specified class for allocation, making sure +/// it is free and ready to be used and spilling when necessary. +MCPhysReg +RegAllocFast::getFreeRegForAllocation(MachineInstr &MI, + const TargetRegisterClass &Class, + Register Hint0, Register Hint1) { + MCPhysReg PhysReg = getBestRegForAllocation(Class, Hint0, Hint1); + + if (!PhysReg) { + // No register available + // Report an error and keep going with an invalid allocation if (MI.isInlineAsm()) MI.emitError("inline assembly requires more registers than available"); else MI.emitError("ran out of registers during register allocation"); - definePhysReg(MI, *AllocationOrder.begin(), regFree); - assignVirtToPhysReg(LR, *AllocationOrder.begin()); - return; + PhysReg = RegClassInfo.getOrder(&Class).front(); } - definePhysReg(MI, BestReg, regFree); - assignVirtToPhysReg(LR, BestReg); + if (!isAvailable(PhysReg)) + definePhysReg(MI, PhysReg, regFree); + return PhysReg; +} + +/// Allocates a physical register for VirtReg. +void RegAllocFast::allocVirtReg(MachineInstr &MI, LiveReg &LR, Register Hint0) { + const Register VirtReg = LR.VirtReg; + + assert(Register::isVirtualRegister(VirtReg) && + "Can only allocate virtual registers"); + + const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg); + LLVM_DEBUG(dbgs() << "Search register for " << printReg(VirtReg) + << " in class " << TRI->getRegClassName(&RC) + << " with hint " << printReg(Hint0, TRI) << '\n'); + + Register Hint1 = traceCopies(VirtReg); + MCPhysReg PhysReg = getFreeRegForAllocation(MI, RC, Hint0, Hint1); + assignVirtToPhysReg(LR, PhysReg); } void RegAllocFast::allocVirtRegUndef(MachineOperand &MO) { @@ -1065,6 +1089,7 @@ } else if (MO.isEarlyClobber()) { definePhysReg(MI, Reg, (MO.isImplicit() || MO.isDead()) ? 
regFree : regReserved); + markRegUsedInInstr(Reg); hasEarlyClobbers = true; } else hasPhysDefs = true; @@ -1171,6 +1196,7 @@ if (!Reg || !Reg.isPhysical() || !MRI->isAllocatable(Reg)) continue; + markRegUsedInInstr(Reg); definePhysReg(MI, Reg, MO.isDead() ? regFree : regReserved); } diff --git a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h --- a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h +++ b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h @@ -15,6 +15,7 @@ #include "MCTargetDesc/ARMBaseInfo.h" #include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/IR/CallingConv.h" @@ -144,6 +145,10 @@ const TargetRegisterClass * getCrossCopyRegClass(const TargetRegisterClass *RC) const override; + const TargetRegisterClass * + getRegClassForStackSpill(const MachineFunction &MF, + const TargetRegisterClass *Class) const override; + const TargetRegisterClass * getLargestLegalSuperClass(const TargetRegisterClass *RC, const MachineFunction &MF) const override; diff --git a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp --- a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -275,6 +275,14 @@ return RC; } +const TargetRegisterClass *ARMBaseRegisterInfo::getRegClassForStackSpill( + const MachineFunction &MF, const TargetRegisterClass *Class) const { + const ARMFunctionInfo *AFI = MF.getInfo(); + if (AFI->isThumb1OnlyFunction() && ARM::hGPRRegClass.hasSubClassEq(Class)) + return &ARM::tGPRRegClass; + return Class; +} + unsigned ARMBaseRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, MachineFunction &MF) const { diff --git a/llvm/test/CodeGen/Thumb/high-reg-spill.mir b/llvm/test/CodeGen/Thumb/high-reg-spill.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/Thumb/high-reg-spill.mir @@ -0,0 +1,59 @@ +# NOTE: 
Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -run-pass regallocfast %s -o - | FileCheck %s + +--- | + ; ModuleID = 'test.c' + source_filename = "test.c" + target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" + target triple = "thumbv6m-arm-none-eabi" + + define dso_local void @constraint_h(i32 %i) { + entry: + %i.addr = alloca i32, align 4 + store i32 %i, i32* %i.addr, align 4 + %0 = load i32, i32* %i.addr, align 4 + call void asm sideeffect "mov r12, $0", "h,~{r12}"(i32 %0) + ret void + } + +... +--- +name: constraint_h +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: tgpr } + - { id: 1, class: hgpr } + - { id: 2, class: tgpr } +liveins: + - { reg: '$r0', virtual-reg: '%0' } +frameInfo: + maxAlignment: 4 + maxCallFrameSize: 0 + localFrameSize: 4 +stack: + - { id: 0, name: i.addr, size: 4, alignment: 4, local-offset: -4 } +machineFunctionInfo: {} +body: | + bb.0.entry: + liveins: $r0 + + ; CHECK-LABEL: name: constraint_h + ; CHECK: liveins: $r0 + ; CHECK: tSTRspi killed renamable $r0, %stack.0.i.addr, 0, 14 /* CC::al */, $noreg :: (store 4 into %ir.i.addr) + ; CHECK: renamable $r0 = tLDRspi %stack.0.i.addr, 0, 14 /* CC::al */, $noreg :: (dereferenceable load 4 from %ir.i.addr) + ; CHECK: renamable $r12 = COPY killed renamable $r0 + ; CHECK: $r0 = COPY killed $r12 + ; CHECK: tSTRspi killed $r0, %stack.1, 0, 14 /* CC::al */, $noreg :: (store 4 into %stack.1) + ; CHECK: $r0 = tLDRspi %stack.1, 0, 14 /* CC::al */, $noreg :: (load 4 from %stack.1) + ; CHECK: $r8 = COPY killed $r0 + ; CHECK: INLINEASM &"mov r12, $0", 1 /* sideeffect attdialect */, 1048585 /* reguse:hGPR */, renamable $r8, 12 /* clobber */, implicit-def early-clobber $r12 + ; CHECK: tBX_RET 14 /* CC::al */, $noreg + %0:tgpr = COPY $r0 + tSTRspi %0, %stack.0.i.addr, 0, 14 /* CC::al */, $noreg :: (store 4 into %ir.i.addr) + %2:tgpr = tLDRspi %stack.0.i.addr, 0, 14 /* CC::al */, $noreg :: (dereferenceable load 4 from %ir.i.addr) 
+ %1:hgpr = COPY %2 + INLINEASM &"mov r12, $0", 1 /* sideeffect attdialect */, 1048585 /* reguse:hGPR */, %1, 12 /* clobber */, implicit-def early-clobber $r12 + tBX_RET 14 /* CC::al */, $noreg + +... diff --git a/llvm/test/CodeGen/Thumb/no-regs-available.mir b/llvm/test/CodeGen/Thumb/no-regs-available.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/Thumb/no-regs-available.mir @@ -0,0 +1,51 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: not llc -run-pass regallocfast %s -o - 2>&1 | FileCheck %s + +--- | + ; ModuleID = 'all-high-regs.c' + source_filename = "all-high-regs.c" + target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" + target triple = "thumbv6m-arm-none-eabi" + + define dso_local void @constraint_h(i32 %i) { + entry: + %i.addr = alloca i32, align 4 + store i32 %i, i32* %i.addr, align 4 + %0 = load i32, i32* %i.addr, align 4 + call void asm sideeffect "mov r12, $0", "h,~{r12},~{r8},~{r9},~{r10},~{r11},~{lr}"(i32 %0) + ret void + } + +... 
+--- +name: constraint_h +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: tgpr } + - { id: 1, class: hgpr } + - { id: 2, class: tgpr } +liveins: + - { reg: '$r0', virtual-reg: '%0' } +frameInfo: + maxAlignment: 4 + maxCallFrameSize: 0 + localFrameSize: 4 +stack: + - { id: 0, name: i.addr, size: 4, alignment: 4, local-offset: -4 } +machineFunctionInfo: {} +body: | + bb.0.entry: + liveins: $r0 + + %0:tgpr = COPY $r0 + tSTRspi %0, %stack.0.i.addr, 0, 14 /* CC::al */, $noreg :: (store 4 into %ir.i.addr) + %2:tgpr = tLDRspi %stack.0.i.addr, 0, 14 /* CC::al */, $noreg :: (dereferenceable load 4 from %ir.i.addr) + %1:hgpr = COPY %2 + INLINEASM &"mov r12, $0", 1 /* sideeffect attdialect */, 1048585 /* reguse:hGPR */, %1, 12 /* clobber */, implicit-def early-clobber $r12, 12 /* clobber */, implicit-def early-clobber $r8, 12 /* clobber */, implicit-def early-clobber $r9, 12 /* clobber */, implicit-def early-clobber $r10, 12 /* clobber */, implicit-def early-clobber $r11, 12 /* clobber */, implicit-def early-clobber $lr + tBX_RET 14 /* CC::al */, $noreg + +... + +# CHECK: error: inline assembly requires more registers than available +