Index: lib/CodeGen/RegisterScavenging.cpp =================================================================== --- lib/CodeGen/RegisterScavenging.cpp +++ lib/CodeGen/RegisterScavenging.cpp @@ -39,6 +39,12 @@ #define DEBUG_TYPE "reg-scavenging" +cl::opt<bool> + AlwaysUseSpillSlot("reg-scavenging-always-use-spill-slot", cl::Hidden, + cl::init(false), + cl::desc("Always use a spill slot instead of an " + "available register during scavenging")); + void RegScavenger::setRegUsed(unsigned Reg, LaneBitmask LaneMask) { LiveUnits.addRegMasked(Reg, LaneMask); } @@ -386,7 +392,7 @@ unsigned SReg = findSurvivorReg(I, Candidates, 25, UseMI); // If we found an unused register there is no reason to spill it. - if (!isRegUsed(SReg)) { + if (!isRegUsed(SReg) && !AlwaysUseSpillSlot) { DEBUG(dbgs() << "Scavenged register: " << TRI->getName(SReg) << "\n"); return SReg; } @@ -443,19 +449,25 @@ ": Cannot scavenge register without an emergency spill slot!"; report_fatal_error(Msg.c_str()); } - TII->storeRegToStackSlot(*MBB, I, SReg, true, Scavenged[SI].FrameIndex, - RC, TRI); - MachineBasicBlock::iterator II = std::prev(I); - unsigned FIOperandNum = getFrameIndexOperandNum(*II); - TRI->eliminateFrameIndex(II, SPAdj, FIOperandNum, this); + if (isRegUsed(SReg)) { + // Don't generate the store to the stack slot if the register isn't + // used, as that will make the machine verifier fail. This can happen + // when AlwaysUseSpillSlot is true. + TII->storeRegToStackSlot(*MBB, I, SReg, true, Scavenged[SI].FrameIndex, + RC, TRI); + MachineBasicBlock::iterator II = std::prev(I); + + unsigned FIOperandNum = getFrameIndexOperandNum(*II); + TRI->eliminateFrameIndex(II, SPAdj, FIOperandNum, this); + } // Restore the scavenged register before its use (or first terminator). 
TII->loadRegFromStackSlot(*MBB, UseMI, SReg, Scavenged[SI].FrameIndex, RC, TRI); - II = std::prev(UseMI); + MachineBasicBlock::iterator II = std::prev(UseMI); - FIOperandNum = getFrameIndexOperandNum(*II); + unsigned FIOperandNum = getFrameIndexOperandNum(*II); TRI->eliminateFrameIndex(II, SPAdj, FIOperandNum, this); } Index: lib/Target/AArch64/AArch64FrameLowering.cpp =================================================================== --- lib/Target/AArch64/AArch64FrameLowering.cpp +++ lib/Target/AArch64/AArch64FrameLowering.cpp @@ -122,6 +122,7 @@ #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" +#include <algorithm> #include <cassert> #include <cstdint> #include <iterator> @@ -134,9 +135,40 @@ static cl::opt<bool> EnableRedZone("aarch64-redzone", cl::desc("enable use of redzone on AArch64"), cl::init(false), cl::Hidden); +extern cl::opt<bool> AlwaysUseSpillSlot; STATISTIC(NumRedZoneFunctions, "Number of functions using red zone"); +/// estimateRSStackSizeLimit - Look at each instruction that references stack +/// frames and return the stack size limit beyond which some of these +/// instructions will require a scratch register during their expansion later. +// FIXME: Move to TII? +static unsigned estimateRSStackSizeLimit(MachineFunction &MF) { + // FIXME: For now, just conservatively guestimate based on unscaled indexing + // range. We'll end up allocating an unnecessary spill slot a lot, but + // realistically that's not a big deal at this stage of the game. 
+ unsigned Limit = 256; + for (auto &MBB : MF) { + for (auto &MI : MBB) { + if (MI.isDebugValue() || MI.isPseudo() || + MI.getOpcode() == AArch64::ADDXri || + MI.getOpcode() == AArch64::ADDSXri) + continue; + for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { + if (!MI.getOperand(i).isFI()) + continue; + + int Offset=0; + if (isAArch64FrameOffsetLegal(MI, Offset, nullptr, nullptr, nullptr) == + AArch64FrameOffsetCannotUpdate) + Limit = 0; + } + } + } + + return Limit; +} + bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const { if (!EnableRedZone) return false; @@ -1166,16 +1198,13 @@ unsigned NumRegsSpilled = SavedRegs.count(); bool CanEliminateFrame = NumRegsSpilled == 0; - // FIXME: Set BigStack if any stack slot references may be out of range. - // For now, just conservatively guestimate based on unscaled indexing - // range. We'll end up allocating an unnecessary spill slot a lot, but - // realistically that's not a big deal at this stage of the game. // The CSR spill slots have not been allocated yet, so estimateStackSize // won't include them. MachineFrameInfo &MFI = MF.getFrameInfo(); unsigned CFSize = MFI.estimateStackSize(MF) + 8 * NumRegsSpilled; DEBUG(dbgs() << "Estimated stack frame size: " << CFSize << " bytes.\n"); - bool BigStack = (CFSize >= 256); + unsigned EstimatedStackSizeLimit = estimateRSStackSizeLimit(MF); + bool BigStack = (CFSize > EstimatedStackSizeLimit); if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF)) AFI->setHasStackFrame(true); @@ -1186,7 +1215,8 @@ // above to keep the number of spills even, we don't need to do anything else // here. 
if (BigStack) { - if (!ExtraCSSpill && UnspilledCSGPR != AArch64::NoRegister) { + if (!AlwaysUseSpillSlot && !ExtraCSSpill && + UnspilledCSGPR != AArch64::NoRegister) { DEBUG(dbgs() << "Spilling " << PrintReg(UnspilledCSGPR, RegInfo) << " to get a scratch register.\n"); SavedRegs.set(UnspilledCSGPR); @@ -1201,7 +1231,8 @@ // If we didn't find an extra callee-saved register to spill, create // an emergency spill slot. - if (!ExtraCSSpill || MF.getRegInfo().isPhysRegUsed(ExtraCSSpill)) { + if (AlwaysUseSpillSlot || !ExtraCSSpill || + MF.getRegInfo().isPhysRegUsed(ExtraCSSpill)) { const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); const TargetRegisterClass &RC = AArch64::GPR64RegClass; unsigned Size = TRI->getSpillSize(RC); Index: test/CodeGen/AArch64/reg-scavenge-frame.mir =================================================================== --- /dev/null +++ test/CodeGen/AArch64/reg-scavenge-frame.mir @@ -0,0 +1,22 @@ +# RUN: llc -run-pass=prologepilog -reg-scavenging-always-use-spill-slot=1 -verify-machineinstrs %s -o - | FileCheck %s + +--- | + target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" + target triple = "aarch64" + define void @ScavengeForFrameWithoutOffset() { ret void } +... +--- +name: ScavengeForFrameWithoutOffset +tracksRegLiveness: true +stack: + - { id: 0, type: spill-slot, offset: 0, size: 32, alignment: 8 } +body: | + bb.0: + %x10 = COPY %xzr + %d16_d17_d18_d19 = LD4Fourv4h killed %x10 + ST1Fourv1d killed %d16_d17_d18_d19, %stack.0 :: (store 32 into %stack.0, align 8) + +# CHECK: %x0 = ADDXri %sp, 16, 0 +# CHECK-NEXT: ST1Fourv1d killed %d16_d17_d18_d19, killed %x0 :: (store 32 into %stack.0, align 8) +# CHECK-NEXT: %x0 = LDRXui %sp, 1 :: (load 8 from %stack.1) +...