Skip to content

Commit 5c290dc

Browse files
committedJan 19, 2018
AArch64: Fix emergency spillslot being out of reach for large callframes
Re-commit of r322200: The testcase shouldn't hit machineverifiers anymore with r322917 in place. Large callframes (calls with several hundreds or thousands or parameters) could lead to situations in which the emergency spillslot is out of range to be addressed relative to the stack pointer. This commit forces the use of a frame pointer in the presence of large callframes. This commit does several things: - Compute max callframe size at the end of instruction selection. - Add mirFileLoaded target callback. Use it to compute the max callframe size after loading a .mir file when the size wasn't specified in the file. - Let TargetFrameLowering::hasFP() return true if there exists a callframe > 255 bytes. - Always place the emergency spillslot close to FP if we have a frame pointer. - Note that `useFPForScavengingIndex()` would previously return false when a base pointer was available leading to the emergency spillslot getting allocated late (that's the whole effect of this callback). Which made no sense to me so I took this case out: Even though the emergency spillslot is technically not referenced by FP in this case we still want it allocated early. Differential Revision: https://reviews.llvm.org/D40876 llvm-svn: 322919
1 parent 702ffea commit 5c290dc

File tree

10 files changed

+75
-11
lines changed

10 files changed

+75
-11
lines changed
 

‎llvm/include/llvm/CodeGen/TargetSubtargetInfo.h

+3
Original file line numberDiff line numberDiff line change
@@ -248,6 +248,9 @@ class TargetSubtargetInfo : public MCSubtargetInfo {
248248
/// Returns string representation of scheduler comment
249249
std::string getSchedInfoStr(const MachineInstr &MI) const override;
250250
std::string getSchedInfoStr(MCInst const &MCI) const override;
251+
252+
/// This is called after a .mir file was loaded.
253+
virtual void mirFileLoaded(MachineFunction &MF) const;
251254
};
252255

253256
} // end namespace llvm

‎llvm/lib/CodeGen/MIRParser/MIRParser.cpp

+2
Original file line numberDiff line numberDiff line change
@@ -417,6 +417,8 @@ MIRParserImpl::initializeMachineFunction(const yaml::MachineFunction &YamlMF,
417417

418418
computeFunctionProperties(MF);
419419

420+
MF.getSubtarget().mirFileLoaded(MF);
421+
420422
MF.verify();
421423
return false;
422424
}

‎llvm/lib/CodeGen/TargetSubtargetInfo.cpp

+3
Original file line numberDiff line numberDiff line change
@@ -111,3 +111,6 @@ std::string TargetSubtargetInfo::getSchedInfoStr(MCInst const &MCI) const {
111111
TSchedModel.computeInstrRThroughput(MCI.getOpcode());
112112
return createSchedInfoStr(Latency, RThroughput);
113113
}
114+
115+
void TargetSubtargetInfo::mirFileLoaded(MachineFunction &MF) const {
116+
}

‎llvm/lib/Target/AArch64/AArch64FrameLowering.cpp

+26-6
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,12 @@ static cl::opt<bool> EnableRedZone("aarch64-redzone",
142142

143143
STATISTIC(NumRedZoneFunctions, "Number of functions using red zone");
144144

145+
/// This is the biggest offset to the stack pointer we can encode in aarch64
146+
/// instructions (without using a separate calculation and a temp register).
147+
/// Note that the exception here are vector stores/loads which cannot encode any
148+
/// displacements (see estimateRSStackSizeLimit(), isAArch64FrameOffsetLegal()).
149+
static const unsigned DefaultSafeSPDisplacement = 255;
150+
145151
/// Look at each instruction that references stack frames and return the stack
146152
/// size limit beyond which some of these instructions will require a scratch
147153
/// register during their expansion later.
@@ -167,7 +173,7 @@ static unsigned estimateRSStackSizeLimit(MachineFunction &MF) {
167173
}
168174
}
169175
}
170-
return 255;
176+
return DefaultSafeSPDisplacement;
171177
}
172178

173179
bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const {
@@ -191,11 +197,25 @@ bool AArch64FrameLowering::hasFP(const MachineFunction &MF) const {
191197
const MachineFrameInfo &MFI = MF.getFrameInfo();
192198
const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
193199
// Retain behavior of always omitting the FP for leaf functions when possible.
194-
return (MFI.hasCalls() &&
195-
MF.getTarget().Options.DisableFramePointerElim(MF)) ||
196-
MFI.hasVarSizedObjects() || MFI.isFrameAddressTaken() ||
197-
MFI.hasStackMap() || MFI.hasPatchPoint() ||
198-
RegInfo->needsStackRealignment(MF);
200+
if (MFI.hasCalls() && MF.getTarget().Options.DisableFramePointerElim(MF))
201+
return true;
202+
if (MFI.hasVarSizedObjects() || MFI.isFrameAddressTaken() ||
203+
MFI.hasStackMap() || MFI.hasPatchPoint() ||
204+
RegInfo->needsStackRealignment(MF))
205+
return true;
206+
// With large callframes around we may need to use FP to access the scavenging
207+
// emergency spillslot.
208+
//
209+
// Unfortunately some calls to hasFP() like machine verifier ->
210+
// getReservedReg() -> hasFP in the middle of global isel are too early
211+
// to know the max call frame size. Hopefully conservatively returning "true"
212+
// in those cases is fine.
213+
// DefaultSafeSPDisplacement is fine as we only emergency spill GP regs.
214+
if (!MFI.isMaxCallFrameSizeComputed() ||
215+
MFI.getMaxCallFrameSize() > DefaultSafeSPDisplacement)
216+
return true;
217+
218+
return false;
199219
}
200220

201221
/// hasReservedCallFrame - Under normal circumstances, when a frame pointer is

‎llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

+5
Original file line numberDiff line numberDiff line change
@@ -10982,3 +10982,8 @@ AArch64TargetLowering::getVaListSizeInBits(const DataLayout &DL) const {
1098210982

1098310983
return 3 * getPointerTy(DL).getSizeInBits() + 2 * 32;
1098410984
}
10985+
10986+
void AArch64TargetLowering::finalizeLowering(MachineFunction &MF) const {
10987+
MF.getFrameInfo().computeMaxCallFrameSize(MF);
10988+
TargetLoweringBase::finalizeLowering(MF);
10989+
}

‎llvm/lib/Target/AArch64/AArch64ISelLowering.h

+2
Original file line numberDiff line numberDiff line change
@@ -648,6 +648,8 @@ class AArch64TargetLowering : public TargetLowering {
648648
SelectionDAG &DAG) const override;
649649

650650
bool shouldNormalizeToSelectSequence(LLVMContext &, EVT) const override;
651+
652+
void finalizeLowering(MachineFunction &MF) const override;
651653
};
652654

653655
namespace AArch64 {

‎llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp

+7-5
Original file line numberDiff line numberDiff line change
@@ -225,11 +225,13 @@ bool AArch64RegisterInfo::requiresVirtualBaseRegisters(
225225

226226
bool
227227
AArch64RegisterInfo::useFPForScavengingIndex(const MachineFunction &MF) const {
228-
const MachineFrameInfo &MFI = MF.getFrameInfo();
229-
// AArch64FrameLowering::resolveFrameIndexReference() can always fall back
230-
// to the stack pointer, so only put the emergency spill slot next to the
231-
// FP when there's no better way to access it (SP or base pointer).
232-
return MFI.hasVarSizedObjects() && !hasBasePointer(MF);
228+
// This function indicates whether the emergency spillslot should be placed
229+
// close to the beginning of the stackframe (closer to FP) or the end
230+
// (closer to SP).
231+
//
232+
// The beginning works most reliably if we have a frame pointer.
233+
const AArch64FrameLowering &TFI = *getFrameLowering(MF);
234+
return TFI.hasFP(MF);
233235
}
234236

235237
bool AArch64RegisterInfo::requiresFrameIndexScavenging(

‎llvm/lib/Target/AArch64/AArch64Subtarget.cpp

+10
Original file line numberDiff line numberDiff line change
@@ -250,3 +250,13 @@ std::unique_ptr<PBQPRAConstraint>
250250
AArch64Subtarget::getCustomPBQPConstraints() const {
251251
return balanceFPOps() ? llvm::make_unique<A57ChainingConstraint>() : nullptr;
252252
}
253+
254+
void AArch64Subtarget::mirFileLoaded(MachineFunction &MF) const {
255+
// We usually compute max call frame size after ISel. Do the computation now
256+
// if the .mir file didn't specify it. Note that this will probably give you
257+
// bogus values after PEI has eliminated the callframe setup/destroy pseudo
258+
// instructions, specify explicitely if you need it to be correct.
259+
MachineFrameInfo &MFI = MF.getFrameInfo();
260+
if (!MFI.isMaxCallFrameSizeComputed())
261+
MFI.computeMaxCallFrameSize(MF);
262+
}

‎llvm/lib/Target/AArch64/AArch64Subtarget.h

+2
Original file line numberDiff line numberDiff line change
@@ -326,6 +326,8 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
326326
return false;
327327
}
328328
}
329+
330+
void mirFileLoaded(MachineFunction &MF) const override;
329331
};
330332
} // End llvm namespace
331333

+15
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
; RUN: llc -o - %s -verify-machineinstrs | FileCheck %s
2+
; Make sure we use a frame pointer and fp relative addressing for the emergency
3+
; spillslot when we have gigantic callframes.
4+
; CHECK-LABEL: func:
5+
; CHECK: stur {{.*}}, [x29, #{{.*}}] // 8-byte Folded Spill
6+
; CHECK: ldur {{.*}}, [x29, #{{.*}}] // 8-byte Folded Reload
7+
target triple = "aarch64--"
8+
declare void @extfunc([4096 x i64]* byval %p)
9+
define void @func([4096 x i64]* %z) {
10+
%lvar = alloca [31 x i8]
11+
%v = load volatile [31 x i8], [31 x i8]* %lvar
12+
store volatile [31 x i8] %v, [31 x i8]* %lvar
13+
call void @extfunc([4096 x i64]* byval %z)
14+
ret void
15+
}

0 commit comments

Comments
 (0)