Skip to content

Commit eacf4e4

Browse files
committed Aug 1, 2017
[AArch64] Rewrite stack frame handling for win64 vararg functions
The previous attempt, which made do with a single offset in computeCalleeSaveRegisterPairs, wasn't quite enough: it only worked as long as CombineSPBump == true (since the offset would be adjusted later in fixupCalleeSaveRestoreStackOffset).

Instead, include the size of the fixed stack area used for win64 varargs in the calculations in emitPrologue/emitEpilogue. The stack consists of mainly three parts:
- AFI->getLocalStackSize()
- AFI->getCalleeSavedStackSize()
- FixedObject

Most of the places in the code which previously used CSStackSize now use PrologueSaveSize instead, which is the sum of the latter two, while some cases which need exactly the middle one use AFI->getCalleeSavedStackSize() explicitly instead of a local variable.

In addition to moving the offsetting into emitPrologue/emitEpilogue (which fixes functions with CombineSPBump == false), also set the frame pointer to point to the right location, where the frame pointer and link register actually are stored. In addition to the prologue/epilogue, this also requires changes to resolveFrameIndexReference.

Add tests for a function that keeps a frame pointer and another one that uses a VLA.

Differential Revision: https://reviews.llvm.org/D35919
llvm-svn: 309744
1 parent 206f826 commit eacf4e4

File tree

3 files changed

+141
-26
lines changed

3 files changed

+141
-26
lines changed
 

‎llvm/lib/Target/AArch64/AArch64FrameLowering.cpp

+30-22
Original file line number | Diff line number | Diff line change
@@ -506,19 +506,23 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
506506
return;
507507
}
508508

509-
auto CSStackSize = AFI->getCalleeSavedStackSize();
509+
bool IsWin64 =
510+
Subtarget.isCallingConvWin64(MF.getFunction()->getCallingConv());
511+
unsigned FixedObject = IsWin64 ? alignTo(AFI->getVarArgsGPRSize(), 16) : 0;
512+
513+
auto PrologueSaveSize = AFI->getCalleeSavedStackSize() + FixedObject;
510514
// All of the remaining stack allocations are for locals.
511-
AFI->setLocalStackSize(NumBytes - CSStackSize);
515+
AFI->setLocalStackSize(NumBytes - PrologueSaveSize);
512516

513517
bool CombineSPBump = shouldCombineCSRLocalStackBump(MF, NumBytes);
514518
if (CombineSPBump) {
515519
emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -NumBytes, TII,
516520
MachineInstr::FrameSetup);
517521
NumBytes = 0;
518-
} else if (CSStackSize != 0) {
522+
} else if (PrologueSaveSize != 0) {
519523
MBBI = convertCalleeSaveRestoreToSPPrePostIncDec(MBB, MBBI, DL, TII,
520-
-CSStackSize);
521-
NumBytes -= CSStackSize;
524+
-PrologueSaveSize);
525+
NumBytes -= PrologueSaveSize;
522526
}
523527
assert(NumBytes >= 0 && "Negative stack allocation size!?");
524528

@@ -532,8 +536,9 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
532536
++MBBI;
533537
}
534538
if (HasFP) {
535-
// Only set up FP if we actually need to. Frame pointer is fp = sp - 16.
536-
int FPOffset = CSStackSize - 16;
539+
// Only set up FP if we actually need to. Frame pointer is fp =
540+
// sp - fixedobject - 16.
541+
int FPOffset = AFI->getCalleeSavedStackSize() - 16;
537542
if (CombineSPBump)
538543
FPOffset += AFI->getLocalStackSize();
539544

@@ -672,8 +677,8 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
672677
if (HasFP) {
673678
// Define the current CFA rule to use the provided FP.
674679
unsigned Reg = RegInfo->getDwarfRegNum(FramePtr, true);
675-
unsigned CFIIndex = MF.addFrameInst(
676-
MCCFIInstruction::createDefCfa(nullptr, Reg, 2 * StackGrowth));
680+
unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createDefCfa(
681+
nullptr, Reg, 2 * StackGrowth - FixedObject));
677682
BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
678683
.addCFIIndex(CFIIndex)
679684
.setMIFlags(MachineInstr::FrameSetup);
@@ -759,12 +764,16 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
759764
// AArch64TargetLowering::LowerCall figures out ArgumentPopSize and keeps
760765
// it as the 2nd argument of AArch64ISD::TC_RETURN.
761766

762-
auto CSStackSize = AFI->getCalleeSavedStackSize();
767+
bool IsWin64 =
768+
Subtarget.isCallingConvWin64(MF.getFunction()->getCallingConv());
769+
unsigned FixedObject = IsWin64 ? alignTo(AFI->getVarArgsGPRSize(), 16) : 0;
770+
771+
auto PrologueSaveSize = AFI->getCalleeSavedStackSize() + FixedObject;
763772
bool CombineSPBump = shouldCombineCSRLocalStackBump(MF, NumBytes);
764773

765-
if (!CombineSPBump && CSStackSize != 0)
774+
if (!CombineSPBump && PrologueSaveSize != 0)
766775
convertCalleeSaveRestoreToSPPrePostIncDec(
767-
MBB, std::prev(MBB.getFirstTerminator()), DL, TII, CSStackSize);
776+
MBB, std::prev(MBB.getFirstTerminator()), DL, TII, PrologueSaveSize);
768777

769778
// Move past the restores of the callee-saved registers.
770779
MachineBasicBlock::iterator LastPopI = MBB.getFirstTerminator();
@@ -786,7 +795,7 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
786795
return;
787796
}
788797

789-
NumBytes -= CSStackSize;
798+
NumBytes -= PrologueSaveSize;
790799
assert(NumBytes >= 0 && "Negative stack allocation size!?");
791800

792801
if (!hasFP(MF)) {
@@ -796,7 +805,7 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
796805
if (RedZone && ArgumentPopSize == 0)
797806
return;
798807

799-
bool NoCalleeSaveRestore = CSStackSize == 0;
808+
bool NoCalleeSaveRestore = PrologueSaveSize == 0;
800809
int StackRestoreBytes = RedZone ? 0 : NumBytes;
801810
if (NoCalleeSaveRestore)
802811
StackRestoreBytes += ArgumentPopSize;
@@ -815,7 +824,8 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
815824
// be able to save any instructions.
816825
if (MFI.hasVarSizedObjects() || AFI->isStackRealigned())
817826
emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::FP,
818-
-CSStackSize + 16, TII, MachineInstr::FrameDestroy);
827+
-AFI->getCalleeSavedStackSize() + 16, TII,
828+
MachineInstr::FrameDestroy);
819829
else if (NumBytes)
820830
emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP, NumBytes, TII,
821831
MachineInstr::FrameDestroy);
@@ -845,7 +855,11 @@ int AArch64FrameLowering::resolveFrameIndexReference(const MachineFunction &MF,
845855
const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>(
846856
MF.getSubtarget().getRegisterInfo());
847857
const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
848-
int FPOffset = MFI.getObjectOffset(FI) + 16;
858+
const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
859+
bool IsWin64 =
860+
Subtarget.isCallingConvWin64(MF.getFunction()->getCallingConv());
861+
unsigned FixedObject = IsWin64 ? alignTo(AFI->getVarArgsGPRSize(), 16) : 0;
862+
int FPOffset = MFI.getObjectOffset(FI) + FixedObject + 16;
849863
int Offset = MFI.getObjectOffset(FI) + MFI.getStackSize();
850864
bool isFixed = MFI.isFixedObjectIndex(FI);
851865

@@ -956,12 +970,6 @@ static void computeCalleeSaveRegisterPairs(
956970
"Odd number of callee-saved regs to spill!");
957971
int Offset = AFI->getCalleeSavedStackSize();
958972

959-
unsigned GPRSaveSize = AFI->getVarArgsGPRSize();
960-
const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
961-
bool IsWin64 = Subtarget.isCallingConvWin64(MF.getFunction()->getCallingConv());
962-
if (IsWin64)
963-
Offset -= alignTo(GPRSaveSize, 16);
964-
965973
for (unsigned i = 0; i < Count; ++i) {
966974
RegPairInfo RPI;
967975
RPI.Reg1 = CSI[i].getReg();

‎llvm/test/CodeGen/AArch64/aarch64_win64cc_vararg.ll

+2-2
Original file line number | Diff line number | Diff line change
@@ -2,14 +2,14 @@
22

33
define win64cc void @pass_va(i32 %count, ...) nounwind {
44
entry:
5-
; CHECK: sub sp, sp, #80
5+
; CHECK: str x30, [sp, #-80]!
66
; CHECK: add x8, sp, #24
77
; CHECK: add x0, sp, #24
88
; CHECK: stp x6, x7, [sp, #64]
99
; CHECK: stp x4, x5, [sp, #48]
1010
; CHECK: stp x2, x3, [sp, #32]
1111
; CHECK: str x1, [sp, #24]
12-
; CHECK: stp x30, x8, [sp]
12+
; CHECK: str x8, [sp, #8]
1313
; CHECK: bl other_func
1414
; CHECK: ldr x30, [sp], #80
1515
; CHECK: ret

‎llvm/test/CodeGen/AArch64/win64_vararg.ll

+109-2
Original file line number | Diff line number | Diff line change
@@ -2,14 +2,14 @@
22

33
define void @pass_va(i32 %count, ...) nounwind {
44
entry:
5-
; CHECK: sub sp, sp, #80
5+
; CHECK: str x30, [sp, #-80]!
66
; CHECK: add x8, sp, #24
77
; CHECK: add x0, sp, #24
88
; CHECK: stp x6, x7, [sp, #64]
99
; CHECK: stp x4, x5, [sp, #48]
1010
; CHECK: stp x2, x3, [sp, #32]
1111
; CHECK: str x1, [sp, #24]
12-
; CHECK: stp x30, x8, [sp]
12+
; CHECK: str x8, [sp, #8]
1313
; CHECK: bl other_func
1414
; CHECK: ldr x30, [sp], #80
1515
; CHECK: ret
@@ -102,6 +102,113 @@ declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) #1
102102
declare i32 @__stdio_common_vsprintf(i64, i8*, i64, i8*, i8*, i8*) local_unnamed_addr #3
103103
declare i64* @__local_stdio_printf_options() local_unnamed_addr #4
104104

105+
; CHECK-LABEL: fp
106+
; CHECK: str x21, [sp, #-96]!
107+
; CHECK: stp x20, x19, [sp, #16]
108+
; CHECK: stp x29, x30, [sp, #32]
109+
; CHECK: add x29, sp, #32
110+
; CHECK: add x8, x29, #24
111+
; CHECK: mov x19, x2
112+
; CHECK: mov x20, x1
113+
; CHECK: mov x21, x0
114+
; CHECK: stp x6, x7, [x29, #48]
115+
; CHECK: stp x4, x5, [x29, #32]
116+
; CHECK: str x3, [x29, #24]
117+
; CHECK: str x8, [sp, #8]
118+
; CHECK: bl __local_stdio_printf_options
119+
; CHECK: ldr x8, [x0]
120+
; CHECK: add x5, x29, #24
121+
; CHECK: mov x1, x21
122+
; CHECK: mov x2, x20
123+
; CHECK: orr x0, x8, #0x2
124+
; CHECK: mov x3, x19
125+
; CHECK: mov x4, xzr
126+
; CHECK: bl __stdio_common_vsprintf
127+
; CHECK: ldp x29, x30, [sp, #32]
128+
; CHECK: ldp x20, x19, [sp, #16]
129+
; CHECK: cmp w0, #0
130+
; CHECK: csinv w0, w0, wzr, ge
131+
; CHECK: ldr x21, [sp], #96
132+
; CHECK: ret
133+
define i32 @fp(i8*, i64, i8*, ...) local_unnamed_addr #6 {
134+
%4 = alloca i8*, align 8
135+
%5 = bitcast i8** %4 to i8*
136+
call void @llvm.lifetime.start.p0i8(i64 8, i8* nonnull %5) #2
137+
call void @llvm.va_start(i8* nonnull %5)
138+
%6 = load i8*, i8** %4, align 8
139+
%7 = call i64* @__local_stdio_printf_options() #2
140+
%8 = load i64, i64* %7, align 8
141+
%9 = or i64 %8, 2
142+
%10 = call i32 @__stdio_common_vsprintf(i64 %9, i8* %0, i64 %1, i8* %2, i8* null, i8* %6) #2
143+
%11 = icmp sgt i32 %10, -1
144+
%12 = select i1 %11, i32 %10, i32 -1
145+
call void @llvm.va_end(i8* nonnull %5)
146+
call void @llvm.lifetime.end.p0i8(i64 8, i8* nonnull %5) #2
147+
ret i32 %12
148+
}
149+
150+
attributes #6 = { "no-frame-pointer-elim"="true" }
151+
152+
; CHECK-LABEL: vla
153+
; CHECK: str x23, [sp, #-112]!
154+
; CHECK: stp x22, x21, [sp, #16]
155+
; CHECK: stp x20, x19, [sp, #32]
156+
; CHECK: stp x29, x30, [sp, #48]
157+
; CHECK: add x29, sp, #48
158+
; CHECK: add x8, x29, #16
159+
; CHECK: stur x8, [x29, #-40]
160+
; CHECK: mov w8, w0
161+
; CHECK: add x8, x8, #15
162+
; CHECK: mov x9, sp
163+
; CHECK: and x8, x8, #0x1fffffff0
164+
; CHECK: sub x20, x9, x8
165+
; CHECK: mov x19, x1
166+
; CHECK: mov x23, sp
167+
; CHECK: stp x6, x7, [x29, #48]
168+
; CHECK: stp x4, x5, [x29, #32]
169+
; CHECK: stp x2, x3, [x29, #16]
170+
; CHECK: mov sp, x20
171+
; CHECK: ldur x21, [x29, #-40]
172+
; CHECK: sxtw x22, w0
173+
; CHECK: bl __local_stdio_printf_options
174+
; CHECK: ldr x8, [x0]
175+
; CHECK: mov x1, x20
176+
; CHECK: mov x2, x22
177+
; CHECK: mov x3, x19
178+
; CHECK: orr x0, x8, #0x2
179+
; CHECK: mov x4, xzr
180+
; CHECK: mov x5, x21
181+
; CHECK: bl __stdio_common_vsprintf
182+
; CHECK: mov sp, x23
183+
; CHECK: sub sp, x29, #48
184+
; CHECK: ldp x29, x30, [sp, #48]
185+
; CHECK: ldp x20, x19, [sp, #32]
186+
; CHECK: ldp x22, x21, [sp, #16]
187+
; CHECK: ldr x23, [sp], #112
188+
; CHECK: ret
189+
define void @vla(i32, i8*, ...) local_unnamed_addr {
190+
%3 = alloca i8*, align 8
191+
%4 = bitcast i8** %3 to i8*
192+
call void @llvm.lifetime.start.p0i8(i64 8, i8* nonnull %4) #5
193+
call void @llvm.va_start(i8* nonnull %4)
194+
%5 = zext i32 %0 to i64
195+
%6 = call i8* @llvm.stacksave()
196+
%7 = alloca i8, i64 %5, align 1
197+
%8 = load i8*, i8** %3, align 8
198+
%9 = sext i32 %0 to i64
199+
%10 = call i64* @__local_stdio_printf_options()
200+
%11 = load i64, i64* %10, align 8
201+
%12 = or i64 %11, 2
202+
%13 = call i32 @__stdio_common_vsprintf(i64 %12, i8* nonnull %7, i64 %9, i8* %1, i8* null, i8* %8)
203+
call void @llvm.va_end(i8* nonnull %4)
204+
call void @llvm.stackrestore(i8* %6)
205+
call void @llvm.lifetime.end.p0i8(i64 8, i8* nonnull %4) #5
206+
ret void
207+
}
208+
209+
declare i8* @llvm.stacksave()
210+
declare void @llvm.stackrestore(i8*)
211+
105212
; CHECK-LABEL: snprintf
106213
; CHECK: sub sp, sp, #96
107214
; CHECK: stp x21, x20, [sp, #16]

0 commit comments

Comments
 (0)
Please sign in to comment.