Skip to content

Commit e97fb41

Browse files
committed Mar 31, 2016
[X86] Merge adjacent stack adjustments in eliminateCallFramePseudoInstr (PR27140)
For code such as:

    void f(int, int);
    void g() { f(1, 2); }

compiled for 32-bit X86 Linux, Clang would previously generate:

    subl $12, %esp
    subl $8, %esp
    pushl $2
    pushl $1
    calll f
    addl $16, %esp
    addl $12, %esp
    retl

This patch fixes that by merging adjacent stack adjustments in eliminateCallFramePseudoInstr().

Differential Revision: http://reviews.llvm.org/D18627

llvm-svn: 265039
1 parent 8c824a0 commit e97fb41

9 files changed

+65
-30
lines changed
 

‎llvm/lib/Target/X86/X86FrameLowering.cpp

+19-12
Original file line numberDiff line numberDiff line change
@@ -2534,13 +2534,22 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
25342534
BuildCFI(MBB, I, DL,
25352535
MCCFIInstruction::createAdjustCfaOffset(nullptr, -InternalAmt));
25362536

2537-
if (Amount) {
2538-
// Add Amount to SP to destroy a frame, and subtract to setup.
2539-
int Offset = isDestroy ? Amount : -Amount;
2537+
// Add Amount to SP to destroy a frame, or subtract to setup.
2538+
int64_t StackAdjustment = isDestroy ? Amount : -Amount;
25402539

2541-
if (!(Fn->optForMinSize() &&
2542-
adjustStackWithPops(MBB, I, DL, Offset)))
2543-
BuildStackAdjustment(MBB, I, DL, Offset, /*InEpilogue=*/false);
2540+
if (StackAdjustment) {
2541+
// Merge with any previous or following adjustment instruction.
2542+
StackAdjustment += mergeSPUpdates(MBB, I, true);
2543+
StackAdjustment += mergeSPUpdates(MBB, I, false);
2544+
2545+
if (!StackAdjustment) {
2546+
// This and the merged instruction canceled out each other.
2547+
return I;
2548+
}
2549+
2550+
if (!(Fn->optForMinSize() &&
2551+
adjustStackWithPops(MBB, I, DL, StackAdjustment)))
2552+
BuildStackAdjustment(MBB, I, DL, StackAdjustment, /*InEpilogue=*/false);
25442553
}
25452554

25462555
if (DwarfCFI && !hasFP(MF)) {
@@ -2550,14 +2559,12 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
25502559
// CFI only for EH purposes or for debugging. EH only requires the CFA
25512560
// offset to be correct at each call site, while for debugging we want
25522561
// it to be more precise.
2553-
int CFAOffset = Amount;
2562+
25542563
// TODO: When not using precise CFA, we also need to adjust for the
25552564
// InternalAmt here.
2556-
2557-
if (CFAOffset) {
2558-
CFAOffset = isDestroy ? -CFAOffset : CFAOffset;
2559-
BuildCFI(MBB, I, DL,
2560-
MCCFIInstruction::createAdjustCfaOffset(nullptr, CFAOffset));
2565+
if (StackAdjustment) {
2566+
BuildCFI(MBB, I, DL, MCCFIInstruction::createAdjustCfaOffset(
2567+
nullptr, -StackAdjustment));
25612568
}
25622569
}
25632570

‎llvm/test/CodeGen/X86/2006-05-02-InstrSched1.ll

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
; REQUIRES: asserts
22
; RUN: llc < %s -mtriple=i686-unknown-linux -relocation-model=static -stats 2>&1 | \
3-
; RUN: grep asm-printer | grep 15
3+
; RUN: grep asm-printer | grep 14
44
;
55
; It's possible to schedule this in 14 instructions by avoiding
66
; callee-save registers, but the scheduler isn't currently that

‎llvm/test/CodeGen/X86/fold-push.ll

+1-1
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ define void @test(i32 %a, i32 %b) optsize nounwind {
1414
; SLM: movl (%esp), [[RELOAD:%e..]]
1515
; SLM-NEXT: pushl [[RELOAD]]
1616
; CHECK: calll
17-
; CHECK-NEXT: addl $4, %esp
17+
; CHECK-NEXT: addl $8, %esp
1818
%c = add i32 %a, %b
1919
call void @foo(i32 %c)
2020
call void asm sideeffect "nop", "~{ax},~{bx},~{cx},~{dx},~{bp},~{si},~{di}"()

‎llvm/test/CodeGen/X86/force-align-stack-alloca.ll

+10-4
Original file line numberDiff line numberDiff line change
@@ -32,15 +32,21 @@ define i64 @g(i32 %i) nounwind {
3232
; CHECK: movl %{{...}}, %esp
3333
; CHECK-NOT: {{[^ ,]*}}, %esp
3434
;
35-
; Next we set up the memset call, and then undo it.
35+
; Next we set up the memset call.
3636
; CHECK: subl $20, %esp
3737
; CHECK-NOT: {{[^ ,]*}}, %esp
38+
; CHECK: pushl
39+
; CHECK: pushl
40+
; CHECK: pushl
3841
; CHECK: calll memset
39-
; CHECK-NEXT: addl $32, %esp
42+
;
43+
; Deallocating 32 bytes of outgoing call frame for memset and
44+
; allocating 28 bytes for calling f yields a 4-byte adjustment:
45+
; CHECK-NEXT: addl $4, %esp
4046
; CHECK-NOT: {{[^ ,]*}}, %esp
4147
;
42-
; Next we set up the call to 'f'.
43-
; CHECK: subl $28, %esp
48+
; And move on to call 'f', and then restore the stack.
49+
; CHECK: pushl
4450
; CHECK-NOT: {{[^ ,]*}}, %esp
4551
; CHECK: calll f
4652
; CHECK-NEXT: addl $32, %esp

‎llvm/test/CodeGen/X86/localescape.ll

+1-2
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,5 @@ define void @alloc_func_no_frameaddr() {
137137
; X86: movl $13, (%esp)
138138
; X86: pushl $0
139139
; X86: calll _print_framealloc_from_fp
140-
; X86: addl $4, %esp
141-
; X86: addl $8, %esp
140+
; X86: addl $12, %esp
142141
; X86: retl

‎llvm/test/CodeGen/X86/memset-2.ll

+1-1
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ declare void @llvm.memset.i32(i8*, i8, i32, i32) nounwind
55

66
define fastcc void @t1() nounwind {
77
; CHECK-LABEL: t1:
8-
; CHECK: subl $12, %esp
8+
; CHECK: subl $16, %esp
99
; CHECK: pushl $188
1010
; CHECK-NEXT: pushl $0
1111
; CHECK-NEXT: pushl $0

‎llvm/test/CodeGen/X86/movtopush.ll

+28-5
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
; RUN: llc < %s -mtriple=i686-windows -no-x86-call-frame-opt | FileCheck %s -check-prefix=NOPUSH
33
; RUN: llc < %s -mtriple=x86_64-windows | FileCheck %s -check-prefix=X64
44
; RUN: llc < %s -mtriple=i686-windows -stackrealign -stack-alignment=32 | FileCheck %s -check-prefix=ALIGNED
5+
; RUN: llc < %s -mtriple=i686-pc-linux | FileCheck %s -check-prefix=LINUX
56

67
%class.Class = type { i32 }
78
%struct.s = type { i64 }
@@ -223,8 +224,7 @@ entry:
223224
; NORMAL-NEXT: pushl $2
224225
; NORMAL-NEXT: pushl $1
225226
; NORMAL-NEXT: call
226-
; NORMAL-NEXT: addl $16, %esp
227-
; NORMAL-NEXT: subl $20, %esp
227+
; NORMAL-NEXT: subl $4, %esp
228228
; NORMAL-NEXT: movl 20(%esp), [[E1:%e..]]
229229
; NORMAL-NEXT: movl 24(%esp), [[E2:%e..]]
230230
; NORMAL-NEXT: movl [[E2]], 4(%esp)
@@ -261,7 +261,7 @@ entry:
261261
; NORMAL-NEXT: pushl $2
262262
; NORMAL-NEXT: pushl $1
263263
; NORMAL-NEXT: calll *16(%esp)
264-
; NORMAL-NEXT: addl $16, %esp
264+
; NORMAL-NEXT: addl $24, %esp
265265
define void @test10() optsize {
266266
%stack_fptr = alloca void (i32, i32, i32, i32)*
267267
store void (i32, i32, i32, i32)* @good, void (i32, i32, i32, i32)** %stack_fptr
@@ -314,8 +314,7 @@ entry:
314314
; NORMAL-NEXT: pushl $2
315315
; NORMAL-NEXT: pushl $1
316316
; NORMAL-NEXT: calll _good
317-
; NORMAL-NEXT: addl $16, %esp
318-
; NORMAL-NEXT: subl $20, %esp
317+
; NORMAL-NEXT: subl $4, %esp
319318
; NORMAL: movl $8, 16(%esp)
320319
; NORMAL-NEXT: movl $7, 12(%esp)
321320
; NORMAL-NEXT: movl $6, 8(%esp)
@@ -358,3 +357,27 @@ entry:
358357
call void @good(i32 %val1, i32 %val2, i32 %val3, i32 %add)
359358
ret i32* %ptr3
360359
}
360+
361+
; Make sure to fold adjacent stack adjustments.
362+
; LINUX-LABEL: pr27140:
363+
; LINUX: subl $12, %esp
364+
; LINUX: .cfi_def_cfa_offset 16
365+
; LINUX-NOT: sub
366+
; LINUX: pushl $4
367+
; LINUX: .cfi_adjust_cfa_offset 4
368+
; LINUX: pushl $3
369+
; LINUX: .cfi_adjust_cfa_offset 4
370+
; LINUX: pushl $2
371+
; LINUX: .cfi_adjust_cfa_offset 4
372+
; LINUX: pushl $1
373+
; LINUX: .cfi_adjust_cfa_offset 4
374+
; LINUX: calll good
375+
; LINUX: addl $28, %esp
376+
; LINUX: .cfi_adjust_cfa_offset -28
377+
; LINUX-NOT: add
378+
; LINUX: retl
379+
define void @pr27140() optsize {
380+
entry:
381+
tail call void @good(i32 1, i32 2, i32 3, i32 4)
382+
ret void
383+
}

‎llvm/test/CodeGen/X86/push-cfi-debug.ll

+2-2
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,8 @@ declare x86_stdcallcc void @stdfoo(i32, i32) #0
2323
; CHECK: .cfi_adjust_cfa_offset 4
2424
; CHECK: calll stdfoo
2525
; CHECK: .cfi_adjust_cfa_offset -8
26-
; CHECK: addl $8, %esp
27-
; CHECK: .cfi_adjust_cfa_offset -8
26+
; CHECK: addl $20, %esp
27+
; CHECK: .cfi_adjust_cfa_offset -20
2828
define void @test1() #0 !dbg !4 {
2929
entry:
3030
tail call void @foo(i32 1, i32 2) #1, !dbg !10

‎llvm/test/CodeGen/X86/push-cfi.ll

+2-2
Original file line numberDiff line numberDiff line change
@@ -82,8 +82,8 @@ cleanup:
8282
; LINUX-NEXT: Ltmp{{[0-9]+}}:
8383
; LINUX-NEXT: .cfi_adjust_cfa_offset 4
8484
; LINUX-NEXT: call
85-
; LINUX-NEXT: addl $16, %esp
86-
; LINUX: .cfi_adjust_cfa_offset -16
85+
; LINUX-NEXT: addl $28, %esp
86+
; LINUX: .cfi_adjust_cfa_offset -28
8787
; DARWIN-NOT: .cfi_escape
8888
; DARWIN-NOT: pushl
8989
define void @test2_nofp() #0 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {

0 commit comments

Comments
 (0)
Please sign in to comment.