Index: lib/Target/AArch64/AArch64FrameLowering.cpp =================================================================== --- lib/Target/AArch64/AArch64FrameLowering.cpp +++ lib/Target/AArch64/AArch64FrameLowering.cpp @@ -550,7 +550,6 @@ // // AArch64TargetLowering::LowerCall figures out ArgumentPopSize and keeps // it as the 2nd argument of AArch64ISD::TC_RETURN. - NumBytes += ArgumentPopSize; // Move past the restores of the callee-saved registers. MachineBasicBlock::iterator LastPopI = MBB.getFirstTerminator(); @@ -566,12 +565,23 @@ assert(NumBytes >= 0 && "Negative stack allocation size!?"); if (!hasFP(MF)) { + bool RedZone = canUseRedZone(MF); // If this was a redzone leaf function, we don't need to restore the - // stack pointer. - if (!canUseRedZone(MF)) - emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP, NumBytes, - TII, MachineInstr::FrameDestroy); - return; + // stack pointer (but we may need to pop stack args for fastcc). + if (RedZone && ArgumentPopSize == 0) + return; + + bool NoCalleeSaveRestore = AFI->getCalleeSavedStackSize() == 0; + int StackRestoreBytes = RedZone ? 0 : NumBytes; + if (NoCalleeSaveRestore) + StackRestoreBytes += ArgumentPopSize; + emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP, + StackRestoreBytes, TII, MachineInstr::FrameDestroy); + // If we were able to combine the local stack pop with the argument pop, + // then we're done. + if (NoCalleeSaveRestore || ArgumentPopSize == 0) + return; + NumBytes = 0; } // Restore the original stack pointer. @@ -582,6 +592,13 @@ emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::FP, -AFI->getCalleeSavedStackSize() + 16, TII, MachineInstr::FrameDestroy); + + // This must be placed after the callee-save restore code because that code + // assumes the SP is at the same location as it was after the callee-save save + // code in the prologue. 
+ if (ArgumentPopSize) + emitFrameOffset(MBB, MBB.getFirstTerminator(), DL, AArch64::SP, AArch64::SP, + ArgumentPopSize, TII, MachineInstr::FrameDestroy); } /// getFrameIndexReference - Provide a base+offset reference to an FI slot for Index: test/CodeGen/AArch64/fastcc.ll =================================================================== --- test/CodeGen/AArch64/fastcc.ll +++ test/CodeGen/AArch64/fastcc.ll @@ -1,5 +1,6 @@ ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -tailcallopt | FileCheck %s -check-prefix CHECK-TAIL ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -tailcallopt -aarch64-redzone | FileCheck %s -check-prefix CHECK-TAIL-RZ ; Without tailcallopt fastcc still means the caller cleans up the ; stack, so try to make sure this is respected. @@ -97,6 +98,7 @@ ; CHECK-TAIL: mov sp, x29 ; CHECK-TAIL-NEXT: ldp x29, x30, [sp], #16 +; CHECK-TAIL-NEXT: add sp, sp, #16 ; CHECK-TAIL-NEXT: ret } @@ -140,5 +142,95 @@ ; CHECK-TAIL: mov sp, x29 ; CHECK-TAIL-NEXT: ldp x29, x30, [sp], #16 +; CHECK-TAIL-NEXT: add sp, sp, #32 ; CHECK-TAIL-NEXT: ret } + +; Check that arg stack pop is done after callee-save restore when no frame pointer is used. +define fastcc void @func_stack32_leaf([8 x i32], i128 %stacked0, i128 %stacked1) { +; CHECK-LABEL: func_stack32_leaf: +; CHECK: stp x20, x19, [sp, #-16]! +; CHECK: nop +; CHECK-NEXT: //NO_APP +; CHECK-NEXT: ldp x20, x19, [sp], #16 +; CHECK-NEXT: ret + +; CHECK-TAIL-LABEL: func_stack32_leaf: +; CHECK-TAIL: stp x20, x19, [sp, #-16]! +; CHECK-TAIL: nop +; CHECK-TAIL-NEXT: //NO_APP +; CHECK-TAIL-NEXT: ldp x20, x19, [sp], #16 +; CHECK-TAIL-NEXT: add sp, sp, #32 +; CHECK-TAIL-NEXT: ret + +; CHECK-TAIL-RZ-LABEL: func_stack32_leaf: +; CHECK-TAIL-RZ: stp x20, x19, [sp, #-16]! 
+; CHECK-TAIL-RZ-NOT: sub sp, sp +; CHECK-TAIL-RZ: nop +; CHECK-TAIL-RZ-NEXT: //NO_APP +; CHECK-TAIL-RZ-NEXT: ldp x20, x19, [sp], #16 +; CHECK-TAIL-RZ-NEXT: add sp, sp, #32 +; CHECK-TAIL-RZ-NEXT: ret + + ; Make sure there is a callee-save register to save/restore. + call void asm sideeffect "nop", "~{x20}"() nounwind + ret void +} + +; Check that arg stack pop is done after any local stack pop and the callee-save restore when no frame pointer is used. +define fastcc void @func_stack32_leaf_local([8 x i32], i128 %stacked0, i128 %stacked1) { +; CHECK-LABEL: func_stack32_leaf_local: +; CHECK: stp x20, x19, [sp, #-16]! +; CHECK-NEXT: sub sp, sp, #16 +; CHECK: nop +; CHECK-NEXT: //NO_APP +; CHECK-NEXT: add sp, sp, #16 +; CHECK-NEXT: ldp x20, x19, [sp], #16 +; CHECK-NEXT: ret + +; CHECK-TAIL-LABEL: func_stack32_leaf_local: +; CHECK-TAIL: stp x20, x19, [sp, #-16]! +; CHECK-TAIL-NEXT: sub sp, sp, #16 +; CHECK-TAIL: nop +; CHECK-TAIL-NEXT: //NO_APP +; CHECK-TAIL-NEXT: add sp, sp, #16 +; CHECK-TAIL-NEXT: ldp x20, x19, [sp], #16 +; CHECK-TAIL-NEXT: add sp, sp, #32 +; CHECK-TAIL-NEXT: ret + +; CHECK-TAIL-RZ-LABEL: func_stack32_leaf_local: +; CHECK-TAIL-RZ: stp x20, x19, [sp, #-16]! +; CHECK-TAIL-RZ-NOT: sub sp, sp +; CHECK-TAIL-RZ: nop +; CHECK-TAIL-RZ-NEXT: //NO_APP +; CHECK-TAIL-RZ-NEXT: ldp x20, x19, [sp], #16 +; CHECK-TAIL-RZ-NEXT: add sp, sp, #32 +; CHECK-TAIL-RZ-NEXT: ret + + %val0 = alloca [2 x i64], align 8 + + ; Make sure there is a callee-save register to save/restore. + call void asm sideeffect "nop", "~{x20}"() nounwind + ret void +} + +; Check that arg stack pop is combined with any local stack pop when there are no callee-saves to restore and no frame pointer is used. 
+define fastcc void @func_stack32_leaf_local_nocs([8 x i32], i128 %stacked0, i128 %stacked1) { +; CHECK-LABEL: func_stack32_leaf_local_nocs: +; CHECK: sub sp, sp, #16 +; CHECK: add sp, sp, #16 +; CHECK-NEXT: ret + +; CHECK-TAIL-LABEL: func_stack32_leaf_local_nocs: +; CHECK-TAIL: sub sp, sp, #16 +; CHECK-TAIL: add sp, sp, #48 +; CHECK-TAIL-NEXT: ret + +; CHECK-TAIL-RZ-LABEL: func_stack32_leaf_local_nocs: +; CHECK-TAIL-RZ: add sp, sp, #32 +; CHECK-TAIL-RZ-NEXT: ret + + %val0 = alloca [2 x i64], align 8 + + ret void +} \ No newline at end of file