Index: llvm/trunk/lib/Target/AArch64/AArch64FrameLowering.cpp =================================================================== --- llvm/trunk/lib/Target/AArch64/AArch64FrameLowering.cpp +++ llvm/trunk/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -127,9 +127,6 @@ const AArch64FunctionInfo *AFI = MF.getInfo(); unsigned NumBytes = AFI->getLocalStackSize(); - // Note: currently hasFP() is always true for hasCalls(), but that's an - // implementation detail of the current code, not a strict requirement, - // so stay safe here and check both. return !(MFI->hasCalls() || hasFP(MF) || NumBytes > 128); } @@ -138,9 +135,12 @@ bool AArch64FrameLowering::hasFP(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo(); - return (MFI->hasCalls() || MFI->hasVarSizedObjects() || - MFI->isFrameAddressTaken() || MFI->hasStackMap() || - MFI->hasPatchPoint() || RegInfo->needsStackRealignment(MF)); + // Retain behavior of always omitting the FP for leaf functions when possible. + return (MFI->hasCalls() && + MF.getTarget().Options.DisableFramePointerElim(MF)) || + MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken() || + MFI->hasStackMap() || MFI->hasPatchPoint() || + RegInfo->needsStackRealignment(MF); } /// hasReservedCallFrame - Under normal circumstances, when a frame pointer is Index: llvm/trunk/lib/Target/AArch64/AArch64RegisterInfo.cpp =================================================================== --- llvm/trunk/lib/Target/AArch64/AArch64RegisterInfo.cpp +++ llvm/trunk/lib/Target/AArch64/AArch64RegisterInfo.cpp @@ -229,9 +229,7 @@ bool AArch64RegisterInfo::cannotEliminateFrame(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); - // Only consider eliminating leaf frames. - if (MFI->hasCalls() || (MF.getTarget().Options.DisableFramePointerElim(MF) && - MFI->adjustsStack())) + if (MF.getTarget().Options.DisableFramePointerElim(MF) && MFI->adjustsStack()) return true; return MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken(); } Index: llvm/trunk/test/CodeGen/AArch64/aarch64-deferred-spilling.ll =================================================================== --- llvm/trunk/test/CodeGen/AArch64/aarch64-deferred-spilling.ll +++ llvm/trunk/test/CodeGen/AArch64/aarch64-deferred-spilling.ll @@ -1,5 +1,5 @@ -;RUN: llc < %s -mtriple=aarch64--linux-android -regalloc=greedy -enable-deferred-spilling=true -mcpu=cortex-a57 | FileCheck %s --check-prefix=CHECK --check-prefix=DEFERRED -;RUN: llc < %s -mtriple=aarch64--linux-android -regalloc=greedy -enable-deferred-spilling=false -mcpu=cortex-a57 | FileCheck %s --check-prefix=CHECK --check-prefix=REGULAR +;RUN: llc < %s -mtriple=aarch64--linux-android -regalloc=greedy -enable-deferred-spilling=true -mcpu=cortex-a57 -disable-fp-elim | FileCheck %s --check-prefix=CHECK --check-prefix=DEFERRED +;RUN: llc < %s -mtriple=aarch64--linux-android -regalloc=greedy -enable-deferred-spilling=false -mcpu=cortex-a57 -disable-fp-elim | FileCheck %s --check-prefix=CHECK --check-prefix=REGULAR ; Check that we do not end up with useless spill code. ; Index: llvm/trunk/test/CodeGen/AArch64/aarch64-dynamic-stack-layout.ll =================================================================== --- llvm/trunk/test/CodeGen/AArch64/aarch64-dynamic-stack-layout.ll +++ llvm/trunk/test/CodeGen/AArch64/aarch64-dynamic-stack-layout.ll @@ -1,5 +1,5 @@ ; RUN: llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -disable-post-ra < %s | FileCheck %s -; RUN: llc -verify-machineinstrs -mtriple=arm64-apple-ios -disable-post-ra < %s | FileCheck %s --check-prefix=CHECK-MACHO +; RUN: llc -verify-machineinstrs -mtriple=arm64-apple-ios -disable-fp-elim -disable-post-ra < %s | FileCheck %s --check-prefix=CHECK-MACHO ; This test aims to check basic correctness of frame layout & ; frame access code. There are 8 functions in this test file, @@ -98,23 +98,19 @@ ; CHECK-LABEL: novla_nodynamicrealign_call ; CHECK: .cfi_startproc ; Check that used callee-saved registers are saved -; CHECK: str x19, [sp, #-32]! -; Check that the frame pointer is created: -; CHECK: stp x29, x30, [sp, #16] -; CHECK: add x29, sp, #16 +; CHECK: stp x19, x30, [sp, #-16]! +; CHECK: sub sp, sp, #16 ; Check correctness of cfi pseudo-instructions -; CHECK: .cfi_def_cfa w29, 16 +; CHECK: .cfi_def_cfa_offset 32 ; CHECK: .cfi_offset w30, -8 -; CHECK: .cfi_offset w29, -16 -; CHECK: .cfi_offset w19, -32 -; Check correct access to arguments passed on the stack, through frame pointer -; CHECK: ldr d[[DARG:[0-9]+]], [x29, #40] -; CHECK: ldr w[[IARG:[0-9]+]], [x29, #24] +; CHECK: .cfi_offset w19, -16 +; Check correct access to arguments passed on the stack, through stack pointer +; CHECK: ldr d[[DARG:[0-9]+]], [sp, #56] +; CHECK: ldr w[[IARG:[0-9]+]], [sp, #40] ; Check correct access to local variable on the stack, through stack pointer ; CHECK: ldr w[[ILOC:[0-9]+]], [sp, #12] ; Check epilogue: -; CHECK: ldp x29, x30, [sp, #16] -; CHECK: ldr x19, [sp], #32 +; CHECK: ldp x19, x30, [sp], #16 ; CHECK: ret ; CHECK: .cfi_endproc @@ -700,8 +696,8 @@ ; CHECK: .[[LABEL]]: ; CHECK: ret -attributes #0 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } !1 = !{!2, !2, i64 0} !2 = !{!"int", !3, i64 0} Index: llvm/trunk/test/CodeGen/AArch64/arm64-abi_align.ll =================================================================== --- llvm/trunk/test/CodeGen/AArch64/arm64-abi_align.ll +++ llvm/trunk/test/CodeGen/AArch64/arm64-abi_align.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=arm64 -mcpu=cyclone -enable-misched=false | FileCheck %s -; RUN: llc < %s -O0 | FileCheck -check-prefix=FAST %s +; RUN: llc < %s -march=arm64 -mcpu=cyclone -enable-misched=false -disable-fp-elim | FileCheck %s +; RUN: llc < %s -O0 -disable-fp-elim | FileCheck -check-prefix=FAST %s target triple = "arm64-apple-darwin" ; rdar://12648441 Index: llvm/trunk/test/CodeGen/AArch64/arm64-fast-isel-alloca.ll =================================================================== --- llvm/trunk/test/CodeGen/AArch64/arm64-fast-isel-alloca.ll +++ llvm/trunk/test/CodeGen/AArch64/arm64-fast-isel-alloca.ll @@ -1,5 +1,5 @@ ; This test should cause the TargetMaterializeAlloca to be invoked -; RUN: llc -O0 -fast-isel-abort=1 -verify-machineinstrs -mtriple=arm64-apple-darwin < %s | FileCheck %s +; RUN: llc -O0 -fast-isel-abort=1 -verify-machineinstrs -mtriple=arm64-apple-darwin -disable-fp-elim < %s | FileCheck %s %struct.S1Ty = type { i64 } %struct.S2Ty = type { %struct.S1Ty, %struct.S1Ty } Index: llvm/trunk/test/CodeGen/AArch64/arm64-fast-isel-call.ll =================================================================== --- llvm/trunk/test/CodeGen/AArch64/arm64-fast-isel-call.ll +++ llvm/trunk/test/CodeGen/AArch64/arm64-fast-isel-call.ll @@ -1,6 +1,6 @@ -; RUN: llc -O0 -fast-isel-abort=2 -code-model=small -verify-machineinstrs -mtriple=arm64-apple-darwin < %s | FileCheck %s -; RUN: llc -O0 -fast-isel-abort=2 -code-model=large -verify-machineinstrs -mtriple=arm64-apple-darwin < %s | FileCheck %s --check-prefix=LARGE -; RUN: llc -O0 -fast-isel-abort=2 -code-model=small -verify-machineinstrs -mtriple=aarch64_be-linux-gnu < %s | FileCheck %s --check-prefix=CHECK-BE +; RUN: llc -O0 -fast-isel-abort=2 -code-model=small -verify-machineinstrs -disable-fp-elim -mtriple=arm64-apple-darwin < %s | FileCheck %s +; RUN: llc -O0 -fast-isel-abort=2 -code-model=large -verify-machineinstrs -disable-fp-elim -mtriple=arm64-apple-darwin < %s | FileCheck %s --check-prefix=LARGE +; RUN: llc -O0 -fast-isel-abort=2 -code-model=small -verify-machineinstrs -disable-fp-elim -mtriple=aarch64_be-linux-gnu < %s | FileCheck %s --check-prefix=CHECK-BE define void @call0() nounwind { entry: Index: llvm/trunk/test/CodeGen/AArch64/arm64-hello.ll =================================================================== --- llvm/trunk/test/CodeGen/AArch64/arm64-hello.ll +++ llvm/trunk/test/CodeGen/AArch64/arm64-hello.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=arm64-apple-ios7.0 -disable-post-ra | FileCheck %s +; RUN: llc < %s -mtriple=arm64-apple-ios7.0 -disable-post-ra -disable-fp-elim | FileCheck %s ; RUN: llc < %s -mtriple=arm64-linux-gnu -disable-post-ra | FileCheck %s --check-prefix=CHECK-LINUX ; CHECK-LABEL: main: @@ -14,15 +14,14 @@ ; CHECK-NEXT: ret ; CHECK-LINUX-LABEL: main: -; CHECK-LINUX: stp x29, x30, [sp, #-16]! -; CHECK-LINUX-NEXT: mov x29, sp +; CHECK-LINUX: str x30, [sp, #-16]! ; CHECK-LINUX-NEXT: sub sp, sp, #16 -; CHECK-LINUX-NEXT: stur wzr, [x29, #-4] +; CHECK-LINUX-NEXT: str wzr, [sp, #12] ; CHECK-LINUX: adrp x0, .L.str ; CHECK-LINUX: add x0, x0, :lo12:.L.str ; CHECK-LINUX-NEXT: bl puts -; CHECK-LINUX-NEXT: mov sp, x29 -; CHECK-LINUX-NEXT: ldp x29, x30, [sp], #16 +; CHECK-LINUX-NEXT: add sp, sp, #16 +; CHECK-LINUX-NEXT: ldr x30, [sp], #16 ; CHECK-LINUX-NEXT: ret @.str = private unnamed_addr constant [7 x i8] c"hello\0A\00" Index: llvm/trunk/test/CodeGen/AArch64/arm64-shrink-wrapping.ll =================================================================== --- llvm/trunk/test/CodeGen/AArch64/arm64-shrink-wrapping.ll +++ llvm/trunk/test/CodeGen/AArch64/arm64-shrink-wrapping.ll @@ -1,5 +1,5 @@ -; RUN: llc %s -o - -enable-shrink-wrap=true -disable-post-ra | FileCheck %s --check-prefix=CHECK --check-prefix=ENABLE -; RUN: llc %s -o - -enable-shrink-wrap=false -disable-post-ra | FileCheck %s --check-prefix=CHECK --check-prefix=DISABLE +; RUN: llc %s -o - -enable-shrink-wrap=true -disable-post-ra -disable-fp-elim | FileCheck %s --check-prefix=CHECK --check-prefix=ENABLE +; RUN: llc %s -o - -enable-shrink-wrap=false -disable-post-ra -disable-fp-elim | FileCheck %s --check-prefix=CHECK --check-prefix=DISABLE target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" target triple = "arm64-apple-ios" Index: llvm/trunk/test/CodeGen/AArch64/emutls.ll =================================================================== --- llvm/trunk/test/CodeGen/AArch64/emutls.ll +++ llvm/trunk/test/CodeGen/AArch64/emutls.ll @@ -1,5 +1,5 @@ ; RUN: llc -emulated-tls -mtriple=aarch64-linux-android \ -; RUN: -relocation-model=pic < %s | FileCheck -check-prefix=ARM64 %s +; RUN: -relocation-model=pic -disable-fp-elim < %s | FileCheck -check-prefix=ARM64 %s ; Copied from X86/emutls.ll Index: llvm/trunk/test/CodeGen/AArch64/f16-instructions.ll =================================================================== --- llvm/trunk/test/CodeGen/AArch64/f16-instructions.ll +++ llvm/trunk/test/CodeGen/AArch64/f16-instructions.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple aarch64-unknown-unknown -aarch64-neon-syntax=apple -asm-verbose=false -disable-post-ra | FileCheck %s +; RUN: llc < %s -mtriple aarch64-unknown-unknown -aarch64-neon-syntax=apple -asm-verbose=false -disable-post-ra -disable-fp-elim | FileCheck %s target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" Index: llvm/trunk/test/CodeGen/AArch64/fastcc.ll =================================================================== --- llvm/trunk/test/CodeGen/AArch64/fastcc.ll +++ llvm/trunk/test/CodeGen/AArch64/fastcc.ll @@ -1,6 +1,6 @@ -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -tailcallopt | FileCheck %s -check-prefix CHECK-TAIL -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -tailcallopt -aarch64-redzone | FileCheck %s -check-prefix CHECK-TAIL-RZ +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -disable-fp-elim -tailcallopt | FileCheck %s -check-prefix CHECK-TAIL +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -disable-fp-elim | FileCheck %s +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -disable-fp-elim -tailcallopt -aarch64-redzone | FileCheck %s -check-prefix CHECK-TAIL-RZ ; Without tailcallopt fastcc still means the caller cleans up the ; stack, so try to make sure this is respected. Index: llvm/trunk/test/CodeGen/AArch64/local_vars.ll =================================================================== --- llvm/trunk/test/CodeGen/AArch64/local_vars.ll +++ llvm/trunk/test/CodeGen/AArch64/local_vars.ll @@ -24,24 +24,25 @@ } define void @trivial_fp_func() { -; CHECK-WITHFP-AARCH64-LABEL: trivial_fp_func: -; CHECK-WITHFP-AARCH64: sub sp, sp, #16 -; CHECK-WITHFP-AARCH64: stp x29, x30, [sp] -; CHECK-WITHFP-AARCH64-NEXT: mov x29, sp +; CHECK-LABEL: trivial_fp_func: +; CHECK: str x30, [sp, #-16]! +; CHECK-NOT: mov x29, sp ; CHECK-WITHFP-ARM64-LABEL: trivial_fp_func: ; CHECK-WITHFP-ARM64: stp x29, x30, [sp, #-16]! ; CHECK-WITHFP-ARM64-NEXT: mov x29, sp ; Dont't really care, but it would be a Bad Thing if this came after the epilogue. +; CHECK-WITHFP-ARM64: bl foo ; CHECK: bl foo call void @foo() ret void -; CHECK-WITHFP: ldp x29, x30, [sp] -; CHECK-WITHFP: add sp, sp, #16 +; CHECK: ldr x30, [sp], #16 +; CHECK-NEXT: ret -; CHECK-WITHFP: ret +; CHECK-WITHFP-ARM64: ldp x29, x30, [sp], #16 +; CHECK-WITHFP-ARM64-NEXT: ret } define void @stack_local() { Index: llvm/trunk/test/CodeGen/AArch64/machine-combiner.ll =================================================================== --- llvm/trunk/test/CodeGen/AArch64/machine-combiner.ll +++ llvm/trunk/test/CodeGen/AArch64/machine-combiner.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=aarch64-gnu-linux -mcpu=cortex-a57 -enable-unsafe-fp-math < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-gnu-linux -mcpu=cortex-a57 -enable-unsafe-fp-math -disable-post-ra < %s | FileCheck %s ; Verify that the first two adds are independent regardless of how the inputs are ; commuted. The destination registers are used as source registers for the third add. Index: llvm/trunk/test/CodeGen/AArch64/regress-tblgen-chains.ll =================================================================== --- llvm/trunk/test/CodeGen/AArch64/regress-tblgen-chains.ll +++ llvm/trunk/test/CodeGen/AArch64/regress-tblgen-chains.ll @@ -1,4 +1,4 @@ -; RUN: llc -verify-machineinstrs -mtriple=arm64-apple-ios7.0 -o - %s | FileCheck %s +; RUN: llc -verify-machineinstrs -mtriple=arm64-apple-ios7.0 -disable-fp-elim -o - %s | FileCheck %s ; When generating DAG selection tables, TableGen used to only flag an ; instruction as needing a chain on its own account if it had a built-in pattern Index: llvm/trunk/test/CodeGen/AArch64/stack-guard-remat-bitcast.ll =================================================================== --- llvm/trunk/test/CodeGen/AArch64/stack-guard-remat-bitcast.ll +++ llvm/trunk/test/CodeGen/AArch64/stack-guard-remat-bitcast.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=arm64-apple-ios -relocation-model=pic | FileCheck %s +; RUN: llc < %s -mtriple=arm64-apple-ios -relocation-model=pic -disable-fp-elim | FileCheck %s @__stack_chk_guard = external global i64* Index: llvm/trunk/test/DebugInfo/AArch64/frameindices.ll =================================================================== --- llvm/trunk/test/DebugInfo/AArch64/frameindices.ll +++ llvm/trunk/test/DebugInfo/AArch64/frameindices.ll @@ -1,4 +1,4 @@ -; RUN: llc -O0 -filetype=obj < %s | llvm-dwarfdump - | FileCheck %s +; RUN: llc -disable-fp-elim -O0 -filetype=obj < %s | llvm-dwarfdump - | FileCheck %s ; Test that a variable with multiple entries in the MMI table makes it into the ; debug info. ;