diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -296,6 +296,7 @@ /// pointer register. bool AArch64FrameLowering::hasFP(const MachineFunction &MF) const { const MachineFrameInfo &MFI = MF.getFrameInfo(); + const AArch64FunctionInfo *AFI = MF.getInfo(); const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo(); // Win64 EH requires a frame pointer if funclets are present, as the locals // are accessed off the frame pointer in both the parent function and the @@ -320,6 +321,14 @@ if (!MFI.isMaxCallFrameSizeComputed() || MFI.getMaxCallFrameSize() > DefaultSafeSPDisplacement) return true; + // If there are both SVE and non-SVE objects on the stack, make the frame + // pointer available since it may be more performant to use it. + uint64_t CalleeStackSize = AFI->isCalleeSavedStackSizeComputed() + ? AFI->getCalleeSavedStackSize() + : 0; + uint64_t NonSVEStackSize = MFI.getStackSize() - CalleeStackSize; + if (AFI->getStackSizeSVE() && NonSVEStackSize) + return true; return false; } @@ -1883,10 +1892,6 @@ // right thing for the emergency spill slot. bool UseFP = false; if (AFI->hasStackFrame() && !isSVE) { - // We shouldn't prefer using the FP when there is an SVE area - // in between the FP and the non-SVE locals/spills. - PreferFP &= !SVEStackSize; - // Note: Keeping the following as multiple 'if' statements rather than // merging to a single expression for readability. // @@ -1907,6 +1912,10 @@ bool FPOffsetFits = !ForSimm || FPOffset >= -256; PreferFP |= Offset > -FPOffset; + // The FP offset will not fit if there is an SVE area in the way. + if (SVEStackSize && FPOffset < 0) + FPOffsetFits = false; + if (MFI.hasVarSizedObjects()) { // If we have variable sized objects, we can use either FP or BP, as the // SP offset is unknown. We can use the base pointer if we have one and diff --git a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h --- a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h +++ b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h @@ -248,6 +248,10 @@ return getCalleeSavedStackSize(); } + bool isCalleeSavedStackSizeComputed() const { + return HasCalleeSavedStackSize; + } + unsigned getCalleeSavedStackSize() const { assert(HasCalleeSavedStackSize && "CalleeSavedStackSize has not been calculated"); diff --git a/llvm/test/CodeGen/AArch64/debug-info-sve-dbg-value.mir b/llvm/test/CodeGen/AArch64/debug-info-sve-dbg-value.mir --- a/llvm/test/CodeGen/AArch64/debug-info-sve-dbg-value.mir +++ b/llvm/test/CodeGen/AArch64/debug-info-sve-dbg-value.mir @@ -12,16 +12,16 @@ # CHECK1: : DW_OP_breg31 WSP+16) # CHECK1: DW_AT_type {{.*}}ty32 # -# CHECK2: : DW_OP_breg31 WSP+16, DW_OP_lit16, DW_OP_bregx VG+0, DW_OP_mul, DW_OP_plus) +# CHECK2: : DW_OP_breg29 W29+0, DW_OP_lit8, DW_OP_bregx VG+0, DW_OP_mul, DW_OP_minus) # CHECK2: DW_AT_type {{.*}}svint32_t # -# CHECK3: : DW_OP_breg31 WSP+16, DW_OP_lit8, DW_OP_bregx VG+0, DW_OP_mul, DW_OP_plus) +# CHECK3: : DW_OP_breg29 W29+0, DW_OP_lit16, DW_OP_bregx VG+0, DW_OP_mul, DW_OP_minus) # CHECK3: DW_AT_type {{.*}}svint32_t # -# CHECK4: : DW_OP_breg31 WSP+16, DW_OP_lit7, DW_OP_bregx VG+0, DW_OP_mul, DW_OP_plus) +# CHECK4: : DW_OP_breg29 W29+0, DW_OP_lit17, DW_OP_bregx VG+0, DW_OP_mul, DW_OP_minus) # CHECK4: DW_AT_type {{.*}}svbool_t # -# CHECK5: : DW_OP_breg31 WSP+16, DW_OP_lit6, DW_OP_bregx VG+0, DW_OP_mul, DW_OP_plus) +# CHECK5: : DW_OP_breg29 W29+0, DW_OP_lit18, DW_OP_bregx VG+0, DW_OP_mul, DW_OP_minus) # CHECK5: DW_AT_type {{.*}}svbool_t --- | diff --git a/llvm/test/CodeGen/AArch64/framelayout-sve.mir b/llvm/test/CodeGen/AArch64/framelayout-sve.mir --- a/llvm/test/CodeGen/AArch64/framelayout-sve.mir +++ b/llvm/test/CodeGen/AArch64/framelayout-sve.mir @@ -57,6 +57,7 @@ # CHECK: bb.0.entry: # CHECK-NEXT: $sp = frame-setup STRXpre killed $[[SCRATCH:[a-z0-9]+]], $sp, -16 +# CHECK-NEXT: $fp = frame-setup ADDXri $sp, 0, 0 # CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -2 # CHECK-NEXT: $sp = frame-setup SUBXri $sp, 16, 0 # CHECK-COUNT-2: frame-setup CFI_INSTRUCTION @@ -67,11 +68,9 @@ # CHECK-NEXT: RET_ReallyLR # ASM-LABEL: test_allocate_sve: -# ASM: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 32 + 16 * VG -# ASM-NEXT: .cfi_offset w29, -16 +# ASM: .cfi_offset w29, -16 # -# UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +32, DW_OP_plus, DW_OP_consts +16, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus -# UNWINDINFO-NEXT: DW_CFA_offset: reg29 -16 +# UNWINDINFO: DW_CFA_offset: reg29 -16 name: test_allocate_sve stack: - { id: 0, stack-id: scalable-vector, size: 18, alignment: 2 } @@ -96,6 +95,7 @@ # CHECK: bb.0.entry: # CHECK-NEXT: $sp = frame-setup STRXpre killed $[[SCRATCH:[a-z0-9]+]], $sp, -32 # CHECK-NEXT: frame-setup STPXi killed $x21, killed $x20, $sp, 2 +# CHECK-NEXT: $fp = frame-setup ADDXri $sp, 0, 0 # CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -2 # CHECK-NEXT: $sp = frame-setup SUBXri $sp, 16, 0 # CHECK-COUNT-4: frame-setup CFI_INSTRUCTION @@ -109,13 +109,11 @@ # CHECK-NEXT: RET_ReallyLR # # ASM-LABEL: test_allocate_sve_gpr_callee_saves: -# ASM: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x30, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 48 + 16 * VG -# ASM-NEXT: .cfi_offset w20, -8 +# ASM: .cfi_offset w20, -8 # ASM-NEXT: .cfi_offset w21, -16 # ASM-NEXT: .cfi_offset w29, -32 # -# UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +48, DW_OP_plus, DW_OP_consts +16, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus -# UNWINDINFO-NEXT: DW_CFA_offset: reg20 -8 +# UNWINDINFO: DW_CFA_offset: reg20 -8 # UNWINDINFO-NEXT: DW_CFA_offset: reg21 -16 # UNWINDINFO-NEXT: DW_CFA_offset: reg29 -32 name: test_allocate_sve_gpr_callee_saves @@ -184,16 +182,14 @@ # CHECK: bb.0.entry: # CHECK-NEXT: $sp = frame-setup STRXpre killed $[[SCRATCH:[a-z0-9]+]], $sp, -16 +# CHECK-NEXT: $fp = frame-setup ADDXri $sp, 0, 0 # CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -3 # CHECK-NEXT: $sp = frame-setup SUBXri $sp, 16, 0 # CHECK-COUNT-2: frame-setup CFI_INSTRUCTION -# CHECK-NEXT: $[[TMP:x[0-9]+]] = ADDXri $sp, 16 -# CHECK-NEXT: STR_ZXI $z0, killed $[[TMP]], 2 -# CHECK-NEXT: $[[TMP:x[0-9]+]] = ADDXri $sp, 16 -# CHECK-NEXT: STR_ZXI $z1, killed $[[TMP]], 1 -# CHECK-NEXT: $[[TMP:x[0-9]+]] = ADDXri $sp, 16 -# CHECK-NEXT: STR_PXI $p0, killed $[[TMP]], 7 +# CHECK-NEXT: STR_ZXI $z0, $fp, -1 +# CHECK-NEXT: STR_ZXI $z1, $fp, -2 +# CHECK-NEXT: STR_PXI $p0, $fp, -17 # CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 3 # CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 16, 0 @@ -201,11 +197,9 @@ # CHECK-NEXT: RET_ReallyLR # # ASM-LABEL: test_address_sve: -# ASM: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x18, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 32 + 24 * VG -# ASM-NEXT: .cfi_offset w29, -16 +# ASM: .cfi_offset w29, -16 # -# UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +32, DW_OP_plus, DW_OP_consts +24, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus -# UNWINDINFO-NEXT: DW_CFA_offset: reg29 -16 +# UNWINDINFO: DW_CFA_offset: reg29 -16 name: test_address_sve frameInfo: @@ -298,12 +292,12 @@ # CHECK: bb.0.entry: # CHECK-NEXT: $sp = frame-setup STRXpre killed $[[SCRATCH:[a-z0-9]+]], $sp, -16 +# CHECK-NEXT: $fp = frame-setup ADDXri $sp, 0, 0 # CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -1 # CHECK-NEXT: $sp = frame-setup SUBXri $sp, 16, 0 # CHECK-COUNT-2: frame-setup CFI_INSTRUCTION -# CHECK: $[[TMP:x[0-9]+]] = ADDVL_XXI $sp, 1 -# CHECK-NEXT: $x0 = LDRXui killed $[[TMP]], 4 +# CHECK-NEXT: $x0 = LDRXui $fp, 2 # CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 1 # CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 16, 0 @@ -311,11 +305,9 @@ # CHECK-NEXT: RET_ReallyLR # # ASM-LABEL: test_stack_arg_sve: -# ASM: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 32 + 8 * VG -# ASM-NEXT: .cfi_offset w29, -16 +# ASM: .cfi_offset w29, -16 # -# UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +32, DW_OP_plus, DW_OP_consts +8, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus -# UNWINDINFO-NEXT: DW_CFA_offset: reg29 -16 +# UNWINDINFO: DW_CFA_offset: reg29 -16 name: test_stack_arg_sve fixedStack: @@ -460,11 +452,9 @@ # CHECK: RET_ReallyLR # # ASM-LABEL: save_restore_pregs_sve: -# ASM: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x30, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 48 + 8 * VG -# ASM-NEXT: .cfi_offset w29, -16 +# ASM: .cfi_offset w29, -16 # -# UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +48, DW_OP_plus, DW_OP_consts +8, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus -# UNWINDINFO-NEXT: DW_CFA_offset: reg29 -16 +# UNWINDINFO: DW_CFA_offset: reg29 -16 name: save_restore_pregs_sve stack: - { id: 0, stack-id: default, size: 32, alignment: 16 } @@ -480,6 +470,7 @@ ... # CHECK-LABEL: name: save_restore_zregs_sve # CHECK: $sp = frame-setup STRXpre killed $fp, $sp, -16 +# CHECK-NEXT: $fp = frame-setup ADDXri $sp, 0, 0 # CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -3 # CHECK-NEXT: frame-setup STR_ZXI killed $z10, $sp, 0 # CHECK-NEXT: frame-setup STR_ZXI killed $z9, $sp, 1 @@ -496,13 +487,11 @@ # CHECK-NEXT: RET_ReallyLR # # ASM-LABEL: save_restore_zregs_sve: -# ASM: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x30, 0x22, 0x11, 0x18, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 48 + 24 * VG -# ASM-NEXT: .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 16 - 8 * VG +# ASM: .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 16 - 8 * VG # ASM-NEXT: .cfi_escape 0x10, 0x49, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 16 - 16 * VG # ASM-NEXT: .cfi_escape 0x10, 0x4a, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 16 - 24 * VG -# UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +48, DW_OP_plus, DW_OP_consts +24, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus -# UNWINDINFO-NEXT: DW_CFA_expression: reg72 DW_OP_consts -16, DW_OP_plus, DW_OP_consts -8, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus +# UNWINDINFO: DW_CFA_expression: reg72 DW_OP_consts -16, DW_OP_plus, DW_OP_consts -8, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus # UNWINDINFO-NEXT: DW_CFA_expression: reg73 DW_OP_consts -16, DW_OP_plus, DW_OP_consts -16, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus # UNWINDINFO-NEXT: DW_CFA_expression: reg74 DW_OP_consts -16, DW_OP_plus, DW_OP_consts -24, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus # UNWINDINFO-NEXT: DW_CFA_offset: reg29 -16 @@ -558,8 +547,7 @@ # CHECK: RET_ReallyLR # # ASM-LABEL: save_restore_sve: -# ASM: .cfi_escape 0x0f, 0x0e, 0x8f, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x98, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 64 + 152 * VG -# ASM-NEXT: .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 32 - 8 * VG +# ASM: .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 32 - 8 * VG # ASM-NEXT: .cfi_escape 0x10, 0x49, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 32 - 16 * VG # ASM-NEXT: .cfi_escape 0x10, 0x4a, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 32 - 24 * VG # ASM-NEXT: .cfi_escape 0x10, 0x4b, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x60, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d11 @ cfa - 32 - 32 * VG @@ -572,8 +560,7 @@ # ASM-NEXT: .cfi_offset w21, -24 # ASM-NEXT: .cfi_offset w29, -32 # -# UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +64, DW_OP_plus, DW_OP_consts +152, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus -# UNWINDINFO-NEXT: DW_CFA_expression: reg72 DW_OP_consts -32, DW_OP_plus, DW_OP_consts -8, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus +# UNWINDINFO: DW_CFA_expression: reg72 DW_OP_consts -32, DW_OP_plus, DW_OP_consts -8, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus # UNWINDINFO-NEXT: DW_CFA_expression: reg73 DW_OP_consts -32, DW_OP_plus, DW_OP_consts -16, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus # UNWINDINFO-NEXT: DW_CFA_expression: reg74 DW_OP_consts -32, DW_OP_plus, DW_OP_consts -24, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus # UNWINDINFO-NEXT: DW_CFA_expression: reg75 DW_OP_consts -32, DW_OP_plus, DW_OP_consts -32, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus diff --git a/llvm/test/CodeGen/AArch64/sve-calling-convention-mixed.ll b/llvm/test/CodeGen/AArch64/sve-calling-convention-mixed.ll --- a/llvm/test/CodeGen/AArch64/sve-calling-convention-mixed.ll +++ b/llvm/test/CodeGen/AArch64/sve-calling-convention-mixed.ll @@ -41,15 +41,18 @@ ; CHECK-LABEL: foo2: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill +; CHECK-NEXT: mov x29, sp ; CHECK-NEXT: addvl sp, sp, #-4 -; CHECK-NEXT: sub sp, sp, #16 // =16 ; CHECK-NEXT: ptrue p0.b ; CHECK-NEXT: ld4d { z1.d, z2.d, z3.d, z4.d }, p0/z, [x0] ; CHECK-NEXT: ld4d { z16.d, z17.d, z18.d, z19.d }, p0/z, [x1] ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: add x8, sp, #16 // =16 -; CHECK-NEXT: add x9, sp, #16 // =16 +; CHECK-NEXT: addvl x8, x29, #-4 ; CHECK-NEXT: fmov s0, #1.00000000 +; CHECK-NEXT: st1d { z16.d }, p0, [x29, #-4, mul vl] +; CHECK-NEXT: st1d { z17.d }, p0, [x8, #1, mul vl] +; CHECK-NEXT: st1d { z18.d }, p0, [x8, #2, mul vl] +; CHECK-NEXT: st1d { z19.d }, p0, [x8, #3, mul vl] ; CHECK-NEXT: mov w1, #1 ; CHECK-NEXT: mov w2, #2 ; CHECK-NEXT: mov w3, #3 @@ -57,12 +60,8 @@ ; CHECK-NEXT: mov w5, #5 ; CHECK-NEXT: mov w6, #6 ; CHECK-NEXT: mov w7, #7 +; CHECK-NEXT: str x8, [sp, #-16]! ; CHECK-NEXT: mov w0, wzr -; CHECK-NEXT: st1d { z16.d }, p0, [x9] -; CHECK-NEXT: st1d { z17.d }, p0, [x8, #1, mul vl] -; CHECK-NEXT: st1d { z18.d }, p0, [x8, #2, mul vl] -; CHECK-NEXT: st1d { z19.d }, p0, [x8, #3, mul vl] -; CHECK-NEXT: str x8, [sp] ; CHECK-NEXT: bl callee2 ; CHECK-NEXT: addvl sp, sp, #4 ; CHECK-NEXT: add sp, sp, #16 // =16