diff --git a/llvm/include/llvm/CodeGen/TargetFrameLowering.h b/llvm/include/llvm/CodeGen/TargetFrameLowering.h --- a/llvm/include/llvm/CodeGen/TargetFrameLowering.h +++ b/llvm/include/llvm/CodeGen/TargetFrameLowering.h @@ -344,6 +344,11 @@ virtual void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS = nullptr) const; + /// processFunctionBeforeCalleeSpill - This method is called immediately + /// before the specified function's callee saved registers are calculated and + /// spilled. This method is optional. + virtual void processFunctionBeforeCalleeSpill(MachineFunction &MF) const {} + /// processFunctionBeforeFrameFinalized - This method is called immediately /// before the specified function's frame layout (MF.getFrameInfo()) is /// finalized. Once the frame is finalized, MO_FrameIndex operands are diff --git a/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/llvm/lib/CodeGen/PrologEpilogInserter.cpp --- a/llvm/lib/CodeGen/PrologEpilogInserter.cpp +++ b/llvm/lib/CodeGen/PrologEpilogInserter.cpp @@ -236,6 +236,10 @@ for (MachineBasicBlock *SaveBlock : SaveBlocks) stashEntryDbgValues(*SaveBlock, EntryDbgValues); + // Allow the target to make preparations to a function before the callee + // registers are calculated and spilled. + TFI->processFunctionBeforeCalleeSpill(MF); + // Handle CSR spilling and restoring, for targets that need it. 
if (MF.getTarget().usesPhysRegsForValues()) spillCalleeSavedRegs(MF); diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.h b/llvm/lib/Target/AArch64/AArch64FrameLowering.h --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.h +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.h @@ -83,6 +83,8 @@ bool enableStackSlotScavenging(const MachineFunction &MF) const override; TargetStackID::Value getStackIDForScalableVectors() const override; + void processFunctionBeforeCalleeSpill(MachineFunction &MF) const override; + void processFunctionBeforeFrameFinalized(MachineFunction &MF, RegScavenger *RS) const override; diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -343,7 +343,8 @@ bool AArch64FrameLowering::hasFP(const MachineFunction &MF) const { const MachineFrameInfo &MFI = MF.getFrameInfo(); const AArch64FunctionInfo *AFI = MF.getInfo(); - const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo(); + const AArch64Subtarget &STI = MF.getSubtarget(); + const TargetRegisterInfo *RegInfo = STI.getRegisterInfo(); // Win64 EH requires a frame pointer if funclets are present, as the locals // are accessed off the frame pointer in both the parent function and the // funclets. @@ -367,14 +368,19 @@ if (!MFI.isMaxCallFrameSizeComputed() || MFI.getMaxCallFrameSize() > DefaultSafeSPDisplacement) return true; - // If there are both SVE and non-SVE objects on the stack, make the frame - // pointer available since it may be more performant to use it. - uint64_t CalleeStackSize = AFI->isCalleeSavedStackSizeComputed() - ? AFI->getCalleeSavedStackSize() - : 0; - uint64_t NonSVEStackSize = MFI.getStackSize() - CalleeStackSize; - if (AFI->getStackSizeSVE() && NonSVEStackSize) - return true; + + // Only perform the below in the presence of SVE so as to avoid reserving x29 + // unnecessarily. 
+ if (STI.hasSVE()) { + // If we have not yet determined if we should be using the FP then as above + // we must be conservative and return true. Unfortunately this will cause x29 + // to always be reserved in the presence of SVE, which is a trade off for + // the large gains using a frame pointer can provide. + if (!AFI->hasCalculatedSVEShouldUseFP()) + return true; + + return AFI->getSVEShouldUseFP(); + } return false; } @@ -2942,6 +2948,29 @@ true); } +void AArch64FrameLowering::processFunctionBeforeCalleeSpill( + MachineFunction &MF) const { + MachineFrameInfo &MFI = MF.getFrameInfo(); + AArch64FunctionInfo *AFI = MF.getInfo(); + + // Determine whether this function should use a frame pointer or not. This + // calculation should only be done once so as to avoid changing our mind if + // the stack objects change. + assert(!AFI->hasCalculatedSVEShouldUseFP()); + + // If there are both SVE and non-SVE objects on the stack, make the frame + // pointer available since it may be more performant to use it. + bool HasSVEStackObjects = false, HasNonSVEStackObjects = false; + for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd(); I != E; + ++I) + if (MFI.getStackID(I) == TargetStackID::ScalableVector) + HasSVEStackObjects = true; + else + HasNonSVEStackObjects = true; + + AFI->setSVEShouldUseFP(HasSVEStackObjects && HasNonSVEStackObjects); +} + void AArch64FrameLowering::processFunctionBeforeFrameFinalized( MachineFunction &MF, RegScavenger *RS) const { MachineFrameInfo &MFI = MF.getFrameInfo(); diff --git a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h --- a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h +++ b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h @@ -118,6 +118,13 @@ /// HasCalculatedStackSizeSVE indicates whether StackSizeSVE is valid. 
bool HasCalculatedStackSizeSVE = false; + /// SVEShouldUseFP indicates whether the frame pointer should be used based + /// upon which types of stack objects are present (SVE and non-SVE). + bool SVEShouldUseFP = false; + + /// HasCalculatedSVEShouldUseFP indicates whether SVEShouldUseFP is valid. + bool HasCalculatedSVEShouldUseFP = false; + /// Has a value when it is known whether or not the function uses a /// redzone, and no value otherwise. /// Initialized during frame lowering, unless the function has the noredzone @@ -181,6 +188,17 @@ uint64_t getStackSizeSVE() const { return StackSizeSVE; } + bool hasCalculatedSVEShouldUseFP() const { + return HasCalculatedSVEShouldUseFP; + } + + void setSVEShouldUseFP(bool S) { + HasCalculatedSVEShouldUseFP = true; + SVEShouldUseFP = S; + } + + bool getSVEShouldUseFP() const { return SVEShouldUseFP; } + bool hasStackFrame() const { return HasStackFrame; } void setHasStackFrame(bool s) { HasStackFrame = s; } @@ -248,10 +266,6 @@ return getCalleeSavedStackSize(); } - bool isCalleeSavedStackSizeComputed() const { - return HasCalleeSavedStackSize; - } - unsigned getCalleeSavedStackSize() const { assert(HasCalleeSavedStackSize && "CalleeSavedStackSize has not been calculated"); diff --git a/llvm/test/CodeGen/AArch64/debug-info-sve-dbg-value.mir b/llvm/test/CodeGen/AArch64/debug-info-sve-dbg-value.mir --- a/llvm/test/CodeGen/AArch64/debug-info-sve-dbg-value.mir +++ b/llvm/test/CodeGen/AArch64/debug-info-sve-dbg-value.mir @@ -6,10 +6,10 @@ # RUN: llvm-dwarfdump --name="value4" %t | FileCheck %s --check-prefix=CHECK4 # RUN: llvm-dwarfdump --name="value5" %t | FileCheck %s --check-prefix=CHECK5 -# CHECK0: : DW_OP_breg31 WSP+8, DW_OP_lit16, DW_OP_plus) +# CHECK0: : DW_OP_breg29 W29+24, DW_OP_lit16, DW_OP_plus) # CHECK0: DW_AT_type {{.*}}ty32 # -# CHECK1: : DW_OP_breg31 WSP+16) +# CHECK1: : DW_OP_breg31 WSP+8, DW_OP_lit16, DW_OP_plus) # CHECK1: DW_AT_type {{.*}}ty32 # # CHECK2: : DW_OP_breg29 W29+0, DW_OP_lit8, DW_OP_bregx VG+0, 
DW_OP_mul, DW_OP_minus) diff --git a/llvm/test/CodeGen/AArch64/framelayout-sve.ll b/llvm/test/CodeGen/AArch64/framelayout-sve.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/framelayout-sve.ll @@ -0,0 +1,86 @@ +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -o - %s | FileCheck %s + +; A more end to end test than test/CodeGen/AArch64/framelayout-sve.mir to +; ensure that hasFP is returning a consistent value throughout. + +; Check that FP is used for SVE stack objects and that SP is used for non-SVE +; stack objects + +define void @func1( %v0, i32 %v1) { + ; CHECK-LABEL: func1 + ; CHECK: st1w { z0.d }, p0, [x29, #-1, mul vl] + ; CHECK: str w0, [sp, #12] + ; CHECK: str w0, [sp, #8] + ; CHECK: str w0, [sp, #4] + + %local0 = alloca + %local1 = alloca i32 + %local2 = alloca i32 + %local3 = alloca i32 + store volatile %v0, * %local0 + store volatile i32 %v1, i32* %local1 + store volatile i32 %v1, i32* %local2 + store volatile i32 %v1, i32* %local3 + ret void +} + +; Check that FP is not used when there are no non-SVE objects on the stack + +define void @func2( %v0) { + ; CHECK-LABEL: func2 + ; CHECK: st1w { z0.d }, p0, [sp, #1, mul vl] + + %local0 = alloca + store volatile %v0, * %local0 + ret void +} + +; Check that FP is not used when there are no SVE objects on the stack + +define void @func3(i32 %v0) { + ; CHECK-LABEL: func3 + ; CHECK: str w0, [sp, #12] + + %local0 = alloca i32 + store volatile i32 %v0, i32* %local0 + ret void +} + +; Check that FP is used appropriately in the presence of only loads + +define void @func4() { + ; CHECK-LABEL: func4 + ; CHECK: ldr w8, [sp, #12] + ; CHECK: ld1w { z0.d }, p0/z, [x29, #-1, mul vl] + + %local0 = alloca i32 + %local1 = alloca + load volatile i32, i32* %local0 + load volatile , * %local1 + ret void +} + +; Check that in presence of high register pressure x29 does not get used as a +; general purpose register when FP is in use + +@var = global [30 x i64] zeroinitializer + +define void @func5( %v0, i32 
%v1) { + ; CHECK-LABEL: func5 + ; CHECK: mov x29, sp + ; CHECK-NOT: ldr x29 + ; CHECK-NOT: str x29 + + %val = load volatile [30 x i64], [30 x i64]* @var + store volatile [30 x i64] %val, [30 x i64]* @var + + %local0 = alloca + %local1 = alloca i32 + %local2 = alloca i32 + %local3 = alloca i32 + store volatile %v0, * %local0 + store volatile i32 %v1, i32* %local1 + store volatile i32 %v1, i32* %local2 + store volatile i32 %v1, i32* %local3 + ret void +} diff --git a/llvm/test/CodeGen/AArch64/framelayout-sve.mir b/llvm/test/CodeGen/AArch64/framelayout-sve.mir --- a/llvm/test/CodeGen/AArch64/framelayout-sve.mir +++ b/llvm/test/CodeGen/AArch64/framelayout-sve.mir @@ -56,15 +56,15 @@ # CHECK: stackSize: 32 # CHECK: bb.0.entry: -# CHECK-NEXT: $sp = frame-setup STRXpre killed $[[SCRATCH:[a-z0-9]+]], $sp, -16 +# CHECK-NEXT: $sp = frame-setup STPXpre killed $fp, killed $[[SCRATCH:[a-z0-9]+]], $sp, -2 # CHECK-NEXT: $fp = frame-setup ADDXri $sp, 0, 0 # CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -2 # CHECK-NEXT: $sp = frame-setup SUBXri $sp, 16, 0 -# CHECK-COUNT-2: frame-setup CFI_INSTRUCTION +# CHECK-COUNT-3: frame-setup CFI_INSTRUCTION # CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 2 # CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 16, 0 -# CHECK-NEXT: $sp, $[[SCRATCH]] = frame-destroy LDRXpost $sp, 16 +# CHECK-NEXT: $sp, $fp, $[[SCRATCH]] = frame-destroy LDPXpost $sp, 2 # CHECK-NEXT: RET_ReallyLR # ASM-LABEL: test_allocate_sve: @@ -93,28 +93,30 @@ # CHECK: stackSize: 48 # CHECK: bb.0.entry: -# CHECK-NEXT: $sp = frame-setup STRXpre killed $[[SCRATCH:[a-z0-9]+]], $sp, -32 +# CHECK-NEXT: $sp = frame-setup STPXpre killed $fp, killed $[[SCRATCH:[a-z0-9]+]], $sp, -4 # CHECK-NEXT: frame-setup STPXi killed $x21, killed $x20, $sp, 2 # CHECK-NEXT: $fp = frame-setup ADDXri $sp, 0, 0 # CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -2 # CHECK-NEXT: $sp = frame-setup SUBXri $sp, 16, 0 -# CHECK-COUNT-4: frame-setup CFI_INSTRUCTION +# CHECK-COUNT-5: frame-setup CFI_INSTRUCTION # # 
CHECK-NEXT: $x20 = IMPLICIT_DEF # CHECK-NEXT: $x21 = IMPLICIT_DEF # CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 2 # CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 16, 0 # CHECK-NEXT: $x21, $x20 = frame-destroy LDPXi $sp, 2 -# CHECK-NEXT: $sp, $[[SCRATCH]] = frame-destroy LDRXpost $sp, 32 +# CHECK-NEXT: $sp, $fp, $[[SCRATCH]] = frame-destroy LDPXpost $sp, 4 # CHECK-NEXT: RET_ReallyLR # # ASM-LABEL: test_allocate_sve_gpr_callee_saves: # ASM: .cfi_offset w20, -8 # ASM-NEXT: .cfi_offset w21, -16 +# ASM-NEXT: .cfi_offset w30, -24 # ASM-NEXT: .cfi_offset w29, -32 # # UNWINDINFO: DW_CFA_offset: reg20 -8 # UNWINDINFO-NEXT: DW_CFA_offset: reg21 -16 +# UNWINDINFO-NEXT: DW_CFA_offset: reg30 -24 # UNWINDINFO-NEXT: DW_CFA_offset: reg29 -32 name: test_allocate_sve_gpr_callee_saves stack: @@ -181,11 +183,11 @@ # CHECK: stackSize: 32 # CHECK: bb.0.entry: -# CHECK-NEXT: $sp = frame-setup STRXpre killed $[[SCRATCH:[a-z0-9]+]], $sp, -16 +# CHECK-NEXT: $sp = frame-setup STPXpre killed $fp, killed $[[SCRATCH:[a-z0-9]+]], $sp, -2 # CHECK-NEXT: $fp = frame-setup ADDXri $sp, 0, 0 # CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -3 # CHECK-NEXT: $sp = frame-setup SUBXri $sp, 16, 0 -# CHECK-COUNT-2: frame-setup CFI_INSTRUCTION +# CHECK-COUNT-3: frame-setup CFI_INSTRUCTION # CHECK-NEXT: STR_ZXI $z0, $fp, -1 # CHECK-NEXT: STR_ZXI $z1, $fp, -2 @@ -193,7 +195,7 @@ # CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 3 # CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 16, 0 -# CHECK-NEXT: $sp, $[[SCRATCH]] = frame-destroy LDRXpost $sp, 16 +# CHECK-NEXT: $sp, $fp, $[[SCRATCH]] = frame-destroy LDPXpost $sp, 2 # CHECK-NEXT: RET_ReallyLR # # ASM-LABEL: test_address_sve: @@ -291,17 +293,17 @@ # CHECK: stackSize: 32 # CHECK: bb.0.entry: -# CHECK-NEXT: $sp = frame-setup STRXpre killed $[[SCRATCH:[a-z0-9]+]], $sp, -16 +# CHECK-NEXT: $sp = frame-setup STPXpre killed $fp, killed $[[SCRATCH:[a-z0-9]+]], $sp, -2 # CHECK-NEXT: $fp = frame-setup ADDXri $sp, 0, 0 # CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -1 # 
CHECK-NEXT: $sp = frame-setup SUBXri $sp, 16, 0 -# CHECK-COUNT-2: frame-setup CFI_INSTRUCTION +# CHECK-COUNT-3: frame-setup CFI_INSTRUCTION # CHECK-NEXT: $x0 = LDRXui $fp, 2 # CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 1 # CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 16, 0 -# CHECK-NEXT: $sp, $[[SCRATCH]] = frame-destroy LDRXpost $sp, 16 +# CHECK-NEXT: $sp, $fp, $[[SCRATCH]] = frame-destroy LDPXpost $sp, 2 # CHECK-NEXT: RET_ReallyLR # # ASM-LABEL: test_stack_arg_sve: @@ -470,7 +472,6 @@ ... # CHECK-LABEL: name: save_restore_zregs_sve # CHECK: $sp = frame-setup STRXpre killed $fp, $sp, -16 -# CHECK-NEXT: $fp = frame-setup ADDXri $sp, 0, 0 # CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -3 # CHECK-NEXT: frame-setup STR_ZXI killed $z10, $sp, 0 # CHECK-NEXT: frame-setup STR_ZXI killed $z9, $sp, 1 @@ -516,8 +517,10 @@ # paired correctly. # # CHECK-LABEL: name: save_restore_sve -# CHECK: $sp = frame-setup STPXpre killed ${{[a-z0-9]+}}, killed $x21, $sp, -4 -# CHECK: frame-setup STPXi killed $x20, killed $x19, $sp, 2 +# CHECK: $sp = frame-setup STPXpre killed $fp, killed $lr, $sp, -6 +# CHECK: frame-setup STRXui killed $x21, $sp, 2 +# CHECK: frame-setup STPXi killed $x20, killed $x19, $sp, 4 +# CHECK: $fp = frame-setup ADDXri $sp, 0, 0 # CHECK: $sp = frame-setup ADDVL_XXI $sp, -18 # CHECK: frame-setup STR_PXI killed $p15, $sp, 4 # CHECK: frame-setup STR_PXI killed $p14, $sp, 5 @@ -529,7 +532,7 @@ # CHECK: frame-setup STR_ZXI killed $z8, $sp, 17 # CHECK: $sp = frame-setup ADDVL_XXI $sp, -1 # CHECK: $sp = frame-setup SUBXri $sp, 32, 0 -# CHECK-COUNT-13: frame-setup CFI_INSTRUCTION +# CHECK-COUNT-14: frame-setup CFI_INSTRUCTION # CHECK: $sp = frame-destroy ADDXri $sp, 32, 0 # CHECK: $sp = frame-destroy ADDVL_XXI $sp, 1 @@ -542,36 +545,39 @@ # CHECK: $z9 = frame-destroy LDR_ZXI $sp, 16 # CHECK: $z8 = frame-destroy LDR_ZXI $sp, 17 # CHECK: $sp = frame-destroy ADDVL_XXI $sp, 18 -# CHECK: $x20, $x19 = frame-destroy LDPXi $sp, 2 -# CHECK: $sp, ${{[a-z0-9]+}}, $x21 = 
frame-destroy LDPXpost $sp, 4 +# CHECK: $x20, $x19 = frame-destroy LDPXi $sp, 4 +# CHECK: $x21 = frame-destroy LDRXui $sp, 2 +# CHECK: $sp, $fp, $lr = frame-destroy LDPXpost $sp, 6 # CHECK: RET_ReallyLR # # ASM-LABEL: save_restore_sve: -# ASM: .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 32 - 8 * VG -# ASM-NEXT: .cfi_escape 0x10, 0x49, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 32 - 16 * VG -# ASM-NEXT: .cfi_escape 0x10, 0x4a, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 32 - 24 * VG -# ASM-NEXT: .cfi_escape 0x10, 0x4b, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x60, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d11 @ cfa - 32 - 32 * VG -# ASM-NEXT: .cfi_escape 0x10, 0x4c, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x58, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d12 @ cfa - 32 - 40 * VG -# ASM-NEXT: .cfi_escape 0x10, 0x4d, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x50, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d13 @ cfa - 32 - 48 * VG -# ASM-NEXT: .cfi_escape 0x10, 0x4e, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d14 @ cfa - 32 - 56 * VG -# ASM-NEXT: .cfi_escape 0x10, 0x4f, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 32 - 64 * VG +# ASM: .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x50, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 48 - 8 * VG +# ASM-NEXT: .cfi_escape 0x10, 0x49, 0x0a, 0x11, 0x50, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 48 - 16 * VG +# ASM-NEXT: .cfi_escape 0x10, 0x4a, 0x0a, 0x11, 0x50, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 48 - 24 * VG +# ASM-NEXT: .cfi_escape 0x10, 0x4b, 0x0a, 0x11, 0x50, 0x22, 0x11, 0x60, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d11 @ cfa - 48 - 32 * VG +# ASM-NEXT: .cfi_escape 0x10, 0x4c, 0x0a, 0x11, 0x50, 0x22, 0x11, 0x58, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d12 @ cfa - 48 - 40 * VG +# ASM-NEXT: .cfi_escape 0x10, 0x4d, 0x0a, 0x11, 0x50, 0x22, 0x11, 0x50, 
0x92, 0x2e, 0x00, 0x1e, 0x22 // $d13 @ cfa - 48 - 48 * VG +# ASM-NEXT: .cfi_escape 0x10, 0x4e, 0x0a, 0x11, 0x50, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d14 @ cfa - 48 - 56 * VG +# ASM-NEXT: .cfi_escape 0x10, 0x4f, 0x0a, 0x11, 0x50, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 48 - 64 * VG # ASM-NEXT: .cfi_offset w19, -8 # ASM-NEXT: .cfi_offset w20, -16 -# ASM-NEXT: .cfi_offset w21, -24 -# ASM-NEXT: .cfi_offset w29, -32 -# -# UNWINDINFO: DW_CFA_expression: reg72 DW_OP_consts -32, DW_OP_plus, DW_OP_consts -8, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus -# UNWINDINFO-NEXT: DW_CFA_expression: reg73 DW_OP_consts -32, DW_OP_plus, DW_OP_consts -16, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus -# UNWINDINFO-NEXT: DW_CFA_expression: reg74 DW_OP_consts -32, DW_OP_plus, DW_OP_consts -24, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus -# UNWINDINFO-NEXT: DW_CFA_expression: reg75 DW_OP_consts -32, DW_OP_plus, DW_OP_consts -32, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus -# UNWINDINFO-NEXT: DW_CFA_expression: reg76 DW_OP_consts -32, DW_OP_plus, DW_OP_consts -40, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus -# UNWINDINFO-NEXT: DW_CFA_expression: reg77 DW_OP_consts -32, DW_OP_plus, DW_OP_consts -48, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus -# UNWINDINFO-NEXT: DW_CFA_expression: reg78 DW_OP_consts -32, DW_OP_plus, DW_OP_consts -56, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus -# UNWINDINFO-NEXT: DW_CFA_expression: reg79 DW_OP_consts -32, DW_OP_plus, DW_OP_consts -64, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus +# ASM-NEXT: .cfi_offset w21, -32 +# ASM-NEXT: .cfi_offset w30, -40 +# ASM-NEXT: .cfi_offset w29, -48 +# +# UNWINDINFO: DW_CFA_expression: reg72 DW_OP_consts -48, DW_OP_plus, DW_OP_consts -8, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus +# UNWINDINFO-NEXT: DW_CFA_expression: reg73 DW_OP_consts -48, DW_OP_plus, DW_OP_consts -16, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus +# UNWINDINFO-NEXT: DW_CFA_expression: reg74 DW_OP_consts -48, DW_OP_plus, DW_OP_consts 
-24, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus +# UNWINDINFO-NEXT: DW_CFA_expression: reg75 DW_OP_consts -48, DW_OP_plus, DW_OP_consts -32, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus +# UNWINDINFO-NEXT: DW_CFA_expression: reg76 DW_OP_consts -48, DW_OP_plus, DW_OP_consts -40, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus +# UNWINDINFO-NEXT: DW_CFA_expression: reg77 DW_OP_consts -48, DW_OP_plus, DW_OP_consts -48, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus +# UNWINDINFO-NEXT: DW_CFA_expression: reg78 DW_OP_consts -48, DW_OP_plus, DW_OP_consts -56, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus +# UNWINDINFO-NEXT: DW_CFA_expression: reg79 DW_OP_consts -48, DW_OP_plus, DW_OP_consts -64, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus # UNWINDINFO-NEXT: DW_CFA_offset: reg19 -8 # UNWINDINFO-NEXT: DW_CFA_offset: reg20 -16 -# UNWINDINFO-NEXT: DW_CFA_offset: reg21 -24 -# UNWINDINFO-NEXT: DW_CFA_offset: reg29 -32 +# UNWINDINFO-NEXT: DW_CFA_offset: reg21 -32 +# UNWINDINFO-NEXT: DW_CFA_offset: reg30 -40 +# UNWINDINFO-NEXT: DW_CFA_offset: reg29 -48 name: save_restore_sve stack: diff --git a/llvm/test/CodeGen/AArch64/sve-calling-convention-mixed.ll b/llvm/test/CodeGen/AArch64/sve-calling-convention-mixed.ll --- a/llvm/test/CodeGen/AArch64/sve-calling-convention-mixed.ll +++ b/llvm/test/CodeGen/AArch64/sve-calling-convention-mixed.ll @@ -41,18 +41,15 @@ ; CHECK-LABEL: foo2: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: stp x29, x30, [sp, #-16]! 
// 16-byte Folded Spill -; CHECK-NEXT: mov x29, sp ; CHECK-NEXT: addvl sp, sp, #-4 +; CHECK-NEXT: sub sp, sp, #16 // =16 ; CHECK-NEXT: ptrue p0.b ; CHECK-NEXT: ld4d { z1.d, z2.d, z3.d, z4.d }, p0/z, [x0] ; CHECK-NEXT: ld4d { z16.d, z17.d, z18.d, z19.d }, p0/z, [x1] ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: addvl x8, x29, #-4 +; CHECK-NEXT: add x8, sp, #16 // =16 +; CHECK-NEXT: add x9, sp, #16 // =16 ; CHECK-NEXT: fmov s0, #1.00000000 -; CHECK-NEXT: st1d { z16.d }, p0, [x29, #-4, mul vl] -; CHECK-NEXT: st1d { z17.d }, p0, [x8, #1, mul vl] -; CHECK-NEXT: st1d { z18.d }, p0, [x8, #2, mul vl] -; CHECK-NEXT: st1d { z19.d }, p0, [x8, #3, mul vl] ; CHECK-NEXT: mov w1, #1 ; CHECK-NEXT: mov w2, #2 ; CHECK-NEXT: mov w3, #3 @@ -60,8 +57,12 @@ ; CHECK-NEXT: mov w5, #5 ; CHECK-NEXT: mov w6, #6 ; CHECK-NEXT: mov w7, #7 -; CHECK-NEXT: str x8, [sp, #-16]! ; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: st1d { z16.d }, p0, [x9] +; CHECK-NEXT: st1d { z17.d }, p0, [x8, #1, mul vl] +; CHECK-NEXT: st1d { z18.d }, p0, [x8, #2, mul vl] +; CHECK-NEXT: st1d { z19.d }, p0, [x8, #3, mul vl] +; CHECK-NEXT: str x8, [sp] ; CHECK-NEXT: bl callee2 ; CHECK-NEXT: addvl sp, sp, #4 ; CHECK-NEXT: add sp, sp, #16 // =16