diff --git a/llvm/include/llvm/CodeGen/TargetFrameLowering.h b/llvm/include/llvm/CodeGen/TargetFrameLowering.h --- a/llvm/include/llvm/CodeGen/TargetFrameLowering.h +++ b/llvm/include/llvm/CodeGen/TargetFrameLowering.h @@ -344,6 +344,11 @@ virtual void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS = nullptr) const; + /// processFunctionBeforeCalleeSpill - This method is called immediately + /// before the specified function's callee-saved registers are calculated and + /// spilled. This method is optional. + virtual void processFunctionBeforeCalleeSpill(MachineFunction &MF) const {} + /// processFunctionBeforeFrameFinalized - This method is called immediately /// before the specified function's frame layout (MF.getFrameInfo()) is /// finalized. Once the frame is finalized, MO_FrameIndex operands are diff --git a/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/llvm/lib/CodeGen/PrologEpilogInserter.cpp --- a/llvm/lib/CodeGen/PrologEpilogInserter.cpp +++ b/llvm/lib/CodeGen/PrologEpilogInserter.cpp @@ -236,6 +236,10 @@ for (MachineBasicBlock *SaveBlock : SaveBlocks) stashEntryDbgValues(*SaveBlock, EntryDbgValues); + // Allow the target to make preparations for a function before the + // callee-saved registers are calculated and spilled. + TFI->processFunctionBeforeCalleeSpill(MF); + // Handle CSR spilling and restoring, for targets that need it. if (MF.getTarget().usesPhysRegsForValues()) spillCalleeSavedRegs(MF); diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.h b/llvm/lib/Target/AArch64/AArch64FrameLowering.h --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.h +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.h @@ -83,6 +83,8 @@ bool enableStackSlotScavenging(const MachineFunction &MF) const override; TargetStackID::Value getStackIDForScalableVectors() const override; + void processFunctionBeforeCalleeSpill(MachineFunction &MF) const override; + void processFunctionBeforeFrameFinalized(MachineFunction &MF, RegScavenger *RS) const override; diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -347,7 +347,8 @@ bool AArch64FrameLowering::hasFP(const MachineFunction &MF) const { const MachineFrameInfo &MFI = MF.getFrameInfo(); const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>(); - const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo(); + const AArch64Subtarget &STI = MF.getSubtarget<AArch64Subtarget>(); + const TargetRegisterInfo *RegInfo = STI.getRegisterInfo(); // Win64 EH requires a frame pointer if funclets are present, as the locals // are accessed off the frame pointer in both the parent function and the // funclets. @@ -371,14 +372,21 @@ if (!MFI.isMaxCallFrameSizeComputed() || MFI.getMaxCallFrameSize() > DefaultSafeSPDisplacement) return true; - // If there are both SVE and non-SVE objects on the stack, make the frame - // pointer available since it may be more performant to use it. - uint64_t CalleeStackSize = AFI->isCalleeSavedStackSizeComputed() - ? AFI->getCalleeSavedStackSize() - : 0; - uint64_t NonSVEStackSize = MFI.getStackSize() - CalleeStackSize; - if (AFI->getStackSizeSVE() && NonSVEStackSize) - return true; + + // FIXME: Once LocalStackSlotAllocation works on multiple StackIDs, the below + // code can be removed, along with the changes in getReservedRegs(). + // Only perform the check below in the presence of SVE, so as to avoid + // reserving x29 unnecessarily.
+ if (STI.hasSVE()) { + // If we have not yet determined whether we should be using the FP then, as + // above, we must be conservative and return true. Unfortunately this will + // cause x29 to always be reserved in the presence of SVE, which is a + // trade-off for the large gains using a frame pointer can provide. + if (!AFI->hasCalculatedSVEShouldUseFP()) + return true; + + return AFI->getSVEShouldUseFP(); + } return false; } @@ -2946,6 +2954,36 @@ true); } +// FIXME: Once LocalStackSlotAllocation works on multiple StackIDs, this +// function can be removed. +void AArch64FrameLowering::processFunctionBeforeCalleeSpill(MachineFunction &MF) const { + MachineFrameInfo &MFI = MF.getFrameInfo(); + AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>(); + const AArch64Subtarget &STI = MF.getSubtarget<AArch64Subtarget>(); + + // If we are not building with SVE then there is no reason to perform the + // calculation below. + if (!STI.hasSVE()) + return; + + // Determine whether this function should use a frame pointer or not. This + // calculation should only be done once, so as to avoid changing our mind if + // the stack objects change. + assert(!AFI->hasCalculatedSVEShouldUseFP()); + + // If there are both SVE and non-SVE objects on the stack, make the frame + // pointer available since it may be more performant to use it. + bool HasSVEStackObjects = false, HasNonSVEStackObjects = false; + for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd(); I != E; + ++I) + if (MFI.getStackID(I) == TargetStackID::ScalableVector) + HasSVEStackObjects = true; + else + HasNonSVEStackObjects = true; + + AFI->setSVEShouldUseFP(HasSVEStackObjects && HasNonSVEStackObjects); +} + void AArch64FrameLowering::processFunctionBeforeFrameFinalized( MachineFunction &MF, RegScavenger *RS) const { MachineFrameInfo &MFI = MF.getFrameInfo(); diff --git a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h --- a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h +++ b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h @@ -118,6 +118,13 @@ /// HasCalculatedStackSizeSVE indicates whether StackSizeSVE is valid. bool HasCalculatedStackSizeSVE = false; + /// SVEShouldUseFP indicates whether the frame pointer should be used based + /// upon which types of stack objects are present (SVE and non-SVE). + bool SVEShouldUseFP = false; + + /// HasCalculatedSVEShouldUseFP indicates whether SVEShouldUseFP is valid. + bool HasCalculatedSVEShouldUseFP = false; + /// Has a value when it is known whether or not the function uses a /// redzone, and no value otherwise.
/// Initialized during frame lowering, unless the function has the noredzone @@ -181,6 +188,17 @@ uint64_t getStackSizeSVE() const { return StackSizeSVE; } + bool hasCalculatedSVEShouldUseFP() const { + return HasCalculatedSVEShouldUseFP; + } + + void setSVEShouldUseFP(bool S) { + HasCalculatedSVEShouldUseFP = true; + SVEShouldUseFP = S; + } + + bool getSVEShouldUseFP() const { return SVEShouldUseFP; } + bool hasStackFrame() const { return HasStackFrame; } void setHasStackFrame(bool s) { HasStackFrame = s; } @@ -248,10 +266,6 @@ return getCalleeSavedStackSize(); } - bool isCalleeSavedStackSizeComputed() const { - return HasCalleeSavedStackSize; - } - unsigned getCalleeSavedStackSize() const { assert(HasCalleeSavedStackSize && "CalleeSavedStackSize has not been calculated"); diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp --- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp @@ -303,17 +303,27 @@ BitVector AArch64RegisterInfo::getReservedRegs(const MachineFunction &MF) const { const AArch64FrameLowering *TFI = getFrameLowering(MF); + const AArch64Subtarget &STI = MF.getSubtarget<AArch64Subtarget>(); // FIXME: avoid re-calculating this every time. BitVector Reserved(getNumRegs()); markSuperRegs(Reserved, AArch64::WSP); markSuperRegs(Reserved, AArch64::WZR); - if (TFI->hasFP(MF) || TT.isOSDarwin()) + // FIXME: Once LocalStackSlotAllocation works on multiple StackIDs and the + // hasFP() return value doesn't change after frame lowering, SVE no longer + // needs to always reserve FP. + // If we have SVE we always reserve the frame pointer. This is because the + // return value of hasFP() can change just before frame lowering. We need to + // avoid changing the result of getReservedRegs() after register allocation, + // otherwise the reserved registers cached by the register scavenger do not + // match the value here and we can get into trouble with the register + // assigned for the emergency spill.
+ if (STI.hasSVE() || TFI->hasFP(MF) || TT.isOSDarwin()) markSuperRegs(Reserved, AArch64::W29); for (size_t i = 0; i < AArch64::GPR32commonRegClass.getNumRegs(); ++i) { - if (MF.getSubtarget<AArch64Subtarget>().isXRegisterReserved(i)) + if (STI.isXRegisterReserved(i)) markSuperRegs(Reserved, AArch64::GPR32commonRegClass.getRegister(i)); } diff --git a/llvm/test/CodeGen/AArch64/debug-info-sve-dbg-value.mir b/llvm/test/CodeGen/AArch64/debug-info-sve-dbg-value.mir --- a/llvm/test/CodeGen/AArch64/debug-info-sve-dbg-value.mir +++ b/llvm/test/CodeGen/AArch64/debug-info-sve-dbg-value.mir @@ -6,10 +6,10 @@ # RUN: llvm-dwarfdump --name="value4" %t | FileCheck %s --check-prefix=CHECK4 # RUN: llvm-dwarfdump --name="value5" %t | FileCheck %s --check-prefix=CHECK5 -# CHECK0: : DW_OP_breg31 WSP+8, DW_OP_lit16, DW_OP_plus) +# CHECK0: : DW_OP_breg29 W29+24, DW_OP_lit16, DW_OP_plus) # CHECK0: DW_AT_type {{.*}}ty32 # -# CHECK1: : DW_OP_breg31 WSP+16) +# CHECK1: : DW_OP_breg31 WSP+8, DW_OP_lit16, DW_OP_plus) # CHECK1: DW_AT_type {{.*}}ty32 # # CHECK2: : DW_OP_breg29 W29+0, DW_OP_lit8, DW_OP_bregx VG+0, DW_OP_mul, DW_OP_minus) diff --git a/llvm/test/CodeGen/AArch64/framelayout-sve-calleesaves-fix.mir b/llvm/test/CodeGen/AArch64/framelayout-sve-calleesaves-fix.mir --- a/llvm/test/CodeGen/AArch64/framelayout-sve-calleesaves-fix.mir +++ b/llvm/test/CodeGen/AArch64/framelayout-sve-calleesaves-fix.mir @@ -5,21 +5,21 @@ define aarch64_sve_vector_pcs void @fix_restorepoint_p4() { entry: unreachable } ; CHECK-LABEL: fix_restorepoint_p4: ; CHECK: // %bb.0: // %entry - ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill + ; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-2 ; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: str z8, [sp, #1, mul vl] // 16-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 24 * VG ; CHECK-NEXT: .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 16 - 8 * VG - ; CHECK-NEXT: .cfi_offset w29, -16 + ; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: // implicit-def: $z8 ; CHECK-NEXT: // implicit-def: $p4 ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr z8, [sp, #1, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: addvl sp, sp, #2 - ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload + ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret ... name: fix_restorepoint_p4 diff --git a/llvm/test/CodeGen/AArch64/framelayout-sve.ll b/llvm/test/CodeGen/AArch64/framelayout-sve.ll new file --- /dev/null +++ b/llvm/test/CodeGen/AArch64/framelayout-sve.ll @@ -0,0 +1,86 @@ +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -o - %s | FileCheck %s + +; A more end-to-end test than test/CodeGen/AArch64/framelayout-sve.mir to +; ensure that hasFP returns a consistent value throughout.
+ +; Check that FP is used for SVE stack objects and that SP is used for non-SVE +; stack objects + +define void @func1(<vscale x 2 x i32> %v0, i32 %v1) { + ; CHECK-LABEL: func1 + ; CHECK: st1w { z0.d }, p0, [x29, #-1, mul vl] + ; CHECK: str w0, [sp, #12] + ; CHECK: str w0, [sp, #8] + ; CHECK: str w0, [sp, #4] + + %local0 = alloca <vscale x 2 x i32> + %local1 = alloca i32 + %local2 = alloca i32 + %local3 = alloca i32 + store volatile <vscale x 2 x i32> %v0, <vscale x 2 x i32>* %local0 + store volatile i32 %v1, i32* %local1 + store volatile i32 %v1, i32* %local2 + store volatile i32 %v1, i32* %local3 + ret void +} + +; Check that FP is not used when there are no non-SVE objects on the stack + +define void @func2(<vscale x 2 x i32> %v0) { + ; CHECK-LABEL: func2 + ; CHECK: st1w { z0.d }, p0, [sp, #1, mul vl] + + %local0 = alloca <vscale x 2 x i32> + store volatile <vscale x 2 x i32> %v0, <vscale x 2 x i32>* %local0 + ret void +} + +; Check that FP is not used when there are no SVE objects on the stack + +define void @func3(i32 %v0) { + ; CHECK-LABEL: func3 + ; CHECK: str w0, [sp, #12] + + %local0 = alloca i32 + store volatile i32 %v0, i32* %local0 + ret void +} + +; Check that FP is used appropriately in the presence of only loads + +define void @func4() { + ; CHECK-LABEL: func4 + ; CHECK: ldr w8, [sp, #12] + ; CHECK: ld1w { z0.d }, p0/z, [x29, #-1, mul vl] + + %local0 = alloca i32 + %local1 = alloca <vscale x 2 x i32> + load volatile i32, i32* %local0 + load volatile <vscale x 2 x i32>, <vscale x 2 x i32>* %local1 + ret void +} + +; Check that in the presence of high register pressure x29 does not get used as a +; general-purpose register when FP is in use + +@var = global [30 x i64] zeroinitializer + +define void @func5(<vscale x 2 x i32> %v0, i32 %v1) { + ; CHECK-LABEL: func5 + ; CHECK: mov x29, sp + ; CHECK-NOT: ldr x29 + ; CHECK-NOT: str x29 + + %val = load volatile [30 x i64], [30 x i64]* @var + store volatile [30 x i64] %val, [30 x i64]* @var + + %local0 = alloca <vscale x 2 x i32> + %local1 = alloca i32 + %local2 = alloca i32 + %local3 = alloca i32 + store volatile <vscale x 2 x i32> %v0, <vscale x 2 x i32>* %local0 + store volatile i32 %v1, i32* %local1 + store volatile i32 %v1, i32* %local2 + store volatile i32 %v1, i32* %local3 + ret void +} diff --git a/llvm/test/CodeGen/AArch64/framelayout-sve.mir b/llvm/test/CodeGen/AArch64/framelayout-sve.mir --- a/llvm/test/CodeGen/AArch64/framelayout-sve.mir +++ b/llvm/test/CodeGen/AArch64/framelayout-sve.mir @@ -56,15 +56,15 @@ # CHECK: stackSize: 32 # CHECK: bb.0.entry: -# CHECK-NEXT: $sp = frame-setup STRXpre killed $[[SCRATCH:[a-z0-9]+]], $sp, -16 +# CHECK-NEXT: $sp = frame-setup STPXpre killed $fp, killed $[[SCRATCH:[a-z0-9]+]], $sp, -2 # CHECK-NEXT: $fp = frame-setup ADDXri $sp, 0, 0 # CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -2 # CHECK-NEXT: $sp = frame-setup SUBXri $sp, 16, 0 -# CHECK-COUNT-2: frame-setup CFI_INSTRUCTION +# CHECK-COUNT-3: frame-setup CFI_INSTRUCTION # CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 2 # CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 16, 0 -# CHECK-NEXT: $sp, $[[SCRATCH]] = frame-destroy LDRXpost $sp, 16 +# CHECK-NEXT: $sp, $fp, $[[SCRATCH]] = frame-destroy LDPXpost $sp, 2 # CHECK-NEXT: RET_ReallyLR # ASM-LABEL: test_allocate_sve: @@ -93,28 +93,30 @@ # CHECK: stackSize: 48 # CHECK: bb.0.entry: -# CHECK-NEXT: $sp = frame-setup STRXpre killed $[[SCRATCH:[a-z0-9]+]], $sp, -32 +# CHECK-NEXT: $sp = frame-setup STPXpre killed $fp, killed $[[SCRATCH:[a-z0-9]+]], $sp, -4 # CHECK-NEXT: frame-setup STPXi killed $x21, killed $x20, $sp, 2 # CHECK-NEXT: $fp = frame-setup ADDXri $sp, 0, 0 # CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -2 # CHECK-NEXT: $sp = frame-setup SUBXri $sp, 16, 0 -# CHECK-COUNT-4: frame-setup CFI_INSTRUCTION +# CHECK-COUNT-5: frame-setup CFI_INSTRUCTION # # CHECK-NEXT:
$x20 = IMPLICIT_DEF # CHECK-NEXT: $x21 = IMPLICIT_DEF # CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 2 # CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 16, 0 # CHECK-NEXT: $x21, $x20 = frame-destroy LDPXi $sp, 2 -# CHECK-NEXT: $sp, $[[SCRATCH]] = frame-destroy LDRXpost $sp, 32 +# CHECK-NEXT: $sp, $fp, $[[SCRATCH]] = frame-destroy LDPXpost $sp, 4 # CHECK-NEXT: RET_ReallyLR # # ASM-LABEL: test_allocate_sve_gpr_callee_saves: # ASM: .cfi_offset w20, -8 # ASM-NEXT: .cfi_offset w21, -16 +# ASM-NEXT: .cfi_offset w30, -24 # ASM-NEXT: .cfi_offset w29, -32 # # UNWINDINFO: DW_CFA_offset: reg20 -8 # UNWINDINFO-NEXT: DW_CFA_offset: reg21 -16 +# UNWINDINFO-NEXT: DW_CFA_offset: reg30 -24 # UNWINDINFO-NEXT: DW_CFA_offset: reg29 -32 name: test_allocate_sve_gpr_callee_saves stack: @@ -181,11 +183,11 @@ # CHECK: stackSize: 32 # CHECK: bb.0.entry: -# CHECK-NEXT: $sp = frame-setup STRXpre killed $[[SCRATCH:[a-z0-9]+]], $sp, -16 +# CHECK-NEXT: $sp = frame-setup STPXpre killed $fp, killed $[[SCRATCH:[a-z0-9]+]], $sp, -2 # CHECK-NEXT: $fp = frame-setup ADDXri $sp, 0, 0 # CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -3 # CHECK-NEXT: $sp = frame-setup SUBXri $sp, 16, 0 -# CHECK-COUNT-2: frame-setup CFI_INSTRUCTION +# CHECK-COUNT-3: frame-setup CFI_INSTRUCTION # CHECK-NEXT: STR_ZXI $z0, $fp, -1 # CHECK-NEXT: STR_ZXI $z1, $fp, -2 @@ -193,7 +195,7 @@ # CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 3 # CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 16, 0 -# CHECK-NEXT: $sp, $[[SCRATCH]] = frame-destroy LDRXpost $sp, 16 +# CHECK-NEXT: $sp, $fp, $[[SCRATCH]] = frame-destroy LDPXpost $sp, 2 # CHECK-NEXT: RET_ReallyLR # # ASM-LABEL: test_address_sve: @@ -291,17 +293,17 @@ # CHECK: stackSize: 32 # CHECK: bb.0.entry: -# CHECK-NEXT: $sp = frame-setup STRXpre killed $[[SCRATCH:[a-z0-9]+]], $sp, -16 +# CHECK-NEXT: $sp = frame-setup STPXpre killed $fp, killed $[[SCRATCH:[a-z0-9]+]], $sp, -2 # CHECK-NEXT: $fp = frame-setup ADDXri $sp, 0, 0 # CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -1 # CHECK-NEXT: $sp = frame-setup SUBXri $sp, 16, 0 -# CHECK-COUNT-2: frame-setup CFI_INSTRUCTION +# CHECK-COUNT-3: frame-setup CFI_INSTRUCTION # CHECK-NEXT: $x0 = LDRXui $fp, 2 # CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 1 # CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 16, 0 -# CHECK-NEXT: $sp, $[[SCRATCH]] = frame-destroy LDRXpost $sp, 16 +# CHECK-NEXT: $sp, $fp, $[[SCRATCH]] = frame-destroy LDPXpost $sp, 2 # CHECK-NEXT: RET_ReallyLR # # ASM-LABEL: test_stack_arg_sve: @@ -371,10 +373,10 @@ # # ASM-LABEL: test_address_sve_out_of_range: # ASM: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x88, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 2056 * VG -# ASM-NEXT: .cfi_offset w29, -16 +# ASM-NEXT: .cfi_offset w30, -16 # # UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +16, DW_OP_plus, DW_OP_consts +2056, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus -# UNWINDINFO-NEXT: DW_CFA_offset: reg29 -16 +# UNWINDINFO-NEXT: DW_CFA_offset: reg30 -16 name: test_address_sve_out_of_range frameInfo: maxAlignment: 16 @@ -452,9 +454,9 @@ # CHECK: RET_ReallyLR # # ASM-LABEL: save_restore_pregs_sve: -# ASM: .cfi_offset w29, -16 +# ASM: .cfi_offset w30, -16 # -# UNWINDINFO: DW_CFA_offset: reg29 -16 +# UNWINDINFO: DW_CFA_offset: reg30 -16 name: save_restore_pregs_sve stack: - { id: 0, stack-id: default, size: 32, alignment: 16 } @@ -469,8 +471,7 @@ --- ... 
# CHECK-LABEL: name: save_restore_zregs_sve -# CHECK: $sp = frame-setup STRXpre killed $fp, $sp, -16 -# CHECK-NEXT: $fp = frame-setup ADDXri $sp, 0, 0 +# CHECK: $sp = frame-setup STRXpre killed $lr, $sp, -16 # CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -3 # CHECK-NEXT: frame-setup STR_ZXI killed $z10, $sp, 0 # CHECK-NEXT: frame-setup STR_ZXI killed $z9, $sp, 1 @@ -483,7 +484,7 @@ # CHECK-NEXT: $z9 = frame-destroy LDR_ZXI $sp, 1 # CHECK-NEXT: $z8 = frame-destroy LDR_ZXI $sp, 2 # CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 3 -# CHECK-NEXT: $sp, $fp = frame-destroy LDRXpost $sp, 16 +# CHECK-NEXT: $sp, $lr = frame-destroy LDRXpost $sp, 16 # CHECK-NEXT: RET_ReallyLR # # ASM-LABEL: save_restore_zregs_sve: @@ -494,7 +495,7 @@ # UNWINDINFO: DW_CFA_expression: reg72 DW_OP_consts -16, DW_OP_plus, DW_OP_consts -8, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus # UNWINDINFO-NEXT: DW_CFA_expression: reg73 DW_OP_consts -16, DW_OP_plus, DW_OP_consts -16, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus # UNWINDINFO-NEXT: DW_CFA_expression: reg74 DW_OP_consts -16, DW_OP_plus, DW_OP_consts -24, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus -# UNWINDINFO-NEXT: DW_CFA_offset: reg29 -16 +# UNWINDINFO-NEXT: DW_CFA_offset: reg30 -16 name: save_restore_zregs_sve stack: @@ -516,8 +517,10 @@ # paired correctly. # # CHECK-LABEL: name: save_restore_sve -# CHECK: $sp = frame-setup STPXpre killed ${{[a-z0-9]+}}, killed $x21, $sp, -4 -# CHECK: frame-setup STPXi killed $x20, killed $x19, $sp, 2 +# CHECK: $sp = frame-setup STPXpre killed $fp, killed $lr, $sp, -6 +# CHECK: frame-setup STRXui killed $x21, $sp, 2 +# CHECK: frame-setup STPXi killed $x20, killed $x19, $sp, 4 +# CHECK: $fp = frame-setup ADDXri $sp, 0, 0 # CHECK: $sp = frame-setup ADDVL_XXI $sp, -18 # CHECK: frame-setup STR_PXI killed $p15, $sp, 4 # CHECK: frame-setup STR_PXI killed $p14, $sp, 5 @@ -529,7 +532,7 @@ # CHECK: frame-setup STR_ZXI killed $z8, $sp, 17 # CHECK: $sp = frame-setup ADDVL_XXI $sp, -1 # CHECK: $sp = frame-setup SUBXri $sp, 32, 0 -# CHECK-COUNT-13: frame-setup CFI_INSTRUCTION +# CHECK-COUNT-14: frame-setup CFI_INSTRUCTION # CHECK: $sp = frame-destroy ADDXri $sp, 32, 0 # CHECK: $sp = frame-destroy ADDVL_XXI $sp, 1 @@ -542,36 +545,39 @@ # CHECK: $z9 = frame-destroy LDR_ZXI $sp, 16 # CHECK: $z8 = frame-destroy LDR_ZXI $sp, 17 # CHECK: $sp = frame-destroy ADDVL_XXI $sp, 18 -# CHECK: $x20, $x19 = frame-destroy LDPXi $sp, 2 -# CHECK: $sp, ${{[a-z0-9]+}}, $x21 = frame-destroy LDPXpost $sp, 4 +# CHECK: $x20, $x19 = frame-destroy LDPXi $sp, 4 +# CHECK: $x21 = frame-destroy LDRXui $sp, 2 +# CHECK: $sp, $fp, $lr = frame-destroy LDPXpost $sp, 6 # CHECK: RET_ReallyLR # # ASM-LABEL: save_restore_sve: -# ASM: .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 32 - 8 * VG -# ASM-NEXT: .cfi_escape 0x10, 0x49, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 32 - 16 * VG -# ASM-NEXT: .cfi_escape 0x10, 0x4a, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 32 - 24 * VG -# ASM-NEXT: .cfi_escape 0x10, 0x4b, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x60, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d11 @ cfa - 32 - 32 * VG -# ASM-NEXT: .cfi_escape 0x10, 0x4c, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x58, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d12 @ cfa - 32 - 40 * VG -# ASM-NEXT: .cfi_escape 0x10, 0x4d, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x50, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d13 @ cfa - 32 - 48 * VG -# ASM-NEXT: .cfi_escape 0x10, 0x4e, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 
0x22 // $d14 @ cfa - 32 - 56 * VG -# ASM-NEXT: .cfi_escape 0x10, 0x4f, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 32 - 64 * VG +# ASM: .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x50, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 48 - 8 * VG +# ASM-NEXT: .cfi_escape 0x10, 0x49, 0x0a, 0x11, 0x50, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 48 - 16 * VG +# ASM-NEXT: .cfi_escape 0x10, 0x4a, 0x0a, 0x11, 0x50, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 48 - 24 * VG +# ASM-NEXT: .cfi_escape 0x10, 0x4b, 0x0a, 0x11, 0x50, 0x22, 0x11, 0x60, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d11 @ cfa - 48 - 32 * VG +# ASM-NEXT: .cfi_escape 0x10, 0x4c, 0x0a, 0x11, 0x50, 0x22, 0x11, 0x58, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d12 @ cfa - 48 - 40 * VG +# ASM-NEXT: .cfi_escape 0x10, 0x4d, 0x0a, 0x11, 0x50, 0x22, 0x11, 0x50, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d13 @ cfa - 48 - 48 * VG +# ASM-NEXT: .cfi_escape 0x10, 0x4e, 0x0a, 0x11, 0x50, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d14 @ cfa - 48 - 56 * VG +# ASM-NEXT: .cfi_escape 0x10, 0x4f, 0x0a, 0x11, 0x50, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 48 - 64 * VG # ASM-NEXT: .cfi_offset w19, -8 # ASM-NEXT: .cfi_offset w20, -16 -# ASM-NEXT: .cfi_offset w21, -24 -# ASM-NEXT: .cfi_offset w29, -32 -# -# UNWINDINFO: DW_CFA_expression: reg72 DW_OP_consts -32, DW_OP_plus, DW_OP_consts -8, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus -# UNWINDINFO-NEXT: DW_CFA_expression: reg73 DW_OP_consts -32, DW_OP_plus, DW_OP_consts -16, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus -# UNWINDINFO-NEXT: DW_CFA_expression: reg74 DW_OP_consts -32, DW_OP_plus, DW_OP_consts -24, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus -# UNWINDINFO-NEXT: DW_CFA_expression: reg75 DW_OP_consts -32, DW_OP_plus, DW_OP_consts -32, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus -# UNWINDINFO-NEXT: DW_CFA_expression: reg76 DW_OP_consts -32, DW_OP_plus, DW_OP_consts -40, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus -# UNWINDINFO-NEXT: DW_CFA_expression: reg77 DW_OP_consts -32, DW_OP_plus, DW_OP_consts -48, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus -# UNWINDINFO-NEXT: DW_CFA_expression: reg78 DW_OP_consts -32, DW_OP_plus, DW_OP_consts -56, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus -# UNWINDINFO-NEXT: DW_CFA_expression: reg79 DW_OP_consts -32, DW_OP_plus, DW_OP_consts -64, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus +# ASM-NEXT: .cfi_offset w21, -32 +# ASM-NEXT: .cfi_offset w30, -40 +# ASM-NEXT: .cfi_offset w29, -48 +# +# UNWINDINFO: DW_CFA_expression: reg72 DW_OP_consts -48, DW_OP_plus, DW_OP_consts -8, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus +# UNWINDINFO-NEXT: DW_CFA_expression: reg73 DW_OP_consts -48, DW_OP_plus, DW_OP_consts -16, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus +# UNWINDINFO-NEXT: DW_CFA_expression: reg74 DW_OP_consts -48, DW_OP_plus, DW_OP_consts -24, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus +# UNWINDINFO-NEXT: DW_CFA_expression: reg75 DW_OP_consts -48, DW_OP_plus, DW_OP_consts -32, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus +# UNWINDINFO-NEXT: DW_CFA_expression: reg76 DW_OP_consts -48, DW_OP_plus, DW_OP_consts -40, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus +# UNWINDINFO-NEXT: DW_CFA_expression: reg77 DW_OP_consts -48, DW_OP_plus, DW_OP_consts -48, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus +# UNWINDINFO-NEXT: DW_CFA_expression: reg78 DW_OP_consts -48, DW_OP_plus, DW_OP_consts -56, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus +# UNWINDINFO-NEXT: DW_CFA_expression: reg79 DW_OP_consts -48, DW_OP_plus, 
DW_OP_consts -64, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus # UNWINDINFO-NEXT: DW_CFA_offset: reg19 -8 # UNWINDINFO-NEXT: DW_CFA_offset: reg20 -16 -# UNWINDINFO-NEXT: DW_CFA_offset: reg21 -24 -# UNWINDINFO-NEXT: DW_CFA_offset: reg29 -32 +# UNWINDINFO-NEXT: DW_CFA_offset: reg21 -32 +# UNWINDINFO-NEXT: DW_CFA_offset: reg30 -40 +# UNWINDINFO-NEXT: DW_CFA_offset: reg29 -48 name: save_restore_sve stack: @@ -728,7 +734,7 @@ # CHECK: - { id: 9, name: '', type: spill-slot, offset: -36, size: 2, alignment: 2, # CHECK-NEXT: stack-id: scalable-vector, callee-saved-register: '$p15', # CHECK: - { id: 10, name: '', type: spill-slot, offset: -16, size: 8, alignment: 16, -# CHECK-NEXT: stack-id: default, callee-saved-register: '$fp', +# CHECK-NEXT: stack-id: default, callee-saved-register: '$lr', # # CHECK: bb.0.entry: # CHECK-NEXT: $sp = frame-setup STRXpre killed $[[SCRATCH:[a-z0-9]+]], $sp, -16 @@ -742,11 +748,11 @@ # ASM-LABEL: frame_layout: # ASM: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0xd0, 0x00, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 80 * VG # ASM-NEXT: .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 16 - 8 * VG -# ASM-NEXT: .cfi_offset w29, -16 +# ASM-NEXT: .cfi_offset w30, -16 # # UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +16, DW_OP_plus, DW_OP_consts +80, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus # UNWINDINFO-NEXT: DW_CFA_expression: reg72 DW_OP_consts -16, DW_OP_plus, DW_OP_consts -8, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus -# UNWINDINFO-NEXT: DW_CFA_offset: reg29 -16 +# UNWINDINFO-NEXT: DW_CFA_offset: reg30 -16 name: frame_layout stack: - { id: 0, type: default, size: 32, alignment: 16, stack-id: scalable-vector } diff --git a/llvm/test/CodeGen/AArch64/named-vector-shuffle-reverse-sve.ll b/llvm/test/CodeGen/AArch64/named-vector-shuffle-reverse-sve.ll --- a/llvm/test/CodeGen/AArch64/named-vector-shuffle-reverse-sve.ll +++ b/llvm/test/CodeGen/AArch64/named-vector-shuffle-reverse-sve.ll @@ -55,22 +55,24 @@ ; Verify splitvec type legalisation works as expected. define @reverse_nxv32i1( %a) #0 { -; CHECK-LABEL: reverse_nxv32i1: +; CHECK-SELDAG-LABEL: reverse_nxv32i1: ; CHECK-SELDAG: // %bb.0: ; CHECK-SELDAG-NEXT: rev p2.b, p1.b ; CHECK-SELDAG-NEXT: rev p1.b, p0.b ; CHECK-SELDAG-NEXT: mov p0.b, p2.b ; CHECK-SELDAG-NEXT: ret +; +; CHECK-FASTISEL-LABEL: reverse_nxv32i1: ; CHECK-FASTISEL: // %bb.0: -; CHECK-FASTISEL-NEXT: str x29, [sp, #-16] -; CHECK-FASTISEL-NEXT: addvl sp, sp, #-1 -; CHECK-FASTISEL-NEXT: str p1, [sp, #7, mul vl] -; CHECK-FASTISEL-NEXT: mov p1.b, p0.b -; CHECK-FASTISEL-NEXT: ldr p0, [sp, #7, mul vl] -; CHECK-FASTISEL-NEXT: rev p0.b, p0.b -; CHECK-FASTISEL-NEXT: rev p1.b, p1.b -; CHECK-FASTISEL-NEXT: addvl sp, sp, #1 -; CHECK-FASTISEL-NEXT: ldr x29, [sp], #16 +; CHECK-FASTISEL-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-FASTISEL-NEXT: addvl sp, sp, #-1 +; CHECK-FASTISEL-NEXT: str p1, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-FASTISEL-NEXT: mov p1.b, p0.b +; CHECK-FASTISEL-NEXT: ldr p0, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-FASTISEL-NEXT: rev p0.b, p0.b +; CHECK-FASTISEL-NEXT: rev p1.b, p1.b +; CHECK-FASTISEL-NEXT: addvl sp, sp, #1 +; CHECK-FASTISEL-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; CHECK-FASTISEL-NEXT: ret %res = call @llvm.experimental.vector.reverse.nxv32i1( %a) @@ -163,22 +165,24 @@ ; Verify splitvec type legalisation works as expected. 
define @reverse_nxv8i32( %a) #0 { -; CHECK-LABEL: reverse_nxv8i32: +; CHECK-SELDAG-LABEL: reverse_nxv8i32: ; CHECK-SELDAG: // %bb.0: ; CHECK-SELDAG-NEXT: rev z2.s, z1.s ; CHECK-SELDAG-NEXT: rev z1.s, z0.s ; CHECK-SELDAG-NEXT: mov z0.d, z2.d ; CHECK-SELDAG-NEXT: ret +; +; CHECK-FASTISEL-LABEL: reverse_nxv8i32: ; CHECK-FASTISEL: // %bb.0: -; CHECK-FASTISEL-NEXT: str x29, [sp, #-16] -; CHECK-FASTISEL-NEXT: addvl sp, sp, #-1 -; CHECK-FASTISEL-NEXT: str z1, [sp] -; CHECK-FASTISEL-NEXT: mov z1.d, z0.d -; CHECK-FASTISEL-NEXT: ldr z0, [sp] -; CHECK-FASTISEL-NEXT: rev z0.s, z0.s -; CHECK-FASTISEL-NEXT: rev z1.s, z1.s -; CHECK-FASTISEL-NEXT: addvl sp, sp, #1 -; CHECK-FASTISEL-NEXT: ldr x29, [sp], #16 +; CHECK-FASTISEL-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-FASTISEL-NEXT: addvl sp, sp, #-1 +; CHECK-FASTISEL-NEXT: str z1, [sp] // 16-byte Folded Spill +; CHECK-FASTISEL-NEXT: mov z1.d, z0.d +; CHECK-FASTISEL-NEXT: ldr z0, [sp] // 16-byte Folded Reload +; CHECK-FASTISEL-NEXT: rev z0.s, z0.s +; CHECK-FASTISEL-NEXT: rev z1.s, z1.s +; CHECK-FASTISEL-NEXT: addvl sp, sp, #1 +; CHECK-FASTISEL-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; CHECK-FASTISEL-NEXT: ret %res = call @llvm.experimental.vector.reverse.nxv8i32( %a) @@ -187,7 +191,7 @@ ; Verify splitvec type legalisation works as expected. define @reverse_nxv16f32( %a) #0 { -; CHECK-LABEL: reverse_nxv16f32: +; CHECK-SELDAG-LABEL: reverse_nxv16f32: ; CHECK-SELDAG: // %bb.0: ; CHECK-SELDAG-NEXT: rev z5.s, z3.s ; CHECK-SELDAG-NEXT: rev z4.s, z2.s @@ -196,21 +200,23 @@ ; CHECK-SELDAG-NEXT: mov z0.d, z5.d ; CHECK-SELDAG-NEXT: mov z1.d, z4.d ; CHECK-SELDAG-NEXT: ret +; +; CHECK-FASTISEL-LABEL: reverse_nxv16f32: ; CHECK-FASTISEL: // %bb.0: -; CHECK-FASTISEL-NEXT: str x29, [sp, #-16] -; CHECK-FASTISEL-NEXT: addvl sp, sp, #-2 -; CHECK-FASTISEL-NEXT: str z3, [sp, #1, mul vl] -; CHECK-FASTISEL-NEXT: str z2, [sp] -; CHECK-FASTISEL-NEXT: mov z2.d, z1.d -; CHECK-FASTISEL-NEXT: ldr z1, [sp] -; CHECK-FASTISEL-NEXT: mov z3.d, z0.d -; CHECK-FASTISEL-NEXT: ldr z0, [sp, #1, mul vl] -; CHECK-FASTISEL-NEXT: rev z0.s, z0.s -; CHECK-FASTISEL-NEXT: rev z1.s, z1.s -; CHECK-FASTISEL-NEXT: rev z2.s, z2.s -; CHECK-FASTISEL-NEXT: rev z3.s, z3.s -; CHECK-FASTISEL-NEXT: addvl sp, sp, #2 -; CHECK-FASTISEL-NEXT: ldr x29, [sp], #16 +; CHECK-FASTISEL-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-FASTISEL-NEXT: addvl sp, sp, #-2 +; CHECK-FASTISEL-NEXT: str z3, [sp, #1, mul vl] // 16-byte Folded Spill +; CHECK-FASTISEL-NEXT: str z2, [sp] // 16-byte Folded Spill +; CHECK-FASTISEL-NEXT: mov z2.d, z1.d +; CHECK-FASTISEL-NEXT: ldr z1, [sp] // 16-byte Folded Reload +; CHECK-FASTISEL-NEXT: mov z3.d, z0.d +; CHECK-FASTISEL-NEXT: ldr z0, [sp, #1, mul vl] // 16-byte Folded Reload +; CHECK-FASTISEL-NEXT: rev z0.s, z0.s +; CHECK-FASTISEL-NEXT: rev z1.s, z1.s +; CHECK-FASTISEL-NEXT: rev z2.s, z2.s +; CHECK-FASTISEL-NEXT: rev z3.s, z3.s +; CHECK-FASTISEL-NEXT: addvl sp, sp, #2 +; CHECK-FASTISEL-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; CHECK-FASTISEL-NEXT: ret %res = call @llvm.experimental.vector.reverse.nxv16f32( %a) diff --git a/llvm/test/CodeGen/AArch64/split-vector-insert.ll b/llvm/test/CodeGen/AArch64/split-vector-insert.ll --- a/llvm/test/CodeGen/AArch64/split-vector-insert.ll +++ b/llvm/test/CodeGen/AArch64/split-vector-insert.ll @@ -16,10 +16,10 @@ ; CHECK-LABEL: test_nxv2i64_v8i64: ; CHECK: // %bb.0: -; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-4 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 32 * VG -; CHECK-NEXT: .cfi_offset w29, -16 +; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: cntd x8 ; CHECK-NEXT: sub x8, x8, #1 // =1 ; CHECK-NEXT: cmp x8, #0 // =0 @@ -55,7 +55,7 @@ ; CHECK-NEXT: str q4, [x10, x8] ; CHECK-NEXT: ld1d { z0.d }, p0/z, [sp, #3, mul vl] ; CHECK-NEXT: addvl sp, sp, #4 -; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret %r = call @llvm.experimental.vector.insert.nxv2i64.v8i64( %a, <8 x i64> %b, i64 0) ret %r @@ -69,10 +69,10 @@ ; CHECK-LABEL: test_nxv2f64_v8f64: ; CHECK: // %bb.0: -; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-4 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 32 * VG -; CHECK-NEXT: .cfi_offset w29, -16 +; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: cntd x8 ; CHECK-NEXT: sub x8, x8, #1 // =1 ; CHECK-NEXT: cmp x8, #0 // =0 @@ -108,7 +108,7 @@ ; CHECK-NEXT: str q4, [x10, x8] ; CHECK-NEXT: ld1d { z0.d }, p0/z, [sp, #3, mul vl] ; CHECK-NEXT: addvl sp, sp, #4 -; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret %r = call @llvm.experimental.vector.insert.nxv2f64.v8f64( %a, <8 x double> %b, i64 0) ret %r diff --git a/llvm/test/CodeGen/AArch64/sve-calling-convention-mixed.ll b/llvm/test/CodeGen/AArch64/sve-calling-convention-mixed.ll --- a/llvm/test/CodeGen/AArch64/sve-calling-convention-mixed.ll +++ b/llvm/test/CodeGen/AArch64/sve-calling-convention-mixed.ll @@ -9,7 +9,7 @@ define float @foo1(double* %x0, double* %x1, double* %x2) nounwind { ; CHECK-LABEL: foo1: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill +; CHECK-NEXT: stp x30, x28, [sp, #-16]! // 16-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-4 ; CHECK-NEXT: ptrue p0.b ; CHECK-NEXT: ld4d { z1.d, z2.d, z3.d, z4.d }, p0/z, [x0] @@ -25,7 +25,7 @@ ; CHECK-NEXT: st1d { z19.d }, p0, [x8, #3, mul vl] ; CHECK-NEXT: bl callee1 ; CHECK-NEXT: addvl sp, sp, #4 -; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload +; CHECK-NEXT: ldp x30, x28, [sp], #16 // 16-byte Folded Reload ; CHECK-NEXT: ret entry: %0 = call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) @@ -40,19 +40,16 @@ define float @foo2(double* %x0, double* %x1) nounwind { ; CHECK-LABEL: foo2: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill -; CHECK-NEXT: mov x29, sp +; CHECK-NEXT: stp x30, x28, [sp, #-16]! 
// 16-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-4 +; CHECK-NEXT: sub sp, sp, #16 // =16 ; CHECK-NEXT: ptrue p0.b ; CHECK-NEXT: ld4d { z1.d, z2.d, z3.d, z4.d }, p0/z, [x0] ; CHECK-NEXT: ld4d { z16.d, z17.d, z18.d, z19.d }, p0/z, [x1] ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: addvl x8, x29, #-4 +; CHECK-NEXT: add x8, sp, #16 // =16 +; CHECK-NEXT: add x9, sp, #16 // =16 ; CHECK-NEXT: fmov s0, #1.00000000 -; CHECK-NEXT: st1d { z16.d }, p0, [x29, #-4, mul vl] -; CHECK-NEXT: st1d { z17.d }, p0, [x8, #1, mul vl] -; CHECK-NEXT: st1d { z18.d }, p0, [x8, #2, mul vl] -; CHECK-NEXT: st1d { z19.d }, p0, [x8, #3, mul vl] ; CHECK-NEXT: mov w1, #1 ; CHECK-NEXT: mov w2, #2 ; CHECK-NEXT: mov w3, #3 @@ -60,12 +57,16 @@ ; CHECK-NEXT: mov w5, #5 ; CHECK-NEXT: mov w6, #6 ; CHECK-NEXT: mov w7, #7 -; CHECK-NEXT: str x8, [sp, #-16]! ; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: st1d { z16.d }, p0, [x9] +; CHECK-NEXT: st1d { z17.d }, p0, [x8, #1, mul vl] +; CHECK-NEXT: st1d { z18.d }, p0, [x8, #2, mul vl] +; CHECK-NEXT: st1d { z19.d }, p0, [x8, #3, mul vl] +; CHECK-NEXT: str x8, [sp] ; CHECK-NEXT: bl callee2 ; CHECK-NEXT: addvl sp, sp, #4 ; CHECK-NEXT: add sp, sp, #16 // =16 -; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload +; CHECK-NEXT: ldp x30, x28, [sp], #16 // 16-byte Folded Reload ; CHECK-NEXT: ret entry: %0 = call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) @@ -79,7 +80,7 @@ define float @foo3(double* %x0, double* %x1, double* %x2) nounwind { ; CHECK-LABEL: foo3: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill +; CHECK-NEXT: stp x30, x28, [sp, #-16]! // 16-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-3 ; CHECK-NEXT: ptrue p0.b ; CHECK-NEXT: ld4d { z2.d, z3.d, z4.d, z5.d }, p0/z, [x0] @@ -95,7 +96,7 @@ ; CHECK-NEXT: st1d { z18.d }, p0, [x8, #2, mul vl] ; CHECK-NEXT: bl callee3 ; CHECK-NEXT: addvl sp, sp, #3 -; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload +; CHECK-NEXT: ldp x30, x28, [sp], #16 // 16-byte Folded Reload ; CHECK-NEXT: ret entry: %0 = call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) diff --git a/llvm/test/CodeGen/AArch64/sve-extract-vector.ll b/llvm/test/CodeGen/AArch64/sve-extract-vector.ll --- a/llvm/test/CodeGen/AArch64/sve-extract-vector.ll +++ b/llvm/test/CodeGen/AArch64/sve-extract-vector.ll @@ -19,7 +19,7 @@ define <2 x i64> @extract_v2i64_nxv2i64_idx1( %vec) nounwind { ; CHECK-LABEL: extract_v2i64_nxv2i64_idx1: ; CHECK: // %bb.0: -; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: cntd x8 ; CHECK-NEXT: sub x8, x8, #1 // =1 @@ -31,7 +31,7 @@ ; CHECK-NEXT: mov x9, sp ; CHECK-NEXT: ldr q0, [x9, x8] ; CHECK-NEXT: addvl sp, sp, #1 -; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret %retval = call <2 x i64> @llvm.experimental.vector.extract.v2i64.nxv2i64( %vec, i64 1) ret <2 x i64> %retval @@ -51,7 +51,7 @@ define <4 x i32> @extract_v4i32_nxv4i32_idx1( %vec) nounwind { ; CHECK-LABEL: extract_v4i32_nxv4i32_idx1: ; CHECK: // %bb.0: -; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: cntw x8 ; CHECK-NEXT: sub x8, x8, #1 // =1 @@ -63,7 +63,7 @@ ; CHECK-NEXT: mov x9, sp ; CHECK-NEXT: ldr q0, [x9, x8] ; CHECK-NEXT: addvl sp, sp, #1 -; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret %retval = call <4 x i32> @llvm.experimental.vector.extract.v4i32.nxv4i32( %vec, i64 1) ret <4 x i32> %retval @@ -83,7 +83,7 @@ define <8 x i16> @extract_v8i16_nxv8i16_idx1( %vec) nounwind { ; CHECK-LABEL: extract_v8i16_nxv8i16_idx1: ; CHECK: // %bb.0: -; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: cnth x8 ; CHECK-NEXT: sub x8, x8, #1 // =1 @@ -95,7 +95,7 @@ ; CHECK-NEXT: mov x9, sp ; CHECK-NEXT: ldr q0, [x9, x8] ; CHECK-NEXT: addvl sp, sp, #1 -; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret %retval = call <8 x i16> @llvm.experimental.vector.extract.v8i16.nxv8i16( %vec, i64 1) ret <8 x i16> %retval @@ -115,7 +115,7 @@ define <16 x i8> @extract_v16i8_nxv16i8_idx1( %vec) nounwind { ; CHECK-LABEL: extract_v16i8_nxv16i8_idx1: ; CHECK: // %bb.0: -; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: rdvl x8, #1 ; CHECK-NEXT: sub x8, x8, #1 // =1 @@ -126,7 +126,7 @@ ; CHECK-NEXT: mov x9, sp ; CHECK-NEXT: ldr q0, [x9, x8] ; CHECK-NEXT: addvl sp, sp, #1 -; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret %retval = call <16 x i8> @llvm.experimental.vector.extract.v16i8.nxv16i8( %vec, i64 1) ret <16 x i8> %retval diff --git a/llvm/test/CodeGen/AArch64/sve-insert-vector.ll b/llvm/test/CodeGen/AArch64/sve-insert-vector.ll --- a/llvm/test/CodeGen/AArch64/sve-insert-vector.ll +++ b/llvm/test/CodeGen/AArch64/sve-insert-vector.ll @@ -7,7 +7,7 @@ define @insert_v2i64_nxv2i64( %vec, <2 x i64> %subvec) nounwind { ; CHECK-LABEL: insert_v2i64_nxv2i64: ; CHECK: // %bb.0: -; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: cntd x8 ; CHECK-NEXT: sub x8, x8, #1 // =1 @@ -20,7 +20,7 @@ ; CHECK-NEXT: str q1, [x9, x8] ; CHECK-NEXT: ld1d { z0.d }, p0/z, [sp] ; CHECK-NEXT: addvl sp, sp, #1 -; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret %retval = call @llvm.experimental.vector.insert.nxv2i64.v2i64( %vec, <2 x i64> %subvec, i64 0) ret %retval @@ -29,7 +29,7 @@ define @insert_v2i64_nxv2i64_idx1( %vec, <2 x i64> %subvec) nounwind { ; CHECK-LABEL: insert_v2i64_nxv2i64_idx1: ; CHECK: // %bb.0: -; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: cntd x8 ; CHECK-NEXT: sub x8, x8, #1 // =1 @@ -42,7 +42,7 @@ ; CHECK-NEXT: str q1, [x9, x8] ; CHECK-NEXT: ld1d { z0.d }, p0/z, [sp] ; CHECK-NEXT: addvl sp, sp, #1 -; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret %retval = call @llvm.experimental.vector.insert.nxv2i64.v2i64( %vec, <2 x i64> %subvec, i64 1) ret %retval @@ -51,7 +51,7 @@ define @insert_v4i32_nxv4i32( %vec, <4 x i32> %subvec) nounwind { ; CHECK-LABEL: insert_v4i32_nxv4i32: ; CHECK: // %bb.0: -; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: cntw x8 ; CHECK-NEXT: sub x8, x8, #1 // =1 @@ -64,7 +64,7 @@ ; CHECK-NEXT: str q1, [x9, x8] ; CHECK-NEXT: ld1w { z0.s }, p0/z, [sp] ; CHECK-NEXT: addvl sp, sp, #1 -; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret %retval = call @llvm.experimental.vector.insert.nxv4i32.v4i32( %vec, <4 x i32> %subvec, i64 0) ret %retval @@ -73,7 +73,7 @@ define @insert_v4i32_nxv4i32_idx1( %vec, <4 x i32> %subvec) nounwind { ; CHECK-LABEL: insert_v4i32_nxv4i32_idx1: ; CHECK: // %bb.0: -; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: cntw x8 ; CHECK-NEXT: sub x8, x8, #1 // =1 @@ -86,7 +86,7 @@ ; CHECK-NEXT: str q1, [x9, x8] ; CHECK-NEXT: ld1w { z0.s }, p0/z, [sp] ; CHECK-NEXT: addvl sp, sp, #1 -; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret %retval = call @llvm.experimental.vector.insert.nxv4i32.v4i32( %vec, <4 x i32> %subvec, i64 1) ret %retval @@ -95,7 +95,7 @@ define @insert_v8i16_nxv8i16( %vec, <8 x i16> %subvec) nounwind { ; CHECK-LABEL: insert_v8i16_nxv8i16: ; CHECK: // %bb.0: -; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: cnth x8 ; CHECK-NEXT: sub x8, x8, #1 // =1 @@ -108,7 +108,7 @@ ; CHECK-NEXT: str q1, [x9, x8] ; CHECK-NEXT: ld1h { z0.h }, p0/z, [sp] ; CHECK-NEXT: addvl sp, sp, #1 -; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret %retval = call @llvm.experimental.vector.insert.nxv8i16.v8i16( %vec, <8 x i16> %subvec, i64 0) ret %retval @@ -117,7 +117,7 @@ define @insert_v8i16_nxv8i16_idx1( %vec, <8 x i16> %subvec) nounwind { ; CHECK-LABEL: insert_v8i16_nxv8i16_idx1: ; CHECK: // %bb.0: -; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: cnth x8 ; CHECK-NEXT: sub x8, x8, #1 // =1 @@ -130,7 +130,7 @@ ; CHECK-NEXT: str q1, [x9, x8] ; CHECK-NEXT: ld1h { z0.h }, p0/z, [sp] ; CHECK-NEXT: addvl sp, sp, #1 -; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret %retval = call @llvm.experimental.vector.insert.nxv8i16.v8i16( %vec, <8 x i16> %subvec, i64 1) ret %retval @@ -139,7 +139,7 @@ define @insert_v16i8_nxv16i8( %vec, <16 x i8> %subvec) nounwind { ; CHECK-LABEL: insert_v16i8_nxv16i8: ; CHECK: // %bb.0: -; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: rdvl x8, #1 ; CHECK-NEXT: sub x8, x8, #1 // =1 @@ -151,7 +151,7 @@ ; CHECK-NEXT: str q1, [x9, x8] ; CHECK-NEXT: ld1b { z0.b }, p0/z, [sp] ; CHECK-NEXT: addvl sp, sp, #1 -; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret %retval = call @llvm.experimental.vector.insert.nxv16i8.v16i8( %vec, <16 x i8> %subvec, i64 0) ret %retval @@ -160,7 +160,7 @@ define @insert_v16i8_nxv16i8_idx1( %vec, <16 x i8> %subvec) nounwind { ; CHECK-LABEL: insert_v16i8_nxv16i8_idx1: ; CHECK: // %bb.0: -; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: rdvl x8, #1 ; CHECK-NEXT: sub x8, x8, #1 // =1 @@ -172,7 +172,7 @@ ; CHECK-NEXT: str q1, [x9, x8] ; CHECK-NEXT: ld1b { z0.b }, p0/z, [sp] ; CHECK-NEXT: addvl sp, sp, #1 -; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret %retval = call @llvm.experimental.vector.insert.nxv16i8.v16i8( %vec, <16 x i8> %subvec, i64 1) ret %retval diff --git a/llvm/test/CodeGen/AArch64/sve-pred-arith.ll b/llvm/test/CodeGen/AArch64/sve-pred-arith.ll --- a/llvm/test/CodeGen/AArch64/sve-pred-arith.ll +++ b/llvm/test/CodeGen/AArch64/sve-pred-arith.ll @@ -53,7 +53,7 @@ define aarch64_sve_vector_pcs @add_nxv64i1( %a, %b) { ; CHECK-LABEL: add_nxv64i1: ; CHECK: // %bb.0: -; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: str p8, [sp, #3, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: str p7, [sp, #4, mul vl] // 2-byte Folded Spill @@ -61,7 +61,7 @@ ; CHECK-NEXT: str p5, [sp, #6, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG -; CHECK-NEXT: .cfi_offset w29, -16 +; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: ldr p4, [x3] ; CHECK-NEXT: ldr p5, [x0] ; CHECK-NEXT: ldr p6, [x1] @@ -77,7 +77,7 @@ ; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: addvl sp, sp, #1 -; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret %res = add %a, %b ret %res; @@ -133,7 +133,7 @@ define aarch64_sve_vector_pcs @sub_nxv64i1( %a, %b) { ; CHECK-LABEL: sub_nxv64i1: ; CHECK: // %bb.0: -; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: str p8, [sp, #3, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: str p7, [sp, #4, mul vl] // 2-byte Folded Spill @@ -141,7 +141,7 @@ ; CHECK-NEXT: str p5, [sp, #6, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG -; CHECK-NEXT: .cfi_offset w29, -16 +; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: ldr p4, [x3] ; CHECK-NEXT: ldr p5, [x0] ; CHECK-NEXT: ldr p6, [x1] @@ -157,7 +157,7 @@ ; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: addvl sp, sp, #1 -; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret %res = sub %a, %b ret %res; diff --git a/llvm/test/CodeGen/AArch64/sve-split-extract-elt.ll b/llvm/test/CodeGen/AArch64/sve-split-extract-elt.ll --- a/llvm/test/CodeGen/AArch64/sve-split-extract-elt.ll +++ b/llvm/test/CodeGen/AArch64/sve-split-extract-elt.ll @@ -23,10 +23,10 @@ define i8 @split_extract_32i8_idx( %a, i32 %idx) { ; CHECK-LABEL: split_extract_32i8_idx: ; CHECK: // %bb.0: -; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-2 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG -; CHECK-NEXT: .cfi_offset w29, -16 +; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: rdvl x10, #2 ; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 ; CHECK-NEXT: sxtw x9, w0 @@ -39,7 +39,7 @@ ; CHECK-NEXT: csel x9, x9, x10, lo ; CHECK-NEXT: ldrb w0, [x8, x9] ; CHECK-NEXT: addvl sp, sp, #2 -; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret %ext = extractelement %a, i32 %idx ret i8 %ext @@ -48,10 +48,10 @@ define i16 @split_extract_16i16_idx( %a, i32 %idx) { ; CHECK-LABEL: split_extract_16i16_idx: ; CHECK: // %bb.0: -; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-2 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG -; CHECK-NEXT: .cfi_offset w29, -16 +; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: rdvl x10, #1 ; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 ; CHECK-NEXT: sxtw x9, w0 @@ -64,7 +64,7 @@ ; CHECK-NEXT: csel x9, x9, x10, lo ; CHECK-NEXT: ldrh w0, [x8, x9, lsl #1] ; CHECK-NEXT: addvl sp, sp, #2 -; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret %ext = extractelement %a, i32 %idx ret i16 %ext @@ -73,10 +73,10 @@ define i32 @split_extract_8i32_idx( %a, i32 %idx) { ; CHECK-LABEL: split_extract_8i32_idx: ; CHECK: // %bb.0: -; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-2 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG -; CHECK-NEXT: .cfi_offset w29, -16 +; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: cnth x10 ; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 ; CHECK-NEXT: sxtw x9, w0 @@ -89,7 +89,7 @@ ; CHECK-NEXT: csel x9, x9, x10, lo ; CHECK-NEXT: ldr w0, [x8, x9, lsl #2] ; CHECK-NEXT: addvl sp, sp, #2 -; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret %ext = extractelement %a, i32 %idx ret i32 %ext @@ -98,10 +98,10 @@ define i64 @split_extract_8i64_idx( %a, i32 %idx) { ; CHECK-LABEL: split_extract_8i64_idx: ; CHECK: // %bb.0: -; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-4 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 32 * VG -; CHECK-NEXT: .cfi_offset w29, -16 +; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: cnth x10 ; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 ; CHECK-NEXT: sxtw x9, w0 @@ -116,7 +116,7 @@ ; CHECK-NEXT: csel x9, x9, x10, lo ; CHECK-NEXT: ldr x0, [x8, x9, lsl #3] ; CHECK-NEXT: addvl sp, sp, #4 -; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret %ext = extractelement %a, i32 %idx ret i64 %ext @@ -145,10 +145,10 @@ define i16 @split_extract_16i16( %a) { ; CHECK-LABEL: split_extract_16i16: ; CHECK: // %bb.0: -; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-2 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG -; CHECK-NEXT: .cfi_offset w29, -16 +; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: rdvl x10, #1 ; CHECK-NEXT: sub x10, x10, #1 // =1 ; CHECK-NEXT: ptrue p0.h @@ -160,7 +160,7 @@ ; CHECK-NEXT: csel x9, x10, x9, lo ; CHECK-NEXT: ldrh w0, [x8, x9, lsl #1] ; CHECK-NEXT: addvl sp, sp, #2 -; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret %ext = extractelement %a, i32 128 ret i16 %ext @@ -169,10 +169,10 @@ define i32 @split_extract_16i32( %a) { ; CHECK-LABEL: split_extract_16i32: ; CHECK: // %bb.0: -; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-4 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 32 * VG -; CHECK-NEXT: .cfi_offset w29, -16 +; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: mov w9, #34464 ; CHECK-NEXT: rdvl x10, #1 ; CHECK-NEXT: movk w9, #1, lsl #16 @@ -187,7 +187,7 @@ ; CHECK-NEXT: csel x9, x10, x9, lo ; CHECK-NEXT: ldr w0, [x8, x9, lsl #2] ; CHECK-NEXT: addvl sp, sp, #4 -; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret %ext = extractelement %a, i32 100000 ret i32 %ext @@ -196,10 +196,10 @@ define i64 @split_extract_4i64( %a) { ; CHECK-LABEL: split_extract_4i64: ; CHECK: // %bb.0: -; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-2 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG -; CHECK-NEXT: .cfi_offset w29, -16 +; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: cntw x10 ; CHECK-NEXT: sub x10, x10, #1 // =1 ; CHECK-NEXT: ptrue p0.d @@ -211,7 +211,7 @@ ; CHECK-NEXT: csel x9, x10, x9, lo ; CHECK-NEXT: ldr x0, [x8, x9, lsl #3] ; CHECK-NEXT: addvl sp, sp, #2 -; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret %ext = extractelement %a, i32 10 ret i64 %ext diff --git a/llvm/test/CodeGen/AArch64/sve-split-insert-elt.ll b/llvm/test/CodeGen/AArch64/sve-split-insert-elt.ll --- a/llvm/test/CodeGen/AArch64/sve-split-insert-elt.ll +++ b/llvm/test/CodeGen/AArch64/sve-split-insert-elt.ll @@ -23,10 +23,10 @@ define @split_insert_32i8_idx( %a, i8 %elt, i64 %idx) { ; CHECK-LABEL: split_insert_32i8_idx: ; CHECK: // %bb.0: -; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-2 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG -; CHECK-NEXT: .cfi_offset w29, -16 +; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: rdvl x8, #2 ; CHECK-NEXT: sub x8, x8, #1 // =1 ; CHECK-NEXT: cmp x1, x8 @@ -39,7 +39,7 @@ ; CHECK-NEXT: ld1b { z1.b }, p0/z, [x9, #1, mul vl] ; CHECK-NEXT: ld1b { z0.b }, p0/z, [sp] ; CHECK-NEXT: addvl sp, sp, #2 -; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret %ins = insertelement %a, i8 %elt, i64 %idx ret %ins @@ -48,10 +48,10 @@ define @split_insert_8f32_idx( %a, float %elt, i64 %idx) { ; CHECK-LABEL: split_insert_8f32_idx: ; CHECK: // %bb.0: -; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-2 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG -; CHECK-NEXT: .cfi_offset w29, -16 +; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: cnth x8 ; CHECK-NEXT: sub x8, x8, #1 // =1 ; CHECK-NEXT: cmp x0, x8 @@ -64,7 +64,7 @@ ; CHECK-NEXT: ld1w { z1.s }, p0/z, [x9, #1, mul vl] ; CHECK-NEXT: ld1w { z0.s }, p0/z, [sp] ; CHECK-NEXT: addvl sp, sp, #2 -; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret %ins = insertelement %a, float %elt, i64 %idx ret %ins @@ -73,10 +73,10 @@ define @split_insert_8i64_idx( %a, i64 %elt, i64 %idx) { ; CHECK-LABEL: split_insert_8i64_idx: ; CHECK: // %bb.0: -; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #-16]! 
 ; CHECK-NEXT: addvl sp, sp, #-4
 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 32 * VG
-; CHECK-NEXT: .cfi_offset w29, -16
+; CHECK-NEXT: .cfi_offset w30, -16
 ; CHECK-NEXT: cnth x8
 ; CHECK-NEXT: sub x8, x8, #1 // =1
 ; CHECK-NEXT: cmp x1, x8
@@ -93,7 +93,7 @@
 ; CHECK-NEXT: ld1d { z3.d }, p0/z, [x9, #3, mul vl]
 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [sp]
 ; CHECK-NEXT: addvl sp, sp, #4
-; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT: ret
 %ins = insertelement %a, i64 %elt, i64 %idx
 ret %ins
@@ -135,10 +135,10 @@
 define @split_insert_32i16( %a, i16 %elt) {
 ; CHECK-LABEL: split_insert_32i16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT: addvl sp, sp, #-4
 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 32 * VG
-; CHECK-NEXT: .cfi_offset w29, -16
+; CHECK-NEXT: .cfi_offset w30, -16
 ; CHECK-NEXT: rdvl x10, #2
 ; CHECK-NEXT: sub x10, x10, #1 // =1
 ; CHECK-NEXT: mov w9, #128
@@ -156,7 +156,7 @@
 ; CHECK-NEXT: ld1h { z3.h }, p0/z, [x8, #3, mul vl]
 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [sp]
 ; CHECK-NEXT: addvl sp, sp, #4
-; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT: ret
 %ins = insertelement %a, i16 %elt, i64 128
 ret %ins
@@ -165,10 +165,10 @@
 define @split_insert_8i32( %a, i32 %elt) {
 ; CHECK-LABEL: split_insert_8i32:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT: addvl sp, sp, #-2
 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG
-; CHECK-NEXT: .cfi_offset w29, -16
+; CHECK-NEXT: .cfi_offset w30, -16
 ; CHECK-NEXT: mov w9, #16960
 ; CHECK-NEXT: cnth x10
 ; CHECK-NEXT: movk w9, #15, lsl #16
@@ -183,7 +183,7 @@
 ; CHECK-NEXT: ld1w { z1.s }, p0/z, [x8, #1, mul vl]
 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [sp]
 ; CHECK-NEXT: addvl sp, sp, #2
-; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT: ret
 %ins = insertelement %a, i32 %elt, i64 1000000
 ret %ins
diff --git a/llvm/test/CodeGen/AArch64/sve-split-int-pred-reduce.ll b/llvm/test/CodeGen/AArch64/sve-split-int-pred-reduce.ll
--- a/llvm/test/CodeGen/AArch64/sve-split-int-pred-reduce.ll
+++ b/llvm/test/CodeGen/AArch64/sve-split-int-pred-reduce.ll
@@ -23,11 +23,11 @@
 define i1 @andv_nxv64i1( %a) {
 ; CHECK-LABEL: andv_nxv64i1:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT: addvl sp, sp, #-1
 ; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill
 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG
-; CHECK-NEXT: .cfi_offset w29, -16
+; CHECK-NEXT: .cfi_offset w30, -16
 ; CHECK-NEXT: ptrue p4.b
 ; CHECK-NEXT: and p1.b, p4/z, p1.b, p3.b
 ; CHECK-NEXT: and p0.b, p4/z, p0.b, p2.b
@@ -37,7 +37,7 @@
 ; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload
 ; CHECK-NEXT: cset w0, eq
 ; CHECK-NEXT: addvl sp, sp, #1
-; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT: ret
 %res = call i1 @llvm.vector.reduce.and.nxv64i1( %a)
 ret i1 %res
diff --git a/llvm/test/CodeGen/AArch64/sve-tailcall.ll b/llvm/test/CodeGen/AArch64/sve-tailcall.ll
--- a/llvm/test/CodeGen/AArch64/sve-tailcall.ll
+++ b/llvm/test/CodeGen/AArch64/sve-tailcall.ll
@@ -11,7 +11,7 @@
 define @sve_caller_sve_callee() nounwind {
 ; CHECK-LABEL: sve_caller_sve_callee:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT: addvl sp, sp, #-2
 ; CHECK-NEXT: str z10, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT: str z9, [sp, #1, mul vl] // 16-byte Folded Spill
@@ -20,7 +20,7 @@
 ; CHECK-NEXT: ldr z10, [sp] // 16-byte Folded Reload
 ; CHECK-NEXT: ldr z9, [sp, #1, mul vl] // 16-byte Folded Reload
 ; CHECK-NEXT: addvl sp, sp, #2
-; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT: b sve_callee
 tail call void asm sideeffect "", "~{z9},~{z10}"()
 %call = tail call @sve_callee()
@@ -35,7 +35,7 @@
 define i32 @sve_caller_non_sve_callee( %arg) nounwind {
 ; CHECK-LABEL: sve_caller_non_sve_callee:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
+; CHECK-NEXT: stp x30, x28, [sp, #-16]! // 16-byte Folded Spill
 ; CHECK-NEXT: addvl sp, sp, #-18
 ; CHECK-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill
 ; CHECK-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill
@@ -97,7 +97,7 @@
 ; CHECK-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload
 ; CHECK-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload
 ; CHECK-NEXT: addvl sp, sp, #18
-; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
+; CHECK-NEXT: ldp x30, x28, [sp], #16 // 16-byte Folded Reload
 ; CHECK-NEXT: ret
 tail call void asm sideeffect "", "~{z9},~{z10}"()
 %call = tail call i32 @non_sve_callee()
diff --git a/llvm/test/CodeGen/AArch64/sve-trunc.ll b/llvm/test/CodeGen/AArch64/sve-trunc.ll
--- a/llvm/test/CodeGen/AArch64/sve-trunc.ll
+++ b/llvm/test/CodeGen/AArch64/sve-trunc.ll
@@ -113,11 +113,11 @@
 define @trunc_i64toi1_split3( %in) {
 ; CHECK-LABEL: trunc_i64toi1_split3:
 ; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT: addvl sp, sp, #-1
 ; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill
 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG
-; CHECK-NEXT: .cfi_offset w29, -16
+; CHECK-NEXT: .cfi_offset w30, -16
 ; CHECK-NEXT: ptrue p0.d
 ; CHECK-NEXT: and z7.d, z7.d, #0x1
 ; CHECK-NEXT: and z6.d, z6.d, #0x1
@@ -144,7 +144,7 @@
 ; CHECK-NEXT: uzp1 p0.h, p0.h, p2.h
 ; CHECK-NEXT: uzp1 p0.b, p0.b, p1.b
 ; CHECK-NEXT: addvl sp, sp, #1
-; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT: ret
 entry:
 %out = trunc %in to
diff --git a/llvm/test/CodeGen/AArch64/unwind-preserved.ll b/llvm/test/CodeGen/AArch64/unwind-preserved.ll
--- a/llvm/test/CodeGen/AArch64/unwind-preserved.ll
+++ b/llvm/test/CodeGen/AArch64/unwind-preserved.ll
@@ -8,7 +8,7 @@
 ; CHECK: .Lfunc_begin0:
 ; CHECK-NEXT: .cfi_startproc
 ; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
+; CHECK-NEXT: stp x30, x28, [sp, #-16]! // 16-byte Folded Spill
 ; CHECK-NEXT: addvl sp, sp, #-18
 ; CHECK-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill
 ; CHECK-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill
@@ -48,8 +48,8 @@
 ; CHECK-NEXT: .cfi_escape 0x10, 0x4d, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x50, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d13 @ cfa - 16 - 48 * VG
 ; CHECK-NEXT: .cfi_escape 0x10, 0x4e, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d14 @ cfa - 16 - 56 * VG
 ; CHECK-NEXT: .cfi_escape 0x10, 0x4f, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 16 - 64 * VG
-; CHECK-NEXT: .cfi_offset w30, -8
-; CHECK-NEXT: .cfi_offset w29, -16
+; CHECK-NEXT: .cfi_offset w28, -8
+; CHECK-NEXT: .cfi_offset w30, -16
 ; CHECK-NEXT: str z0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT: .Ltmp0:
 ; CHECK-NEXT: bl may_throw_sve
@@ -88,7 +88,7 @@
 ; CHECK-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload
 ; CHECK-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload
 ; CHECK-NEXT: addvl sp, sp, #18
-; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
+; CHECK-NEXT: ldp x30, x28, [sp], #16 // 16-byte Folded Reload
 ; CHECK-NEXT: ret
 ; CHECK-NEXT: .LBB0_2: // %.Lunwind
 ; CHECK-NEXT: .Ltmp2:
@@ -123,14 +123,14 @@
 ; CHECK-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload
 ; CHECK-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload
 ; CHECK-NEXT: addvl sp, sp, #18
-; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
+; CHECK-NEXT: ldp x30, x28, [sp], #16 // 16-byte Folded Reload
 ; CHECK-NEXT: ret
 ;
 ; GISEL-LABEL: invoke_callee_may_throw_sve:
 ; GISEL: .Lfunc_begin0:
 ; GISEL-NEXT: .cfi_startproc
 ; GISEL-NEXT: // %bb.0:
-; GISEL-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
+; GISEL-NEXT: stp x30, x28, [sp, #-16]! // 16-byte Folded Spill
 ; GISEL-NEXT: addvl sp, sp, #-18
 ; GISEL-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill
 ; GISEL-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill
@@ -170,8 +170,8 @@
 ; GISEL-NEXT: .cfi_escape 0x10, 0x4d, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x50, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d13 @ cfa - 16 - 48 * VG
 ; GISEL-NEXT: .cfi_escape 0x10, 0x4e, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d14 @ cfa - 16 - 56 * VG
 ; GISEL-NEXT: .cfi_escape 0x10, 0x4f, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 16 - 64 * VG
-; GISEL-NEXT: .cfi_offset w30, -8
-; GISEL-NEXT: .cfi_offset w29, -16
+; GISEL-NEXT: .cfi_offset w28, -8
+; GISEL-NEXT: .cfi_offset w30, -16
 ; GISEL-NEXT: str z0, [sp] // 16-byte Folded Spill
 ; GISEL-NEXT: .Ltmp0:
 ; GISEL-NEXT: bl may_throw_sve
@@ -210,7 +210,7 @@
 ; GISEL-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload
 ; GISEL-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload
 ; GISEL-NEXT: addvl sp, sp, #18
-; GISEL-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
+; GISEL-NEXT: ldp x30, x28, [sp], #16 // 16-byte Folded Reload
 ; GISEL-NEXT: ret
 ; GISEL-NEXT: .LBB0_2: // %.Lunwind
 ; GISEL-NEXT: .Ltmp2:
@@ -245,7 +245,7 @@
 ; GISEL-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload
 ; GISEL-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload
 ; GISEL-NEXT: addvl sp, sp, #18
-; GISEL-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
+; GISEL-NEXT: ldp x30, x28, [sp], #16 // 16-byte Folded Reload
 ; GISEL-NEXT: ret
 %result = invoke @may_throw_sve( %v) to label %.Lcontinue unwind label %.Lunwind
 .Lcontinue:
@@ -273,10 +273,10 @@
 ; CHECK-NEXT: stp q13, q12, [sp, #192] // 32-byte Folded Spill
 ; CHECK-NEXT: stp q11, q10, [sp, #224] // 32-byte Folded Spill
 ; CHECK-NEXT: stp q9, q8, [sp, #256] // 32-byte Folded Spill
-; CHECK-NEXT: stp x29, x30, [sp, #288] // 16-byte Folded Spill
+; CHECK-NEXT: stp x30, x28, [sp, #288] // 16-byte Folded Spill
 ; CHECK-NEXT: .cfi_def_cfa_offset 304
-; CHECK-NEXT: .cfi_offset w30, -8
-; CHECK-NEXT: .cfi_offset w29, -16
+; CHECK-NEXT: .cfi_offset w28, -8
+; CHECK-NEXT: .cfi_offset w30, -16
 ; CHECK-NEXT: .cfi_offset b8, -32
 ; CHECK-NEXT: .cfi_offset b9, -48
 ; CHECK-NEXT: .cfi_offset b10, -64
@@ -301,7 +301,7 @@
 ; CHECK-NEXT: b .LBB1_1
 ; CHECK-NEXT: .LBB1_1: // %.Lcontinue
 ; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT: ldp x29, x30, [sp, #288] // 16-byte Folded Reload
+; CHECK-NEXT: ldp x30, x28, [sp, #288] // 16-byte Folded Reload
 ; CHECK-NEXT: ldp q9, q8, [sp, #256] // 32-byte Folded Reload
 ; CHECK-NEXT: ldp q11, q10, [sp, #224] // 32-byte Folded Reload
 ; CHECK-NEXT: ldp q13, q12, [sp, #192] // 32-byte Folded Reload
@@ -315,7 +315,7 @@
 ; CHECK-NEXT: .LBB1_2: // %.Lunwind
 ; CHECK-NEXT: .Ltmp5:
 ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-NEXT: ldp x29, x30, [sp, #288] // 16-byte Folded Reload
+; CHECK-NEXT: ldp x30, x28, [sp, #288] // 16-byte Folded Reload
 ; CHECK-NEXT: ldp q9, q8, [sp, #256] // 32-byte Folded Reload
 ; CHECK-NEXT: ldp q11, q10, [sp, #224] // 32-byte Folded Reload
 ; CHECK-NEXT: ldp q13, q12, [sp, #192] // 32-byte Folded Reload
@@ -340,10 +340,10 @@
 ; GISEL-NEXT: stp q13, q12, [sp, #192] // 32-byte Folded Spill
 ; GISEL-NEXT: stp q11, q10, [sp, #224] // 32-byte Folded Spill
 ; GISEL-NEXT: stp q9, q8, [sp, #256] // 32-byte Folded Spill
-; GISEL-NEXT: stp x29, x30, [sp, #288] // 16-byte Folded Spill
+; GISEL-NEXT: stp x30, x28, [sp, #288] // 16-byte Folded Spill
 ; GISEL-NEXT: .cfi_def_cfa_offset 304
-; GISEL-NEXT: .cfi_offset w30, -8
-; GISEL-NEXT: .cfi_offset w29, -16
+; GISEL-NEXT: .cfi_offset w28, -8
+; GISEL-NEXT: .cfi_offset w30, -16
 ; GISEL-NEXT: .cfi_offset b8, -32
 ; GISEL-NEXT: .cfi_offset b9, -48
 ; GISEL-NEXT: .cfi_offset b10, -64
@@ -367,7 +367,7 @@
 ; GISEL-NEXT: .Ltmp4:
 ; GISEL-NEXT: // %bb.1: // %.Lcontinue
 ; GISEL-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
-; GISEL-NEXT: ldp x29, x30, [sp, #288] // 16-byte Folded Reload
+; GISEL-NEXT: ldp x30, x28, [sp, #288] // 16-byte Folded Reload
 ; GISEL-NEXT: ldp q9, q8, [sp, #256] // 32-byte Folded Reload
 ; GISEL-NEXT: ldp q11, q10, [sp, #224] // 32-byte Folded Reload
 ; GISEL-NEXT: ldp q13, q12, [sp, #192] // 32-byte Folded Reload
@@ -381,7 +381,7 @@
 ; GISEL-NEXT: .LBB1_2: // %.Lunwind
 ; GISEL-NEXT: .Ltmp5:
 ; GISEL-NEXT: ldr q0, [sp] // 16-byte Folded Reload
-; GISEL-NEXT: ldp x29, x30, [sp, #288] // 16-byte Folded Reload
+; GISEL-NEXT: ldp x30, x28, [sp, #288] // 16-byte Folded Reload
 ; GISEL-NEXT: ldp q9, q8, [sp, #256] // 32-byte Folded Reload
 ; GISEL-NEXT: ldp q11, q10, [sp, #224] // 32-byte Folded Reload
 ; GISEL-NEXT: ldp q13, q12, [sp, #192] // 32-byte Folded Reload