Index: lib/Target/AArch64/AArch64CallingConvention.td =================================================================== --- lib/Target/AArch64/AArch64CallingConvention.td +++ lib/Target/AArch64/AArch64CallingConvention.td @@ -325,23 +325,23 @@ // It would be better to model its preservation semantics properly (create a // vreg on entry, use it in RET & tail call generation; make that vreg def if we // end up saving LR as part of a call frame). Watch this space... -def CSR_AArch64_AAPCS : CalleeSavedRegs<(add LR, FP, X19, X20, X21, X22, - X23, X24, X25, X26, X27, X28, +def CSR_AArch64_AAPCS : CalleeSavedRegs<(add X19, X20, X21, X22, X23, X24, + X25, X26, X27, X28, LR, FP, D8, D9, D10, D11, D12, D13, D14, D15)>; // Win64 has unwinding codes for an (FP,LR) pair, save_fplr and save_fplr_x. // We put FP before LR, so that frame lowering logic generates (FP,LR) pairs, // and not (LR,FP) pairs. -def CSR_Win_AArch64_AAPCS : CalleeSavedRegs<(add FP, LR, X19, X20, X21, X22, - X23, X24, X25, X26, X27, X28, +def CSR_Win_AArch64_AAPCS : CalleeSavedRegs<(add X19, X20, X21, X22, X23, X24, + X25, X26, X27, X28, FP, LR, D8, D9, D10, D11, D12, D13, D14, D15)>; // AArch64 PCS for vector functions (VPCS) // must (additionally) preserve full Q8-Q23 registers -def CSR_AArch64_AAVPCS : CalleeSavedRegs<(add LR, FP, X19, X20, X21, X22, - X23, X24, X25, X26, X27, X28, +def CSR_AArch64_AAVPCS : CalleeSavedRegs<(add X19, X20, X21, X22, X23, X24, + X25, X26, X27, X28, LR, FP, (sequence "Q%u", 8, 23))>; // Functions taking SVE arguments or returning an SVE type Index: lib/Target/AArch64/AArch64FrameLowering.cpp =================================================================== --- lib/Target/AArch64/AArch64FrameLowering.cpp +++ lib/Target/AArch64/AArch64FrameLowering.cpp @@ -44,13 +44,13 @@ // | | // |-----------------------------------| // | | +// | other callee-saved registers | +// | | +// |- - - - - - - - - - - - - - - - - -| +// | | // | prev_fp, prev_lr | // | (a.k.a. "frame record") | // |-----------------------------------| <- fp(=x29) -// | | -// | other callee-saved registers | -// | | -// |-----------------------------------| // |.empty.space.to.make.part.below....| // |.aligned.in.case.it.needs.more.than| (size of this area is unknown at // |.the.standard.16-byte.alignment....| compile time; if present) @@ -80,6 +80,14 @@ // * A frame pointer is definitely needed when there are local variables with // more-than-default alignment requirements. // +// The frame-record (fp, lr) is stored at the bottom of the callee-save area +// to allow SVE stack objects (allocated directly below the callee-saves) to +// be accessed directly from the framepointer. The SVE spill/fill instructions +// have runtime-VL-scaled addressing modes such as: +// ldr z8, [fp, #-7 mul vl] +// With this layout, we don't need to add an unscaled offset to the framepointer +// before accessing the object in the frame. +// // In some cases when a base pointer is not strictly needed, it is generated // anyway when offsets from the frame pointer to access local variables become // so large that the offset can't be encoded in the immediate fields of loads @@ -319,6 +327,7 @@ const TargetSubtargetInfo &STI = MF.getSubtarget(); const MCRegisterInfo *MRI = STI.getRegisterInfo(); const TargetInstrInfo *TII = STI.getInstrInfo(); + bool HasFP = hasFP(MF); DebugLoc DL = MBB.findDebugLoc(MBBI); // Add callee saved registers to move list. @@ -326,7 +335,7 @@ if (CSI.empty()) return; - for (const auto &Info : CSI) { + auto BuildCFIInst = [&](const CalleeSavedInfo &Info) { unsigned Reg = Info.getReg(); int64_t Offset = MFI.getObjectOffset(Info.getFrameIdx()) - getOffsetOfLocalArea(); @@ -336,7 +345,30 @@ BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex) .setMIFlags(MachineInstr::FrameSetup); + }; + + auto IsFrameRecordReg = [HasFP](const CalleeSavedInfo &Info) { + return HasFP && + (Info.getReg() == AArch64::LR || Info.getReg() == AArch64::FP); + }; + + // Emit the CFI instructions for LR and FP first to support + // compact unwind encoding. + if (HasFP) { + unsigned FrameRecordCFIs = 0; + for (const auto &Info : CSI) { + if (IsFrameRecordReg(Info)) { + BuildCFIInst(Info); + if (++FrameRecordCFIs >= 2) + break; + } + } } + + // Emit remaining CFI instructions. + for (const auto &Info : CSI) + if (!IsFrameRecordReg(Info)) + BuildCFIInst(Info); } // Find a scratch register that we can use at the start of the prologue to @@ -952,9 +984,8 @@ } if (HasFP) { - // Only set up FP if we actually need to. Frame pointer is fp = - // sp - fixedobject - 16. - int FPOffset = AFI->getCalleeSavedStackSize() - 16; + // Only set up FP if we actually need to. + int FPOffset = 0; if (CombineSPBump) FPOffset += AFI->getLocalStackSize(); @@ -1135,8 +1166,7 @@ } if (needsFrameMoves) { - const DataLayout &TD = MF.getDataLayout(); - const int StackGrowth = -TD.getPointerSize(0); + const int StackGrowth = -AFI->getCalleeSavedStackSize(); unsigned FramePtr = RegInfo->getFrameRegister(MF); // An example of the prologue: // @@ -1208,7 +1238,7 @@ // Define the current CFA rule to use the provided FP. unsigned Reg = RegInfo->getDwarfRegNum(FramePtr, true); unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createDefCfa( - nullptr, Reg, 2 * StackGrowth - FixedObject)); + nullptr, Reg, StackGrowth - FixedObject)); BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex) .setMIFlags(MachineInstr::FrameSetup); @@ -1463,8 +1493,7 @@ // non-post-indexed loads for the restores if we aren't actually going to // be able to save any instructions. if (!IsFunclet && (MFI.hasVarSizedObjects() || AFI->isStackRealigned())) - emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::FP, - {-(int64_t)AFI->getCalleeSavedStackSize() + 16, MVT::i8}, + emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::FP, {0, MVT::i8}, TII, MachineInstr::FrameDestroy, false, NeedsWinCFI); else if (NumBytes) emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP, @@ -1526,7 +1555,7 @@ bool IsWin64 = Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv()); unsigned FixedObject = IsWin64 ? alignTo(AFI->getVarArgsGPRSize(), 16) : 0; - return {ObjectOffset + FixedObject + 16, MVT::i8}; + return {ObjectOffset + FixedObject + AFI->getCalleeSavedStackSize(), MVT::i8}; } static StackOffset getStackOffset(const MachineFunction &MF, int ObjectOffset) { @@ -1689,6 +1718,23 @@ return true; } +/// Returns true if Reg1 and Reg2 cannot be paired using a ldp/stp instruction. +/// WindowsCFI requires that only consecutive registers can be paired. +/// LR and FP need to be allocated together when the frame needs to save +/// the frame-record. This means any other register pairing with LR is invalid. +static bool invalidateRegisterPairing(unsigned Reg1, unsigned Reg2, + bool NeedsWinCFI, bool NeedsFrameRecord) { + if (NeedsWinCFI) + return invalidateWindowsRegisterPairing(Reg1, Reg2, true); + + // If we need to store the frame record, don't pair any register + // with LR other than FP. + if (NeedsFrameRecord) + return Reg2 == AArch64::LR; + + return false; +} + namespace { struct RegPairInfo { @@ -1708,7 +1754,7 @@ static void computeCalleeSaveRegisterPairs( MachineFunction &MF, const std::vector &CSI, const TargetRegisterInfo *TRI, SmallVectorImpl &RegPairs, - bool &NeedShadowCallStackProlog) { + bool &NeedShadowCallStackProlog, bool NeedsFrameRecord) { if (CSI.empty()) return; @@ -1750,7 +1796,8 @@ switch (RPI.Type) { case RegPairInfo::GPR: if (AArch64::GPR64RegClass.contains(NextReg) && - !invalidateWindowsRegisterPairing(RPI.Reg1, NextReg, NeedsWinCFI)) + !invalidateRegisterPairing(RPI.Reg1, NextReg, NeedsWinCFI, + NeedsFrameRecord)) RPI.Reg2 = NextReg; break; case RegPairInfo::FPR64: @@ -1784,6 +1831,10 @@ (CSI[i].getFrameIdx() + 1 == CSI[i + 1].getFrameIdx())) && "Out of order callee saved regs!"); + assert((!RPI.isPaired() || RPI.Reg2 != AArch64::FP || + RPI.Reg1 == AArch64::LR) && + "FrameRecord must be allocated together with LR"); + // MachO's compact unwind format relies on all registers being stored in // adjacent register pairs. assert((!produceCompactUnwindFrame(MF) || @@ -1832,7 +1883,7 @@ bool NeedShadowCallStackProlog = false; computeCalleeSaveRegisterPairs(MF, CSI, TRI, RegPairs, - NeedShadowCallStackProlog); + NeedShadowCallStackProlog, hasFP(MF)); const MachineRegisterInfo &MRI = MF.getRegInfo(); if (NeedShadowCallStackProlog) { @@ -1962,7 +2013,7 @@ bool NeedShadowCallStackProlog = false; computeCalleeSaveRegisterPairs(MF, CSI, TRI, RegPairs, - NeedShadowCallStackProlog); + NeedShadowCallStackProlog, hasFP(MF)); auto EmitMI = [&](const RegPairInfo &RPI) { unsigned Reg1 = RPI.Reg1; Index: test/CodeGen/AArch64/GlobalISel/swifterror.ll =================================================================== --- test/CodeGen/AArch64/GlobalISel/swifterror.ll +++ test/CodeGen/AArch64/GlobalISel/swifterror.ll @@ -317,12 +317,12 @@ ; CHECK-LABEL: params_in_reg ; Save callee saved registers and swifterror since it will be clobbered by the first call to params_in_reg2. -; CHECK: stp x28, x0, [sp +; CHECK: stp x29, x30, [sp +; CHECK: str x28, [sp ; CHECK: stp x27, x26, [sp ; CHECK: stp x25, x24, [sp ; CHECK: stp x23, x22, [sp ; CHECK: stp x20, x19, [sp -; CHECK: stp x29, x30, [sp ; Store argument registers. ; CHECK: mov x20, x1 ; CHECK: mov x22, x2 @@ -345,7 +345,7 @@ ; CHECK: mov x21, xzr ; CHECK: bl _params_in_reg2 ; Restore original arguments for next call. -; CHECK: ldr x0, [sp +; CHECK: ldr x0, [x29 ; CHECK: mov x1, x20 ; CHECK: mov x2, x22 ; CHECK: mov x3, x23 @@ -358,8 +358,6 @@ ; CHECK: bl _params_in_reg2 ; Restore calle save registers but don't clober swifterror x21. ; CHECK-NOT: x21 -; CHECK: ldp x29, x30, [sp -; CHECK-NOT: x21 ; CHECK: ldp x20, x19, [sp ; CHECK-NOT: x21 ; CHECK: ldp x23, x22, [sp @@ -370,6 +368,8 @@ ; CHECK-NOT: x21 ; CHECK: ldr x28, [sp ; CHECK-NOT: x21 +; CHECK: ldp x29, x30, [sp +; CHECK-NOT: x21 ; CHECK: ret define swiftcc void @params_in_reg(i64, i64, i64, i64, i64, i64, i64, i64, i8*, %swift_error** nocapture swifterror %err) { %error_ptr_ref = alloca swifterror %swift_error*, align 8 @@ -382,12 +382,12 @@ ; CHECK-LABEL: params_and_return_in_reg ; Store callee saved registers. -; CHECK: stp x28, x0, [sp, #16 +; CHECK: stp x29, x30, [sp, #16 +; CHECK: str x28, [sp ; CHECK: stp x27, x26, [sp ; CHECK: stp x25, x24, [sp ; CHECK: stp x23, x22, [sp ; CHECK: stp x20, x19, [sp -; CHECK: stp x29, x30, [sp ; Save original arguments. ; CHECK: mov x20, x1 ; CHECK: mov x22, x2 @@ -411,7 +411,7 @@ ; Store swifterror %error_ptr_ref. ; CHECK: stp {{x[0-9]+}}, x21, [sp] ; Setup call arguments from original arguments. -; CHECK: ldr x0, [sp, #24 +; CHECK: ldr x0, [x29, #24 ; CHECK: mov x1, x20 ; CHECK: mov x2, x22 ; CHECK: mov x3, x23 @@ -455,12 +455,12 @@ ; CHECK: mov x7, x28 ; CHECK: mov x21, x19 ; Restore callee save registers. -; CHECK: ldp x29, x30, [sp ; CHECK: ldp x20, x19, [sp ; CHECK: ldp x23, x22, [sp ; CHECK: ldp x25, x24, [sp ; CHECK: ldp x27, x26, [sp ; CHECK: ldr x28, [sp +; CHECK: ldp x29, x30, [sp ; CHECK: ret define swiftcc { i64, i64, i64, i64, i64, i64, i64, i64 } @params_and_return_in_reg(i64, i64, i64, i64, i64, i64, i64, i64, i8* , %swift_error** nocapture swifterror %err) { %error_ptr_ref = alloca swifterror %swift_error*, align 8 Index: test/CodeGen/AArch64/aarch64-dynamic-stack-layout.ll =================================================================== --- test/CodeGen/AArch64/aarch64-dynamic-stack-layout.ll +++ test/CodeGen/AArch64/aarch64-dynamic-stack-layout.ll @@ -99,18 +99,18 @@ ; CHECK: .cfi_startproc ; Check that used callee-saved registers are saved ; CHECK: sub sp, sp, #32 -; CHECK: stp x19, x30, [sp, #16] +; CHECK: stp x30, x19, [sp, #16] ; Check correctness of cfi pseudo-instructions ; CHECK: .cfi_def_cfa_offset 32 -; CHECK: .cfi_offset w30, -8 -; CHECK: .cfi_offset w19, -16 +; CHECK: .cfi_offset w19, -8 +; CHECK: .cfi_offset w30, -16 ; Check correct access to arguments passed on the stack, through stack pointer ; CHECK: ldr d[[DARG:[0-9]+]], [sp, #56] ; CHECK: ldr w[[IARG:[0-9]+]], [sp, #40] ; Check correct access to local variable on the stack, through stack pointer ; CHECK: ldr w[[ILOC:[0-9]+]], [sp, #12] ; Check epilogue: -; CHECK: ldp x19, x30, [sp, #16] +; CHECK: ldp x30, x19, [sp, #16] ; CHECK: ret ; CHECK: .cfi_endproc @@ -118,24 +118,24 @@ ; CHECK-MACHO: .cfi_startproc ; Check that used callee-saved registers are saved ; CHECK-MACHO: sub sp, sp, #48 -; CHECK-MACHO: stp x20, x19, [sp, #16] +; CHECK-MACHO: stp x29, x30, [sp, #16] +; CHECK-MACHO: stp x20, x19, [sp, #32] ; Check that the frame pointer is created: -; CHECK-MACHO: stp x29, x30, [sp, #32] -; CHECK-MACHO: add x29, sp, #32 +; CHECK-MACHO: add x29, sp, #16 ; Check correctness of cfi pseudo-instructions -; CHECK-MACHO: .cfi_def_cfa w29, 16 -; CHECK-MACHO: .cfi_offset w30, -8 -; CHECK-MACHO: .cfi_offset w29, -16 -; CHECK-MACHO: .cfi_offset w19, -24 -; CHECK-MACHO: .cfi_offset w20, -32 +; CHECK-MACHO: .cfi_def_cfa w29, 32 +; CHECK-MACHO: .cfi_offset w30, -24 +; CHECK-MACHO: .cfi_offset w29, -32 +; CHECK-MACHO: .cfi_offset w19, -8 +; CHECK-MACHO: .cfi_offset w20, -16 ; Check correct access to arguments passed on the stack, through frame pointer -; CHECK-MACHO: ldr d[[DARG:[0-9]+]], [x29, #32] -; CHECK-MACHO: ldr w[[IARG:[0-9]+]], [x29, #20] -; Check correct access to local variable on the stack, through stack pointer -; CHECK-MACHO: ldr w[[ILOC:[0-9]+]], [sp, #12] +; CHECK-MACHO: ldr d[[DARG:[0-9]+]], [x29, #48] +; CHECK-MACHO: ldr w[[IARG:[0-9]+]], [x29, #36] +; Check correct access to local variable on the stack +; CHECK-MACHO: ldur w[[ILOC:[0-9]+]], [x29, #-4] ; Check epilogue: -; CHECK-MACHO: ldp x29, x30, [sp, #32] -; CHECK-MACHO: ldp x20, x19, [sp, #16] +; CHECK-MACHO: ldp x20, x19, [sp, #32] +; CHECK-MACHO: ldp x29, x30, [sp, #16] ; CHECK-MACHO: ret ; CHECK-MACHO: .cfi_endproc @@ -180,57 +180,57 @@ ; CHECK-LABEL: novla_dynamicrealign_call ; CHECK: .cfi_startproc ; Check that used callee-saved registers are saved -; CHECK: str x19, [sp, #-32]! +; CHECK: stp x29, x30, [sp, #-32]! ; Check that the frame pointer is created: -; CHECK: stp x29, x30, [sp, #16] -; CHECK: add x29, sp, #16 +; CHECK: str x19, [sp, #16] +; CHECK: mov x29, sp ; Check the dynamic realignment of the stack pointer to a 128-byte boundary ; CHECK: sub x9, sp, #96 ; CHECK: and sp, x9, #0xffffffffffffff80 ; Check correctness of cfi pseudo-instructions -; CHECK: .cfi_def_cfa w29, 16 -; CHECK: .cfi_offset w30, -8 -; CHECK: .cfi_offset w29, -16 -; CHECK: .cfi_offset w19, -32 +; CHECK: .cfi_def_cfa w29, 32 +; CHECK: .cfi_offset w30, -24 +; CHECK: .cfi_offset w29, -32 +; CHECK: .cfi_offset w19, -16 ; Check correct access to arguments passed on the stack, through frame pointer -; CHECK: ldr d[[DARG:[0-9]+]], [x29, #40] -; CHECK: ldr w[[IARG:[0-9]+]], [x29, #24] +; CHECK: ldr d[[DARG:[0-9]+]], [x29, #56] +; CHECK: ldr w[[IARG:[0-9]+]], [x29, #40] ; Check correct access to local variable on the stack, through re-aligned stack pointer ; CHECK: ldr w[[ILOC:[0-9]+]], [sp] ; Check epilogue: ; Check that stack pointer get restored from frame pointer. -; CHECK: sub sp, x29, #16 // =16 -; CHECK: ldp x29, x30, [sp, #16] -; CHECK: ldr x19, [sp], #32 +; CHECK: mov sp, x29 +; CHECK: ldr x19, [sp, #16] +; CHECK: ldp x29, x30, [sp], #32 ; CHECK: ret ; CHECK: .cfi_endproc ; CHECK-MACHO-LABEL: _novla_dynamicrealign_call: ; CHECK-MACHO: .cfi_startproc ; Check that used callee-saved registers are saved -; CHECK-MACHO: stp x20, x19, [sp, #-32]! +; CHECK-MACHO: stp x29, x30, [sp, #-32]! ; Check that the frame pointer is created: -; CHECK-MACHO: stp x29, x30, [sp, #16] -; CHECK-MACHO: add x29, sp, #16 +; CHECK-MACHO: stp x20, x19, [sp, #16] +; CHECK-MACHO: mov x29, sp ; Check the dynamic realignment of the stack pointer to a 128-byte boundary ; CHECK-MACHO: sub x9, sp, #96 ; CHECK-MACHO: and sp, x9, #0xffffffffffffff80 ; Check correctness of cfi pseudo-instructions -; CHECK-MACHO: .cfi_def_cfa w29, 16 -; CHECK-MACHO: .cfi_offset w30, -8 -; CHECK-MACHO: .cfi_offset w29, -16 -; CHECK-MACHO: .cfi_offset w19, -24 -; CHECK-MACHO: .cfi_offset w20, -32 +; CHECK-MACHO: .cfi_def_cfa w29, 32 +; CHECK-MACHO: .cfi_offset w30, -24 +; CHECK-MACHO: .cfi_offset w29, -32 +; CHECK-MACHO: .cfi_offset w19, -8 +; CHECK-MACHO: .cfi_offset w20, -16 ; Check correct access to arguments passed on the stack, through frame pointer -; CHECK-MACHO: ldr d[[DARG:[0-9]+]], [x29, #32] -; CHECK-MACHO: ldr w[[IARG:[0-9]+]], [x29, #20] +; CHECK-MACHO: ldr d[[DARG:[0-9]+]], [x29, #48] +; CHECK-MACHO: ldr w[[IARG:[0-9]+]], [x29, #36] ; Check correct access to local variable on the stack, through re-aligned stack pointer ; CHECK-MACHO: ldr w[[ILOC:[0-9]+]], [sp] ; Check epilogue: ; Check that stack pointer get restored from frame pointer. -; CHECK-MACHO: sub sp, x29, #16 -; CHECK-MACHO: ldp x29, x30, [sp, #16] -; CHECK-MACHO: ldp x20, x19, [sp], #32 +; CHECK-MACHO: mov sp, x29 +; CHECK-MACHO: ldp x20, x19, [sp, #16] +; CHECK-MACHO: ldp x29, x30, [sp], #32 ; CHECK-MACHO: ret ; CHECK-MACHO: .cfi_endproc @@ -284,22 +284,22 @@ ; CHECK-LABEL: vla_nodynamicrealign_call ; CHECK: .cfi_startproc ; Check that used callee-saved registers are saved -; CHECK: stp x20, x19, [sp, #-32]! +; CHECK: stp x29, x30, [sp, #-32]! ; Check that the frame pointer is created: -; CHECK: stp x29, x30, [sp, #16] -; CHECK: add x29, sp, #16 +; CHECK: stp x20, x19, [sp, #16] +; CHECK: mov x29, sp ; Check that space is reserved on the stack for the local variable, ; rounded up to a multiple of 16 to keep the stack pointer 16-byte aligned. ; CHECK: sub sp, sp, #16 ; Check correctness of cfi pseudo-instructions -; CHECK: .cfi_def_cfa w29, 16 -; CHECK: .cfi_offset w30, -8 -; CHECK: .cfi_offset w29, -16 -; CHECK: .cfi_offset w19, -24 -; CHECK: .cfi_offset w20, -32 +; CHECK: .cfi_def_cfa w29, 32 +; CHECK: .cfi_offset w30, -24 +; CHECK: .cfi_offset w29, -32 +; CHECK: .cfi_offset w19, -8 +; CHECK: .cfi_offset w20, -16 ; Check correct access to arguments passed on the stack, through frame pointer -; CHECK: ldr w[[IARG:[0-9]+]], [x29, #24] -; CHECK: ldr d[[DARG:[0-9]+]], [x29, #40] +; CHECK: ldr w[[IARG:[0-9]+]], [x29, #40] +; CHECK: ldr d[[DARG:[0-9]+]], [x29, #56] ; Check correct reservation of 16-byte aligned VLA (size in w0) on stack ; CHECK: mov w9, w0 ; CHECK: mov x10, sp @@ -309,14 +309,14 @@ ; CHECK: sub x[[VLASPTMP:[0-9]+]], x10, x9 ; CHECK: mov sp, x[[VLASPTMP]] ; Check correct access to local variable, through frame pointer -; CHECK: ldur w[[ILOC:[0-9]+]], [x29, #-20] +; CHECK: ldur w[[ILOC:[0-9]+]], [x29, #-4] ; Check correct accessing of the VLA variable through the base pointer ; CHECK: ldr w[[VLA:[0-9]+]], [x[[VLASPTMP]]] ; Check epilogue: ; Check that stack pointer get restored from frame pointer. -; CHECK: sub sp, x29, #16 // =16 -; CHECK: ldp x29, x30, [sp, #16] -; CHECK: ldp x20, x19, [sp], #32 +; CHECK: mov sp, x29 +; CHECK: ldp x20, x19, [sp, #16] +; CHECK: ldp x29, x30, [sp], #32 ; CHECK: ret ; CHECK: .cfi_endproc @@ -385,11 +385,11 @@ ; CHECK-LABEL: vla_dynamicrealign_call ; CHECK: .cfi_startproc ; Check that used callee-saved registers are saved -; CHECK: str x21, [sp, #-48]! -; CHECK: stp x20, x19, [sp, #16] +; CHECK: stp x29, x30, [sp, #-48]! +; CHECK: str x21, [sp, #16] +; CHECK: stp x20, x19, [sp, #32] ; Check that the frame pointer is created: -; CHECK: stp x29, x30, [sp, #32] -; CHECK: add x29, sp, #32 +; CHECK: mov x29, sp ; Check that the stack pointer gets re-aligned to 128 ; bytes & the base pointer (x19) gets initialized to ; this 128-byte aligned area for local variables & @@ -398,15 +398,15 @@ ; CHECK: and sp, x9, #0xffffffffffffff80 ; CHECK: mov x19, sp ; Check correctness of cfi pseudo-instructions -; CHECK: .cfi_def_cfa w29, 16 -; CHECK: .cfi_offset w30, -8 -; CHECK: .cfi_offset w29, -16 -; CHECK: .cfi_offset w19, -24 -; CHECK: .cfi_offset w20, -32 -; CHECK: .cfi_offset w21, -48 +; CHECK: .cfi_def_cfa w29, 48 +; CHECK: .cfi_offset w30, -40 +; CHECK: .cfi_offset w29, -48 +; CHECK: .cfi_offset w19, -8 +; CHECK: .cfi_offset w20, -16 +; CHECK: .cfi_offset w21, -32 ; Check correct access to arguments passed on the stack, through frame pointer -; CHECK: ldr w[[IARG:[0-9]+]], [x29, #24] -; CHECK: ldr d[[DARG:[0-9]+]], [x29, #40] +; CHECK: ldr w[[IARG:[0-9]+]], [x29, #56] +; CHECK: ldr d[[DARG:[0-9]+]], [x29, #72] ; Check correct reservation of 16-byte aligned VLA (size in w0) on stack ; and set-up of base pointer (x19). ; CHECK: mov w9, w0 @@ -421,21 +421,21 @@ ; CHECK: ldr w[[VLA:[0-9]+]], [x[[VLASPTMP]]] ; Check epilogue: ; Check that stack pointer get restored from frame pointer. -; CHECK: sub sp, x29, #32 -; CHECK: ldp x29, x30, [sp, #32] -; CHECK: ldp x20, x19, [sp, #16] -; CHECK: ldr x21, [sp], #48 +; CHECK: mov sp, x29 +; CHECK: ldp x20, x19, [sp, #32] +; CHECK: ldr x21, [sp, #16] +; CHECK: ldp x29, x30, [sp], #48 ; CHECK: ret ; CHECK: .cfi_endproc ; CHECK-MACHO-LABEL: _vla_dynamicrealign_call: ; CHECK-MACHO: .cfi_startproc ; Check that used callee-saved registers are saved -; CHECK-MACHO: stp x22, x21, [sp, #-48]! -; CHECK-MACHO: stp x20, x19, [sp, #16] +; CHECK-MACHO: stp x29, x30, [sp, #-48]! +; CHECK-MACHO: stp x22, x21, [sp, #16] ; Check that the frame pointer is created: -; CHECK-MACHO: stp x29, x30, [sp, #32] -; CHECK-MACHO: add x29, sp, #32 +; CHECK-MACHO: stp x20, x19, [sp, #32] +; CHECK-MACHO: mov x29, sp ; Check that the stack pointer gets re-aligned to 128 ; bytes & the base pointer (x19) gets initialized to ; this 128-byte aligned area for local variables & @@ -444,16 +444,16 @@ ; CHECK-MACHO: and sp, x9, #0xffffffffffffff80 ; CHECK-MACHO: mov x19, sp ; Check correctness of cfi pseudo-instructions -; CHECK-MACHO: .cfi_def_cfa w29, 16 -; CHECK-MACHO: .cfi_offset w30, -8 -; CHECK-MACHO: .cfi_offset w29, -16 -; CHECK-MACHO: .cfi_offset w19, -24 -; CHECK-MACHO: .cfi_offset w20, -32 -; CHECK-MACHO: .cfi_offset w21, -40 -; CHECK-MACHO: .cfi_offset w22, -48 +; CHECK-MACHO: .cfi_def_cfa w29, 48 +; CHECK-MACHO: .cfi_offset w30, -40 +; CHECK-MACHO: .cfi_offset w29, -48 +; CHECK-MACHO: .cfi_offset w19, -8 +; CHECK-MACHO: .cfi_offset w20, -16 +; CHECK-MACHO: .cfi_offset w21, -24 +; CHECK-MACHO: .cfi_offset w22, -32 ; Check correct access to arguments passed on the stack, through frame pointer -; CHECK-MACHO: ldr w[[IARG:[0-9]+]], [x29, #20] -; CHECK-MACHO: ldr d[[DARG:[0-9]+]], [x29, #32] +; CHECK-MACHO: ldr w[[IARG:[0-9]+]], [x29, #52] +; CHECK-MACHO: ldr d[[DARG:[0-9]+]], [x29, #64] ; Check correct reservation of 16-byte aligned VLA (size in w0) on stack ; and set-up of base pointer (x19). ; CHECK-MACHO: mov w9, w0 @@ -468,10 +468,10 @@ ; CHECK-MACHO: ldr w[[VLA:[0-9]+]], [x[[VLASPTMP]]] ; Check epilogue: ; Check that stack pointer get restored from frame pointer. -; CHECK-MACHO: sub sp, x29, #32 -; CHECK-MACHO: ldp x29, x30, [sp, #32] -; CHECK-MACHO: ldp x20, x19, [sp, #16] -; CHECK-MACHO: ldp x22, x21, [sp], #48 +; CHECK-MACHO: mov sp, x29 +; CHECK-MACHO: ldp x20, x19, [sp, #32] +; CHECK-MACHO: ldp x22, x21, [sp, #16] +; CHECK-MACHO: ldp x29, x30, [sp], #48 ; CHECK-MACHO: ret ; CHECK-MACHO: .cfi_endproc @@ -493,10 +493,10 @@ ; CHECK-LABEL: vla_dynamicrealign_nocall ; Check that used callee-saved registers are saved -; CHECK: str x19, [sp, #-32]! +; CHECK: stp x29, x30, [sp, #-32]! +; CHECK: str x19, [sp, #16] ; Check that the frame pointer is created: -; CHECK: stp x29, x30, [sp, #16] -; CHECK: add x29, sp, #16 +; CHECK: mov x29, sp ; Check that the stack pointer gets re-aligned to 128 ; bytes & the base pointer (x19) gets initialized to ; this 128-byte aligned area for local variables & @@ -505,8 +505,8 @@ ; CHECK: and sp, x9, #0xffffffffffffff80 ; CHECK: mov x19, sp ; Check correct access to arguments passed on the stack, through frame pointer -; CHECK: ldr w[[IARG:[0-9]+]], [x29, #24] -; CHECK: ldr d[[DARG:[0-9]+]], [x29, #40] +; CHECK: ldr w[[IARG:[0-9]+]], [x29, #40] +; CHECK: ldr d[[DARG:[0-9]+]], [x29, #56] ; Check correct reservation of 16-byte aligned VLA (size in w0) on stack ; and set-up of base pointer (x19). ; CHECK: mov w9, w0 @@ -521,17 +521,17 @@ ; CHECK: ldr w[[VLA:[0-9]+]], [x[[VLASPTMP]]] ; Check epilogue: ; Check that stack pointer get restored from frame pointer. -; CHECK: sub sp, x29, #16 -; CHECK: ldp x29, x30, [sp, #16] -; CHECK: ldr x19, [sp], #32 +; CHECK: mov sp, x29 +; CHECK: ldr x19, [sp, #16] +; CHECK: ldp x29, x30, [sp], #32 ; CHECK: ret ; CHECK-MACHO-LABEL: _vla_dynamicrealign_nocall: ; Check that used callee-saved registers are saved -; CHECK-MACHO: stp x20, x19, [sp, #-32]! +; CHECK-MACHO: stp x29, x30, [sp, #-32]! ; Check that the frame pointer is created: -; CHECK-MACHO: stp x29, x30, [sp, #16] -; CHECK-MACHO: add x29, sp, #16 +; CHECK-MACHO: stp x20, x19, [sp, #16] +; CHECK-MACHO: mov x29, sp ; Check that the stack pointer gets re-aligned to 128 ; bytes & the base pointer (x19) gets initialized to ; this 128-byte aligned area for local variables & @@ -540,8 +540,8 @@ ; CHECK-MACHO: and sp, x9, #0xffffffffffffff80 ; CHECK-MACHO: mov x19, sp ; Check correct access to arguments passed on the stack, through frame pointer -; CHECK-MACHO: ldr w[[IARG:[0-9]+]], [x29, #20] -; CHECK-MACHO: ldr d[[DARG:[0-9]+]], [x29, #32] +; CHECK-MACHO: ldr w[[IARG:[0-9]+]], [x29, #36] +; CHECK-MACHO: ldr d[[DARG:[0-9]+]], [x29, #48] ; Check correct reservation of 16-byte aligned VLA (size in w0) on stack ; and set-up of base pointer (x19). ; CHECK-MACHO: mov w9, w0 @@ -556,9 +556,9 @@ ; CHECK-MACHO: ldr w[[VLA:[0-9]+]], [x[[VLASPTMP]]] ; Check epilogue: ; Check that stack pointer get restored from frame pointer. -; CHECK-MACHO: sub sp, x29, #16 -; CHECK-MACHO: ldp x29, x30, [sp, #16] -; CHECK-MACHO: ldp x20, x19, [sp], #32 +; CHECK-MACHO: mov sp, x29 +; CHECK-MACHO: ldp x20, x19, [sp, #16] +; CHECK-MACHO: ldp x29, x30, [sp], #32 ; CHECK-MACHO: ret @@ -579,10 +579,10 @@ ; CHECK-LABEL: vla_dynamicrealign_nocall_large_align ; Check that used callee-saved registers are saved -; CHECK: stp x28, x19, [sp, #-32]! +; CHECK: stp x29, x30, [sp, #-32]! +; CHECK: str x19, [sp, #16] ; Check that the frame pointer is created: -; CHECK: stp x29, x30, [sp, #16] -; CHECK: add x29, sp, #16 +; CHECK: mov x29, sp ; Check that the stack pointer gets re-aligned to 128 ; bytes & the base pointer (x19) gets initialized to ; this 128-byte aligned area for local variables & @@ -591,8 +591,8 @@ ; CHECK: and sp, x9, #0xffffffffffff8000 ; CHECK: mov x19, sp ; Check correct access to arguments passed on the stack, through frame pointer -; CHECK: ldr w[[IARG:[0-9]+]], [x29, #24] -; CHECK: ldr d[[DARG:[0-9]+]], [x29, #40] +; CHECK: ldr w[[IARG:[0-9]+]], [x29, #40] +; CHECK: ldr d[[DARG:[0-9]+]], [x29, #56] ; Check correct reservation of 16-byte aligned VLA (size in w0) on stack ; and set-up of base pointer (x19). ; CHECK: mov w9, w0 @@ -607,17 +607,17 @@ ; CHECK: ldr w[[VLA:[0-9]+]], [x[[VLASPTMP]]] ; Check epilogue: ; Check that stack pointer get restored from frame pointer. -; CHECK: sub sp, x29, #16 -; CHECK: ldp x29, x30, [sp, #16] -; CHECK: ldp x28, x19, [sp], #32 +; CHECK: mov sp, x29 +; CHECK: ldr x19, [sp, #16] +; CHECK: ldp x29, x30, [sp], #32 ; CHECK: ret ; CHECK-MACHO-LABEL: _vla_dynamicrealign_nocall_large_align: ; Check that used callee-saved registers are saved -; CHECK-MACHO: stp x20, x19, [sp, #-32]! +; CHECK-MACHO: stp x29, x30, [sp, #-32]! ; Check that the frame pointer is created: -; CHECK-MACHO: stp x29, x30, [sp, #16] -; CHECK-MACHO: add x29, sp, #16 +; CHECK-MACHO: stp x20, x19, [sp, #16] +; CHECK-MACHO: mov x29, sp ; Check that the stack pointer gets re-aligned to 128 ; bytes & the base pointer (x19) gets initialized to ; this 128-byte aligned area for local variables & @@ -626,8 +626,8 @@ ; CHECK-MACHO: and sp, x9, #0xffffffffffff8000 ; CHECK-MACHO: mov x19, sp ; Check correct access to arguments passed on the stack, through frame pointer -; CHECK-MACHO: ldr w[[IARG:[0-9]+]], [x29, #20] -; CHECK-MACHO: ldr d[[DARG:[0-9]+]], [x29, #32] +; CHECK-MACHO: ldr w[[IARG:[0-9]+]], [x29, #36] +; CHECK-MACHO: ldr d[[DARG:[0-9]+]], [x29, #48] ; Check correct reservation of 16-byte aligned VLA (size in w0) on stack ; and set-up of base pointer (x19). ; CHECK-MACHO: mov w9, w0 @@ -642,9 +642,9 @@ ; CHECK-MACHO: ldr w[[VLA:[0-9]+]], [x[[VLASPTMP]]] ; Check epilogue: ; Check that stack pointer get restored from frame pointer. -; CHECK-MACHO: sub sp, x29, #16 -; CHECK-MACHO: ldp x29, x30, [sp, #16] -; CHECK-MACHO: ldp x20, x19, [sp], #32 +; CHECK-MACHO: mov sp, x29 +; CHECK-MACHO: ldp x20, x19, [sp, #16] +; CHECK-MACHO: ldp x29, x30, [sp], #32 ; CHECK-MACHO: ret Index: test/CodeGen/AArch64/aarch64-vector-pcs.mir =================================================================== --- test/CodeGen/AArch64/aarch64-vector-pcs.mir +++ test/CodeGen/AArch64/aarch64-vector-pcs.mir @@ -176,12 +176,12 @@ ; CHECK-NEXT: frame-setup STPQi killed $q13, killed $q12, $sp, 11 ; CHECK-NEXT: frame-setup STPQi killed $q11, killed $q10, $sp, 13 ; CHECK-NEXT: frame-setup STPQi killed $q9, killed $q8, $sp, 15 - ; CHECK-NEXT: frame-setup STPXi killed $x28, killed $x27, $sp, 34 :: (store 8 into %stack.{{[0-9]+}}), (store 8 into %stack.{{[0-9]+}}) - ; CHECK-NEXT: frame-setup STPXi killed $x26, killed $x25, $sp, 36 - ; CHECK-NEXT: frame-setup STPXi killed $x24, killed $x23, $sp, 38 - ; CHECK-NEXT: frame-setup STPXi killed $x22, killed $x21, $sp, 40 - ; CHECK-NEXT: frame-setup STPXi killed $x20, killed $x19, $sp, 42 - ; CHECK-NEXT: frame-setup STPXi killed $fp, killed $lr, $sp, 44 + ; CHECK-NEXT: frame-setup STPXi killed $fp, killed $lr, $sp, 34 :: (store 8 into %stack.{{[0-9]+}}), (store 8 into %stack.{{[0-9]+}}) + ; CHECK-NEXT: frame-setup STPXi killed $x28, killed $x27, $sp, 36 + ; CHECK-NEXT: frame-setup STPXi killed $x26, killed $x25, $sp, 38 + ; CHECK-NEXT: frame-setup STPXi killed $x24, killed $x23, $sp, 40 + ; CHECK-NEXT: frame-setup STPXi killed $x22, killed $x21, $sp, 42 + ; CHECK-NEXT: frame-setup STPXi killed $x20, killed $x19, $sp, 44 ... --- @@ -242,12 +242,12 @@ ; CHECK-NEXT: frame-setup STPQi killed $q13, killed $q12, $sp, 10 ; CHECK-NEXT: frame-setup STPQi killed $q11, killed $q10, $sp, 12 ; CHECK-NEXT: frame-setup STPQi killed $q9, killed $q8, $sp, 14 - ; CHECK-NEXT: frame-setup STPXi killed $x28, killed $x27, $sp, 32 :: (store 8 into %stack.{{[0-9]+}}), (store 8 into %stack.{{[0-9]+}}) - ; CHECK-NEXT: frame-setup STPXi killed $x26, killed $x25, $sp, 34 - ; CHECK-NEXT: frame-setup STPXi killed $x24, killed $x23, $sp, 36 - ; CHECK-NEXT: frame-setup STPXi killed $x22, killed $x21, $sp, 38 - ; CHECK-NEXT: frame-setup STPXi killed $x20, killed $x19, $sp, 40 - ; CHECK-NEXT: frame-setup STPXi killed $fp, killed $lr, $sp, 42 + ; CHECK-NEXT: frame-setup STPXi killed $fp, killed $lr, $sp, 32 :: (store 8 into %stack.{{[0-9]+}}), (store 8 into %stack.{{[0-9]+}}) + ; CHECK-NEXT: frame-setup STPXi killed $x28, killed $x27, $sp, 34 + ; CHECK-NEXT: frame-setup STPXi killed $x26, killed $x25, $sp, 36 + ; CHECK-NEXT: frame-setup STPXi killed $x24, killed $x23, $sp, 38 + ; CHECK-NEXT: frame-setup STPXi killed $x22, killed $x21, $sp, 40 + ; CHECK-NEXT: frame-setup STPXi killed $x20, killed $x19, $sp, 42 ; CHECK-NEXT: $sp = frame-setup SUBXri $sp, 176, 0 ... Index: test/CodeGen/AArch64/addsub-constant-folding.ll =================================================================== --- test/CodeGen/AArch64/addsub-constant-folding.ll +++ test/CodeGen/AArch64/addsub-constant-folding.ll @@ -19,15 +19,15 @@ define i32 @add_const_add_const_extrause(i32 %arg) { ; CHECK-LABEL: add_const_add_const_extrause: ; CHECK: // %bb.0: -; CHECK-NEXT: stp x19, x30, [sp, #-16]! // 16-byte Folded Spill +; CHECK-NEXT: stp x30, x19, [sp, #-16]! // 16-byte Folded Spill ; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: .cfi_offset w30, -8 -; CHECK-NEXT: .cfi_offset w19, -16 +; CHECK-NEXT: .cfi_offset w19, -8 +; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: mov w19, w0 ; CHECK-NEXT: add w0, w0, #8 // =8 ; CHECK-NEXT: bl use ; CHECK-NEXT: add w0, w19, #10 // =10 -; CHECK-NEXT: ldp x19, x30, [sp], #16 // 16-byte Folded Reload +; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload ; CHECK-NEXT: ret %t0 = add i32 %arg, 8 call void @use(i32 %t0) @@ -96,15 +96,15 @@ define i32 @add_const_sub_const_extrause(i32 %arg) { ; CHECK-LABEL: add_const_sub_const_extrause: ; CHECK: // %bb.0: -; CHECK-NEXT: stp x19, x30, [sp, #-16]! // 16-byte Folded Spill +; CHECK-NEXT: stp x30, x19, [sp, #-16]! // 16-byte Folded Spill ; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: .cfi_offset w30, -8 -; CHECK-NEXT: .cfi_offset w19, -16 +; CHECK-NEXT: .cfi_offset w19, -8 +; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: mov w19, w0 ; CHECK-NEXT: add w0, w0, #8 // =8 ; CHECK-NEXT: bl use ; CHECK-NEXT: add w0, w19, #6 // =6 -; CHECK-NEXT: ldp x19, x30, [sp], #16 // 16-byte Folded Reload +; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload ; CHECK-NEXT: ret %t0 = add i32 %arg, 8 call void @use(i32 %t0) @@ -174,16 +174,16 @@ define i32 @add_const_const_sub_extrause(i32 %arg) { ; CHECK-LABEL: add_const_const_sub_extrause: ; CHECK: // %bb.0: -; CHECK-NEXT: stp x19, x30, [sp, #-16]! // 16-byte Folded Spill +; CHECK-NEXT: stp x30, x19, [sp, #-16]! // 16-byte Folded Spill ; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: .cfi_offset w30, -8 -; CHECK-NEXT: .cfi_offset w19, -16 +; CHECK-NEXT: .cfi_offset w19, -8 +; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: mov w19, w0 ; CHECK-NEXT: add w0, w0, #8 // =8 ; CHECK-NEXT: bl use ; CHECK-NEXT: mov w8, #-6 ; CHECK-NEXT: sub w0, w8, w19 -; CHECK-NEXT: ldp x19, x30, [sp], #16 // 16-byte Folded Reload +; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload ; CHECK-NEXT: ret %t0 = add i32 %arg, 8 call void @use(i32 %t0) @@ -252,15 +252,15 @@ define i32 @sub_const_add_const_extrause(i32 %arg) { ; CHECK-LABEL: sub_const_add_const_extrause: ; CHECK: // %bb.0: -; CHECK-NEXT: stp x19, x30, [sp, #-16]! // 16-byte Folded Spill +; CHECK-NEXT: stp x30, x19, [sp, #-16]! // 16-byte Folded Spill ; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: .cfi_offset w30, -8 -; CHECK-NEXT: .cfi_offset w19, -16 +; CHECK-NEXT: .cfi_offset w19, -8 +; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: mov w19, w0 ; CHECK-NEXT: sub w0, w0, #8 // =8 ; CHECK-NEXT: bl use ; CHECK-NEXT: sub w0, w19, #6 // =6 -; CHECK-NEXT: ldp x19, x30, [sp], #16 // 16-byte Folded Reload +; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload ; CHECK-NEXT: ret %t0 = sub i32 %arg, 8 call void @use(i32 %t0) @@ -329,15 +329,15 @@ define i32 @sub_const_sub_const_extrause(i32 %arg) { ; CHECK-LABEL: sub_const_sub_const_extrause: ; CHECK: // %bb.0: -; CHECK-NEXT: stp x19, x30, [sp, #-16]! // 16-byte Folded Spill +; CHECK-NEXT: stp x30, x19, [sp, #-16]! // 16-byte Folded Spill ; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: .cfi_offset w30, -8 -; CHECK-NEXT: .cfi_offset w19, -16 +; CHECK-NEXT: .cfi_offset w19, -8 +; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: mov w19, w0 ; CHECK-NEXT: sub w0, w0, #8 // =8 ; CHECK-NEXT: bl use ; CHECK-NEXT: sub w0, w19, #10 // =10 -; CHECK-NEXT: ldp x19, x30, [sp], #16 // 16-byte Folded Reload +; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload ; CHECK-NEXT: ret %t0 = sub i32 %arg, 8 call void @use(i32 %t0) @@ -407,16 +407,16 @@ define i32 @sub_const_const_sub_extrause(i32 %arg) { ; CHECK-LABEL: sub_const_const_sub_extrause: ; CHECK: // %bb.0: -; CHECK-NEXT: stp x19, x30, [sp, #-16]! // 16-byte Folded Spill +; CHECK-NEXT: stp x30, x19, [sp, #-16]! // 16-byte Folded Spill ; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: .cfi_offset w30, -8 -; CHECK-NEXT: .cfi_offset w19, -16 +; CHECK-NEXT: .cfi_offset w19, -8 +; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: mov w19, w0 ; CHECK-NEXT: sub w0, w0, #8 // =8 ; CHECK-NEXT: bl use ; CHECK-NEXT: mov w8, #10 ; CHECK-NEXT: sub w0, w8, w19 -; CHECK-NEXT: ldp x19, x30, [sp], #16 // 16-byte Folded Reload +; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload ; CHECK-NEXT: ret %t0 = sub i32 %arg, 8 call void @use(i32 %t0) @@ -486,17 +486,17 @@ define i32 @const_sub_add_const_extrause(i32 %arg) { ; CHECK-LABEL: const_sub_add_const_extrause: ; CHECK: // %bb.0: -; CHECK-NEXT: stp x19, x30, [sp, #-16]! // 16-byte Folded Spill +; CHECK-NEXT: stp x30, x19, [sp, #-16]! // 16-byte Folded Spill ; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: .cfi_offset w30, -8 -; CHECK-NEXT: .cfi_offset w19, -16 +; CHECK-NEXT: .cfi_offset w19, -8 +; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: mov w8, #8 ; CHECK-NEXT: mov w19, w0 ; CHECK-NEXT: sub w0, w8, w0 ; CHECK-NEXT: bl use ; CHECK-NEXT: mov w8, #10 ; CHECK-NEXT: sub w0, w8, w19 -; CHECK-NEXT: ldp x19, x30, [sp], #16 // 16-byte Folded Reload +; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload ; CHECK-NEXT: ret %t0 = sub i32 8, %arg call void @use(i32 %t0) @@ -566,17 +566,17 @@ define i32 @const_sub_sub_const_extrause(i32 %arg) { ; CHECK-LABEL: const_sub_sub_const_extrause: ; CHECK: // %bb.0: -; CHECK-NEXT: stp x19, x30, [sp, #-16]! // 16-byte Folded Spill +; CHECK-NEXT: stp x30, x19, [sp, #-16]! // 16-byte Folded Spill ; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: .cfi_offset w30, -8 -; CHECK-NEXT: .cfi_offset w19, -16 +; CHECK-NEXT: .cfi_offset w19, -8 +; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: mov w8, #8 ; CHECK-NEXT: mov w19, w0 ; CHECK-NEXT: sub w0, w8, w0 ; CHECK-NEXT: bl use ; CHECK-NEXT: mov w8, #6 ; CHECK-NEXT: sub w0, w8, w19 -; CHECK-NEXT: ldp x19, x30, [sp], #16 // 16-byte Folded Reload +; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload ; CHECK-NEXT: ret %t0 = sub i32 8, %arg call void @use(i32 %t0) @@ -645,17 +645,17 @@ define i32 @const_sub_const_sub_extrause(i32 %arg) { ; CHECK-LABEL: const_sub_const_sub_extrause: ; CHECK: // %bb.0: -; CHECK-NEXT: stp x19, x30, [sp, #-16]! // 16-byte Folded Spill +; CHECK-NEXT: stp x30, x19, [sp, #-16]! // 16-byte Folded Spill ; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: .cfi_offset w30, -8 -; CHECK-NEXT: .cfi_offset w19, -16 +; CHECK-NEXT: .cfi_offset w19, -8 +; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: mov w8, #8 ; CHECK-NEXT: sub w19, w8, w0 ; CHECK-NEXT: mov w0, w19 ; CHECK-NEXT: bl use ; CHECK-NEXT: mov w8, #2 ; CHECK-NEXT: sub w0, w8, w19 -; CHECK-NEXT: ldp x19, x30, [sp], #16 // 16-byte Folded Reload +; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload ; CHECK-NEXT: ret %t0 = sub i32 8, %arg call void @use(i32 %t0) Index: test/CodeGen/AArch64/alloca.ll =================================================================== --- test/CodeGen/AArch64/alloca.ll +++ test/CodeGen/AArch64/alloca.ll @@ -117,15 +117,15 @@ ; CHECK-MACHO-LABEL: test_alloca_large_frame: -; CHECK: stp x28, x19, [sp, #-32]! -; CHECK: stp x29, x30, [sp, #16] -; CHECK: add x29, sp, #16 +; CHECK: stp x29, x30, [sp, #-32]! +; CHECK: stp x28, x19, [sp, #16] +; CHECK: mov x29, sp ; CHECK: sub sp, sp, #1953, lsl #12 ; CHECK: sub sp, sp, #512 -; CHECK-MACHO: stp x20, x19, [sp, #-32]! -; CHECK-MACHO: stp x29, x30, [sp, #16] -; CHECK-MACHO: add x29, sp, #16 +; CHECK-MACHO: stp x29, x30, [sp, #-32]! +; CHECK-MACHO: stp x20, x19, [sp, #16] +; CHECK-MACHO: mov x29, sp ; CHECK-MACHO: sub sp, sp, #1953, lsl #12 ; CHECK-MACHO: sub sp, sp, #512 @@ -136,13 +136,13 @@ ret void -; CHECK: sub sp, x29, #16 -; CHECK: ldp x29, x30, [sp, #16] -; CHECK: ldp x28, x19, [sp], #32 +; CHECK: mov sp, x29 +; CHECK: ldp x28, x19, [sp, #16] +; CHECK: ldp x29, x30, [sp], #32 -; CHECK-MACHO: sub sp, x29, #16 -; CHECK-MACHO: ldp x29, x30, [sp, #16] -; CHECK-MACHO: ldp x20, x19, [sp], #32 +; CHECK-MACHO: mov sp, x29 +; CHECK-MACHO: ldp x20, x19, [sp, #16] +; CHECK-MACHO: ldp x29, x30, [sp], #32 } declare i8* @llvm.stacksave() @@ -152,6 +152,7 @@ ; CHECK-LABEL: test_scoped_alloca: %sp = call i8* @llvm.stacksave() +; CHECK: mov x29, sp ; CHECK: mov [[SAVED_SP:x[0-9]+]], sp ; CHECK: mov [[OLDSP:x[0-9]+]], sp Index: test/CodeGen/AArch64/arm64-alloca-frame-pointer-offset.ll =================================================================== --- test/CodeGen/AArch64/arm64-alloca-frame-pointer-offset.ll +++ test/CodeGen/AArch64/arm64-alloca-frame-pointer-offset.ll @@ -1,8 +1,8 @@ ; RUN: llc -mtriple=arm64-eabi -mcpu=cyclone < %s | FileCheck %s ; CHECK: foo -; CHECK-DAG: stur w[[REG0:[0-9]+]], [x29, #-24] -; CHECK-DAG: stur w[[REG0]], [x29, #-20] +; CHECK-DAG: str w[[REG0:[0-9]+]], [x29, #24] +; CHECK-DAG: str w[[REG0]], [x29, #28] define i32 @foo(i32 %a) nounwind { %retval = alloca i32, align 4 %a.addr = alloca i32, align 4 Index: test/CodeGen/AArch64/arm64-anyregcc.ll =================================================================== --- test/CodeGen/AArch64/arm64-anyregcc.ll +++ test/CodeGen/AArch64/arm64-anyregcc.ll @@ -436,20 +436,20 @@ ; CHECK-NEXT: .short {{[0-9]+}} ; CHECK-NEXT: .short 0 ; CHECK-NEXT: .long 0 -; Loc 3: Arg2 spilled to FP -96 +; Loc 3: Arg2 spilled to FP -16 ; CHECK-NEXT: .byte 3 ; CHECK-NEXT: .byte 0 ; CHECK-NEXT: .short 8 ; CHECK-NEXT: .short 29 ; CHECK-NEXT: .short 0 -; CHECK-NEXT: .long -96 -; Loc 4: Arg3 spilled to FP - 88 +; CHECK-NEXT: .long -16 +; Loc 4: Arg3 spilled to FP - 8 ; CHECK-NEXT: .byte 3 ; CHECK-NEXT: .byte 0 ; CHECK-NEXT: .short 8 ; CHECK-NEXT: .short 29 ; CHECK-NEXT: .short 0 -; CHECK-NEXT: .long -88 +; CHECK-NEXT: .long -8 define i64 @patchpoint_spillargs(i64 %p1, i64 %p2, i64 %p3, i64 %p4) { entry: tail call void asm sideeffect "nop", "~{x0},~{x1},~{x2},~{x3},~{x4},~{x5},~{x6},~{x7},~{x8},~{x9},~{x10},~{x11},~{x12},~{x13},~{x14},~{x15},~{x16},~{x17},~{x18},~{x19},~{x20},~{x21},~{x22},~{x23},~{x24},~{x25},~{x26},~{x27},~{x28},~{x29},~{x30},~{x31}"() nounwind Index: test/CodeGen/AArch64/arm64-frame-index.ll =================================================================== --- test/CodeGen/AArch64/arm64-frame-index.ll +++ test/CodeGen/AArch64/arm64-frame-index.ll @@ -5,7 +5,7 @@ entry: ; CHECK-LABEL: t1: ; CHECK-NOT: add x{{[0-9]+}}, sp -; CHECK: stp x28, x27, [sp, #-16]! +; CHECK: stp x29, x30, [sp, #-16]! %v = alloca [288 x i32], align 4 unreachable } Index: test/CodeGen/AArch64/arm64-shrink-wrapping.ll =================================================================== --- test/CodeGen/AArch64/arm64-shrink-wrapping.ll +++ test/CodeGen/AArch64/arm64-shrink-wrapping.ll @@ -71,14 +71,14 @@ ; ENABLE: ; %bb.0: ; %entry ; ENABLE-NEXT: cbz w0, LBB1_4 ; ENABLE-NEXT: ; %bb.1: ; %for.body.preheader -; ENABLE-NEXT: stp x20, x19, [sp, #-32]! ; 16-byte Folded Spill -; ENABLE-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill -; ENABLE-NEXT: add x29, sp, #16 ; =16 -; ENABLE-NEXT: .cfi_def_cfa w29, 16 -; ENABLE-NEXT: .cfi_offset w30, -8 -; ENABLE-NEXT: .cfi_offset w29, -16 -; ENABLE-NEXT: .cfi_offset w19, -24 -; ENABLE-NEXT: .cfi_offset w20, -32 +; ENABLE-NEXT: stp x29, x30, [sp, #-32]! ; 16-byte Folded Spill +; ENABLE-NEXT: stp x20, x19, [sp, #16] ; 16-byte Folded Spill +; ENABLE-NEXT: mov x29, sp +; ENABLE-NEXT: .cfi_def_cfa w29, 32 +; ENABLE-NEXT: .cfi_offset w30, -24 +; ENABLE-NEXT: .cfi_offset w29, -32 +; ENABLE-NEXT: .cfi_offset w19, -8 +; ENABLE-NEXT: .cfi_offset w20, -16 ; ENABLE-NEXT: mov w19, wzr ; ENABLE-NEXT: mov w20, #10 ; ENABLE-NEXT: LBB1_2: ; %for.body @@ -89,8 +89,8 @@ ; ENABLE-NEXT: b.ne LBB1_2 ; ENABLE-NEXT: ; %bb.3: ; %for.end ; ENABLE-NEXT: lsl w0, w19, #3 -; ENABLE-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload -; ENABLE-NEXT: ldp x20, x19, [sp], #32 ; 16-byte Folded Reload +; ENABLE-NEXT: ldp x20, x19, [sp, #16] ; 16-byte Folded Reload +; ENABLE-NEXT: ldp x29, x30, [sp], #32 ; 16-byte Folded Reload ; ENABLE-NEXT: ret ; ENABLE-NEXT: LBB1_4: ; %if.else ; ENABLE-NEXT: lsl w0, w1, #1 @@ -98,14 +98,14 @@ ; ; DISABLE-LABEL: freqSaveAndRestoreOutsideLoop: ; DISABLE: ; %bb.0: ; %entry -; DISABLE-NEXT: stp x20, x19, [sp, #-32]! ; 16-byte Folded Spill -; DISABLE-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill -; DISABLE-NEXT: add x29, sp, #16 ; =16 -; DISABLE-NEXT: .cfi_def_cfa w29, 16 -; DISABLE-NEXT: .cfi_offset w30, -8 -; DISABLE-NEXT: .cfi_offset w29, -16 -; DISABLE-NEXT: .cfi_offset w19, -24 -; DISABLE-NEXT: .cfi_offset w20, -32 +; DISABLE-NEXT: stp x29, x30, [sp, #-32]! ; 16-byte Folded Spill +; DISABLE-NEXT: stp x20, x19, [sp, #16] ; 16-byte Folded Spill +; DISABLE-NEXT: mov x29, sp +; DISABLE-NEXT: .cfi_def_cfa w29, 32 +; DISABLE-NEXT: .cfi_offset w30, -24 +; DISABLE-NEXT: .cfi_offset w29, -32 +; DISABLE-NEXT: .cfi_offset w19, -8 +; DISABLE-NEXT: .cfi_offset w20, -16 ; DISABLE-NEXT: cbz w0, LBB1_4 ; DISABLE-NEXT: ; %bb.1: ; %for.body.preheader ; DISABLE-NEXT: mov w19, wzr @@ -122,8 +122,8 @@ ; DISABLE-NEXT: LBB1_4: ; %if.else ; DISABLE-NEXT: lsl w0, w1, #1 ; DISABLE-NEXT: LBB1_5: ; %if.end -; DISABLE-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload -; DISABLE-NEXT: ldp x20, x19, [sp], #32 ; 16-byte Folded Reload +; DISABLE-NEXT: ldp x20, x19, [sp, #16] ; 16-byte Folded Reload +; DISABLE-NEXT: ldp x29, x30, [sp], #32 ; 16-byte Folded Reload ; DISABLE-NEXT: ret entry: %tobool = icmp eq i32 %cond, 0 @@ -158,14 +158,14 @@ define i32 @freqSaveAndRestoreOutsideLoop2(i32 %cond) { ; ENABLE-LABEL: freqSaveAndRestoreOutsideLoop2: ; ENABLE: ; %bb.0: ; %entry -; ENABLE-NEXT: stp x20, x19, [sp, #-32]! ; 16-byte Folded Spill -; ENABLE-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill -; ENABLE-NEXT: add x29, sp, #16 ; =16 -; ENABLE-NEXT: .cfi_def_cfa w29, 16 -; ENABLE-NEXT: .cfi_offset w30, -8 -; ENABLE-NEXT: .cfi_offset w29, -16 -; ENABLE-NEXT: .cfi_offset w19, -24 -; ENABLE-NEXT: .cfi_offset w20, -32 +; ENABLE-NEXT: stp x29, x30, [sp, #-32]! ; 16-byte Folded Spill +; ENABLE-NEXT: stp x20, x19, [sp, #16] ; 16-byte Folded Spill +; ENABLE-NEXT: mov x29, sp +; ENABLE-NEXT: .cfi_def_cfa w29, 32 +; ENABLE-NEXT: .cfi_offset w30, -24 +; ENABLE-NEXT: .cfi_offset w29, -32 +; ENABLE-NEXT: .cfi_offset w19, -8 +; ENABLE-NEXT: .cfi_offset w20, -16 ; ENABLE-NEXT: mov w19, wzr ; ENABLE-NEXT: mov w20, #10 ; ENABLE-NEXT: LBB2_1: ; %for.body @@ -176,20 +176,20 @@ ; ENABLE-NEXT: b.ne LBB2_1 ; ENABLE-NEXT: ; %bb.2: ; %for.end ; ENABLE-NEXT: mov w0, w19 -; ENABLE-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload -; ENABLE-NEXT: ldp x20, x19, [sp], #32 ; 16-byte Folded Reload +; ENABLE-NEXT: ldp x20, x19, [sp, #16] ; 16-byte Folded Reload +; ENABLE-NEXT: ldp x29, x30, [sp], #32 ; 16-byte Folded Reload ; ENABLE-NEXT: ret ; ; DISABLE-LABEL: freqSaveAndRestoreOutsideLoop2: ; DISABLE: ; %bb.0: ; %entry -; DISABLE-NEXT: stp x20, x19, [sp, #-32]! ; 16-byte Folded Spill -; DISABLE-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill -; DISABLE-NEXT: add x29, sp, #16 ; =16 -; DISABLE-NEXT: .cfi_def_cfa w29, 16 -; DISABLE-NEXT: .cfi_offset w30, -8 -; DISABLE-NEXT: .cfi_offset w29, -16 -; DISABLE-NEXT: .cfi_offset w19, -24 -; DISABLE-NEXT: .cfi_offset w20, -32 +; DISABLE-NEXT: stp x29, x30, [sp, #-32]! ; 16-byte Folded Spill +; DISABLE-NEXT: stp x20, x19, [sp, #16] ; 16-byte Folded Spill +; DISABLE-NEXT: mov x29, sp +; DISABLE-NEXT: .cfi_def_cfa w29, 32 +; DISABLE-NEXT: .cfi_offset w30, -24 +; DISABLE-NEXT: .cfi_offset w29, -32 +; DISABLE-NEXT: .cfi_offset w19, -8 +; DISABLE-NEXT: .cfi_offset w20, -16 ; DISABLE-NEXT: mov w19, wzr ; DISABLE-NEXT: mov w20, #10 ; DISABLE-NEXT: LBB2_1: ; %for.body @@ -200,8 +200,8 @@ ; DISABLE-NEXT: b.ne LBB2_1 ; DISABLE-NEXT: ; %bb.2: ; %for.end ; DISABLE-NEXT: mov w0, w19 -; DISABLE-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload -; DISABLE-NEXT: ldp x20, x19, [sp], #32 ; 16-byte Folded Reload +; DISABLE-NEXT: ldp x20, x19, [sp, #16] ; 16-byte Folded Reload +; DISABLE-NEXT: ldp x29, x30, [sp], #32 ; 16-byte Folded Reload ; DISABLE-NEXT: ret entry: br label %for.body @@ -226,14 +226,14 @@ ; ENABLE: ; %bb.0: ; %entry ; ENABLE-NEXT: cbz w0, LBB3_4 ; ENABLE-NEXT: ; %bb.1: ; %for.body.preheader -; ENABLE-NEXT: stp x20, x19, [sp, #-32]! ; 16-byte Folded Spill -; ENABLE-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill -; ENABLE-NEXT: add x29, sp, #16 ; =16 -; ENABLE-NEXT: .cfi_def_cfa w29, 16 -; ENABLE-NEXT: .cfi_offset w30, -8 -; ENABLE-NEXT: .cfi_offset w29, -16 -; ENABLE-NEXT: .cfi_offset w19, -24 -; ENABLE-NEXT: .cfi_offset w20, -32 +; ENABLE-NEXT: stp x29, x30, [sp, #-32]! ; 16-byte Folded Spill +; ENABLE-NEXT: stp x20, x19, [sp, #16] ; 16-byte Folded Spill +; ENABLE-NEXT: mov x29, sp +; ENABLE-NEXT: .cfi_def_cfa w29, 32 +; ENABLE-NEXT: .cfi_offset w30, -24 +; ENABLE-NEXT: .cfi_offset w29, -32 +; ENABLE-NEXT: .cfi_offset w19, -8 +; ENABLE-NEXT: .cfi_offset w20, -16 ; ENABLE-NEXT: mov w19, wzr ; ENABLE-NEXT: mov w20, #10 ; ENABLE-NEXT: LBB3_2: ; %for.body @@ -245,8 +245,8 @@ ; ENABLE-NEXT: ; %bb.3: ; %for.end ; ENABLE-NEXT: bl _somethingElse ; ENABLE-NEXT: lsl w0, w19, #3 -; ENABLE-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload -; ENABLE-NEXT: ldp x20, x19, [sp], #32 ; 16-byte Folded Reload +; ENABLE-NEXT: ldp x20, x19, [sp, #16] ; 16-byte Folded Reload +; ENABLE-NEXT: ldp x29, x30, [sp], #32 ; 16-byte Folded Reload ; ENABLE-NEXT: ret ; ENABLE-NEXT: LBB3_4: ; %if.else ; ENABLE-NEXT: lsl w0, w1, #1 @@ -254,14 +254,14 @@ ; ; DISABLE-LABEL: loopInfoSaveOutsideLoop: ; DISABLE: ; %bb.0: ; %entry -; DISABLE-NEXT: stp x20, x19, [sp, #-32]! ; 16-byte Folded Spill -; DISABLE-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill -; DISABLE-NEXT: add x29, sp, #16 ; =16 -; DISABLE-NEXT: .cfi_def_cfa w29, 16 -; DISABLE-NEXT: .cfi_offset w30, -8 -; DISABLE-NEXT: .cfi_offset w29, -16 -; DISABLE-NEXT: .cfi_offset w19, -24 -; DISABLE-NEXT: .cfi_offset w20, -32 +; DISABLE-NEXT: stp x29, x30, [sp, #-32]! ; 16-byte Folded Spill +; DISABLE-NEXT: stp x20, x19, [sp, #16] ; 16-byte Folded Spill +; DISABLE-NEXT: mov x29, sp +; DISABLE-NEXT: .cfi_def_cfa w29, 32 +; DISABLE-NEXT: .cfi_offset w30, -24 +; DISABLE-NEXT: .cfi_offset w29, -32 +; DISABLE-NEXT: .cfi_offset w19, -8 +; DISABLE-NEXT: .cfi_offset w20, -16 ; DISABLE-NEXT: cbz w0, LBB3_4 ; DISABLE-NEXT: ; %bb.1: ; %for.body.preheader ; DISABLE-NEXT: mov w19, wzr @@ -279,8 +279,8 @@ ; DISABLE-NEXT: LBB3_4: ; %if.else ; DISABLE-NEXT: lsl w0, w1, #1 ; DISABLE-NEXT: LBB3_5: ; %if.end -; DISABLE-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload -; DISABLE-NEXT: ldp x20, x19, [sp], #32 ; 16-byte Folded Reload +; DISABLE-NEXT: ldp x20, x19, [sp, #16] ; 16-byte Folded Reload +; DISABLE-NEXT: ldp x29, x30, [sp], #32 ; 16-byte Folded Reload ; DISABLE-NEXT: ret entry: %tobool = icmp eq i32 %cond, 0 @@ -318,9 +318,9 @@ ; ENABLE: ; %bb.0: ; %entry ; ENABLE-NEXT: cbz w0, LBB4_4 ; ENABLE-NEXT: ; %bb.1: ; %if.then -; ENABLE-NEXT: stp x20, x19, [sp, #-32]! ; 16-byte Folded Spill -; ENABLE-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill -; ENABLE-NEXT: add x29, sp, #16 ; =16 +; ENABLE-NEXT: stp x29, x30, [sp, #-32]! ; 16-byte Folded Spill +; ENABLE-NEXT: stp x20, x19, [sp, #16] ; 16-byte Folded Spill +; ENABLE-NEXT: mov x29, sp ; ENABLE-NEXT: bl _somethingElse ; ENABLE-NEXT: mov w19, wzr ; ENABLE-NEXT: mov w20, #10 @@ -332,8 +332,8 @@ ; ENABLE-NEXT: b.ne LBB4_2 ; ENABLE-NEXT: ; %bb.3: ; %for.end ; ENABLE-NEXT: lsl w0, w19, #3 -; ENABLE-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload -; ENABLE-NEXT: ldp x20, x19, [sp], #32 ; 16-byte Folded Reload +; ENABLE-NEXT: ldp x20, x19, [sp, #16] ; 16-byte Folded Reload +; ENABLE-NEXT: ldp x29, x30, [sp], #32 ; 16-byte Folded Reload ; ENABLE-NEXT: ret ; ENABLE-NEXT: LBB4_4: ; %if.else ; ENABLE-NEXT: lsl w0, w1, #1 @@ -341,9 +341,9 @@ ; ; DISABLE-LABEL: loopInfoRestoreOutsideLoop: ; DISABLE: ; %bb.0: ; %entry -; DISABLE-NEXT: stp x20, x19, [sp, #-32]! ; 16-byte Folded Spill -; DISABLE-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill -; DISABLE-NEXT: add x29, sp, #16 ; =16 +; DISABLE-NEXT: stp x29, x30, [sp, #-32]! ; 16-byte Folded Spill +; DISABLE-NEXT: stp x20, x19, [sp, #16] ; 16-byte Folded Spill +; DISABLE-NEXT: mov x29, sp ; DISABLE-NEXT: cbz w0, LBB4_4 ; DISABLE-NEXT: ; %bb.1: ; %if.then ; DISABLE-NEXT: bl _somethingElse @@ -361,8 +361,8 @@ ; DISABLE-NEXT: LBB4_4: ; %if.else ; DISABLE-NEXT: lsl w0, w1, #1 ; DISABLE-NEXT: LBB4_5: ; %if.end -; DISABLE-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload -; DISABLE-NEXT: ldp x20, x19, [sp], #32 ; 16-byte Folded Reload +; DISABLE-NEXT: ldp x20, x19, [sp, #16] ; 16-byte Folded Reload +; DISABLE-NEXT: ldp x29, x30, [sp], #32 ; 16-byte Folded Reload ; DISABLE-NEXT: ret entry: %tobool = icmp eq i32 %cond, 0 @@ -701,14 +701,14 @@ define void @infiniteloop() { ; ENABLE-LABEL: infiniteloop: ; ENABLE: ; %bb.0: ; %entry -; ENABLE-NEXT: stp x20, x19, [sp, #-32]! ; 16-byte Folded Spill -; ENABLE-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill -; ENABLE-NEXT: add x29, sp, #16 ; =16 -; ENABLE-NEXT: .cfi_def_cfa w29, 16 -; ENABLE-NEXT: .cfi_offset w30, -8 -; ENABLE-NEXT: .cfi_offset w29, -16 -; ENABLE-NEXT: .cfi_offset w19, -24 -; ENABLE-NEXT: .cfi_offset w20, -32 +; ENABLE-NEXT: stp x29, x30, [sp, #-32]! ; 16-byte Folded Spill +; ENABLE-NEXT: stp x20, x19, [sp, #16] ; 16-byte Folded Spill +; ENABLE-NEXT: mov x29, sp +; ENABLE-NEXT: .cfi_def_cfa w29, 32 +; ENABLE-NEXT: .cfi_offset w30, -24 +; ENABLE-NEXT: .cfi_offset w29, -32 +; ENABLE-NEXT: .cfi_offset w19, -8 +; ENABLE-NEXT: .cfi_offset w20, -16 ; ENABLE-NEXT: cbnz wzr, LBB10_3 ; ENABLE-NEXT: ; %bb.1: ; %if.then ; ENABLE-NEXT: sub x19, sp, #16 ; =16 @@ -721,21 +721,21 @@ ; ENABLE-NEXT: str w20, [x19] ; ENABLE-NEXT: b LBB10_2 ; ENABLE-NEXT: LBB10_3: ; %if.end -; ENABLE-NEXT: sub sp, x29, #16 ; =16 -; ENABLE-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload -; ENABLE-NEXT: ldp x20, x19, [sp], #32 ; 16-byte Folded Reload +; ENABLE-NEXT: mov sp, x29 +; ENABLE-NEXT: ldp x20, x19, [sp, #16] ; 16-byte Folded Reload +; ENABLE-NEXT: ldp x29, x30, [sp], #32 ; 16-byte Folded Reload ; ENABLE-NEXT: ret ; ; DISABLE-LABEL: infiniteloop: ; DISABLE: ; %bb.0: ; %entry -; DISABLE-NEXT: stp x20, x19, [sp, #-32]! ; 16-byte Folded Spill -; DISABLE-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill -; DISABLE-NEXT: add x29, sp, #16 ; =16 -; DISABLE-NEXT: .cfi_def_cfa w29, 16 -; DISABLE-NEXT: .cfi_offset w30, -8 -; DISABLE-NEXT: .cfi_offset w29, -16 -; DISABLE-NEXT: .cfi_offset w19, -24 -; DISABLE-NEXT: .cfi_offset w20, -32 +; DISABLE-NEXT: stp x29, x30, [sp, #-32]! ; 16-byte Folded Spill +; DISABLE-NEXT: stp x20, x19, [sp, #16] ; 16-byte Folded Spill +; DISABLE-NEXT: mov x29, sp +; DISABLE-NEXT: .cfi_def_cfa w29, 32 +; DISABLE-NEXT: .cfi_offset w30, -24 +; DISABLE-NEXT: .cfi_offset w29, -32 +; DISABLE-NEXT: .cfi_offset w19, -8 +; DISABLE-NEXT: .cfi_offset w20, -16 ; DISABLE-NEXT: cbnz wzr, LBB10_3 ; DISABLE-NEXT: ; %bb.1: ; %if.then ; DISABLE-NEXT: sub x19, sp, #16 ; =16 @@ -748,9 +748,9 @@ ; DISABLE-NEXT: str w20, [x19] ; DISABLE-NEXT: b LBB10_2 ; DISABLE-NEXT: LBB10_3: ; %if.end -; DISABLE-NEXT: sub sp, x29, #16 ; =16 -; DISABLE-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload -; DISABLE-NEXT: ldp x20, x19, [sp], #32 ; 16-byte Folded Reload +; DISABLE-NEXT: mov sp, x29 +; DISABLE-NEXT: ldp x20, x19, [sp, #16] ; 16-byte Folded Reload +; DISABLE-NEXT: ldp x29, x30, [sp], #32 ; 16-byte Folded Reload ; DISABLE-NEXT: ret entry: br i1 undef, label %if.then, label %if.end @@ -774,14 +774,14 @@ define void @infiniteloop2() { ; ENABLE-LABEL: infiniteloop2: ; ENABLE: ; %bb.0: ; %entry -; ENABLE-NEXT: stp x20, x19, [sp, #-32]! ; 16-byte Folded Spill -; ENABLE-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill -; ENABLE-NEXT: add x29, sp, #16 ; =16 -; ENABLE-NEXT: .cfi_def_cfa w29, 16 -; ENABLE-NEXT: .cfi_offset w30, -8 -; ENABLE-NEXT: .cfi_offset w29, -16 -; ENABLE-NEXT: .cfi_offset w19, -24 -; ENABLE-NEXT: .cfi_offset w20, -32 +; ENABLE-NEXT: stp x29, x30, [sp, #-32]! ; 16-byte Folded Spill +; ENABLE-NEXT: stp x20, x19, [sp, #16] ; 16-byte Folded Spill +; ENABLE-NEXT: mov x29, sp +; ENABLE-NEXT: .cfi_def_cfa w29, 32 +; ENABLE-NEXT: .cfi_offset w30, -24 +; ENABLE-NEXT: .cfi_offset w29, -32 +; ENABLE-NEXT: .cfi_offset w19, -8 +; ENABLE-NEXT: .cfi_offset w20, -16 ; ENABLE-NEXT: cbnz wzr, LBB11_3 ; ENABLE-NEXT: ; %bb.1: ; %if.then ; ENABLE-NEXT: sub x8, sp, #16 ; =16 @@ -800,21 +800,21 @@ ; ENABLE-NEXT: mov w9, #1 ; ENABLE-NEXT: b LBB11_2 ; ENABLE-NEXT: LBB11_3: ; %if.end -; ENABLE-NEXT: sub sp, x29, #16 ; =16 -; ENABLE-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload -; ENABLE-NEXT: ldp x20, x19, [sp], #32 ; 16-byte Folded Reload +; ENABLE-NEXT: mov sp, x29 +; ENABLE-NEXT: ldp x20, x19, [sp, #16] ; 16-byte Folded Reload +; ENABLE-NEXT: ldp x29, x30, [sp], #32 ; 16-byte Folded Reload ; ENABLE-NEXT: ret ; ; DISABLE-LABEL: infiniteloop2: ; DISABLE: ; %bb.0: ; %entry -; DISABLE-NEXT: stp x20, x19, [sp, #-32]! ; 16-byte Folded Spill -; DISABLE-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill -; DISABLE-NEXT: add x29, sp, #16 ; =16 -; DISABLE-NEXT: .cfi_def_cfa w29, 16 -; DISABLE-NEXT: .cfi_offset w30, -8 -; DISABLE-NEXT: .cfi_offset w29, -16 -; DISABLE-NEXT: .cfi_offset w19, -24 -; DISABLE-NEXT: .cfi_offset w20, -32 +; DISABLE-NEXT: stp x29, x30, [sp, #-32]! ; 16-byte Folded Spill +; DISABLE-NEXT: stp x20, x19, [sp, #16] ; 16-byte Folded Spill +; DISABLE-NEXT: mov x29, sp +; DISABLE-NEXT: .cfi_def_cfa w29, 32 +; DISABLE-NEXT: .cfi_offset w30, -24 +; DISABLE-NEXT: .cfi_offset w29, -32 +; DISABLE-NEXT: .cfi_offset w19, -8 +; DISABLE-NEXT: .cfi_offset w20, -16 ; DISABLE-NEXT: cbnz wzr, LBB11_3 ; DISABLE-NEXT: ; %bb.1: ; %if.then ; DISABLE-NEXT: sub x8, sp, #16 ; =16 @@ -833,9 +833,9 @@ ; DISABLE-NEXT: mov w9, #1 ; DISABLE-NEXT: b LBB11_2 ; DISABLE-NEXT: LBB11_3: ; %if.end -; DISABLE-NEXT: sub sp, x29, #16 ; =16 -; DISABLE-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload -; DISABLE-NEXT: ldp x20, x19, [sp], #32 ; 16-byte Folded Reload +; DISABLE-NEXT: mov sp, x29 +; DISABLE-NEXT: ldp x20, x19, [sp, #16] ; 16-byte Folded Reload +; DISABLE-NEXT: ldp x29, x30, [sp], #32 ; 16-byte Folded Reload ; DISABLE-NEXT: ret entry: br i1 undef, label %if.then, label %if.end @@ -1012,28 +1012,28 @@ define void @stack_realign2(i32 %a, i32 %b, i32* %ptr1, i32* %ptr2, i32* %ptr3, i32* %ptr4, i32* %ptr5, i32* %ptr6) { ; ENABLE-LABEL: stack_realign2: ; ENABLE: ; %bb.0: -; ENABLE-NEXT: stp x28, x27, [sp, #-96]! ; 16-byte Folded Spill -; ENABLE-NEXT: stp x26, x25, [sp, #16] ; 16-byte Folded Spill -; ENABLE-NEXT: stp x24, x23, [sp, #32] ; 16-byte Folded Spill -; ENABLE-NEXT: stp x22, x21, [sp, #48] ; 16-byte Folded Spill -; ENABLE-NEXT: stp x20, x19, [sp, #64] ; 16-byte Folded Spill -; ENABLE-NEXT: stp x29, x30, [sp, #80] ; 16-byte Folded Spill -; ENABLE-NEXT: add x29, sp, #80 ; =80 +; ENABLE-NEXT: stp x29, x30, [sp, #-96]! ; 16-byte Folded Spill +; ENABLE-NEXT: stp x28, x27, [sp, #16] ; 16-byte Folded Spill +; ENABLE-NEXT: stp x26, x25, [sp, #32] ; 16-byte Folded Spill +; ENABLE-NEXT: stp x24, x23, [sp, #48] ; 16-byte Folded Spill +; ENABLE-NEXT: stp x22, x21, [sp, #64] ; 16-byte Folded Spill +; ENABLE-NEXT: stp x20, x19, [sp, #80] ; 16-byte Folded Spill +; ENABLE-NEXT: mov x29, sp ; ENABLE-NEXT: sub x9, sp, #32 ; =32 ; ENABLE-NEXT: and sp, x9, #0xffffffffffffffe0 -; ENABLE-NEXT: .cfi_def_cfa w29, 16 -; ENABLE-NEXT: .cfi_offset w30, -8 -; ENABLE-NEXT: .cfi_offset w29, -16 -; ENABLE-NEXT: .cfi_offset w19, -24 -; ENABLE-NEXT: .cfi_offset w20, -32 -; ENABLE-NEXT: .cfi_offset w21, -40 -; ENABLE-NEXT: .cfi_offset w22, -48 -; ENABLE-NEXT: .cfi_offset w23, -56 -; ENABLE-NEXT: .cfi_offset w24, -64 -; ENABLE-NEXT: .cfi_offset w25, -72 -; ENABLE-NEXT: .cfi_offset w26, -80 -; ENABLE-NEXT: .cfi_offset w27, -88 -; ENABLE-NEXT: .cfi_offset w28, -96 +; ENABLE-NEXT: .cfi_def_cfa w29, 96 +; ENABLE-NEXT: .cfi_offset w30, -88 +; ENABLE-NEXT: .cfi_offset w29, -96 +; ENABLE-NEXT: .cfi_offset w19, -8 +; ENABLE-NEXT: .cfi_offset w20, -16 +; ENABLE-NEXT: .cfi_offset w21, -24 +; ENABLE-NEXT: .cfi_offset w22, -32 +; ENABLE-NEXT: .cfi_offset w23, -40 +; ENABLE-NEXT: .cfi_offset w24, -48 +; ENABLE-NEXT: .cfi_offset w25, -56 +; ENABLE-NEXT: .cfi_offset w26, -64 +; ENABLE-NEXT: .cfi_offset w27, -72 +; ENABLE-NEXT: .cfi_offset w28, -80 ; ENABLE-NEXT: lsl w8, w0, w1 ; ENABLE-NEXT: lsl w9, w1, w0 ; ENABLE-NEXT: lsr w10, w0, w1 @@ -1060,39 +1060,39 @@ ; ENABLE-NEXT: stp w0, w1, [x2, #4] ; ENABLE-NEXT: stp w16, w11, [x2, #12] ; ENABLE-NEXT: stp w13, w14, [x2, #20] -; ENABLE-NEXT: sub sp, x29, #80 ; =80 -; ENABLE-NEXT: ldp x29, x30, [sp, #80] ; 16-byte Folded Reload -; ENABLE-NEXT: ldp x20, x19, [sp, #64] ; 16-byte Folded Reload -; ENABLE-NEXT: ldp x22, x21, [sp, #48] ; 16-byte Folded Reload -; ENABLE-NEXT: ldp x24, x23, [sp, #32] ; 16-byte Folded Reload -; ENABLE-NEXT: ldp x26, x25, [sp, #16] ; 16-byte Folded Reload -; ENABLE-NEXT: ldp x28, x27, [sp], #96 ; 16-byte Folded Reload +; ENABLE-NEXT: mov sp, x29 +; ENABLE-NEXT: ldp x20, x19, [sp, #80] ; 16-byte Folded Reload +; ENABLE-NEXT: ldp x22, x21, [sp, #64] ; 16-byte Folded Reload +; ENABLE-NEXT: ldp x24, x23, [sp, #48] ; 16-byte Folded Reload +; ENABLE-NEXT: ldp x26, x25, [sp, #32] ; 16-byte Folded Reload +; ENABLE-NEXT: ldp x28, x27, [sp, #16] ; 16-byte Folded Reload +; ENABLE-NEXT: ldp x29, x30, [sp], #96 ; 16-byte Folded Reload ; ENABLE-NEXT: ret ; ; DISABLE-LABEL: stack_realign2: ; DISABLE: ; %bb.0: -; DISABLE-NEXT: stp x28, x27, [sp, #-96]! ; 16-byte Folded Spill -; DISABLE-NEXT: stp x26, x25, [sp, #16] ; 16-byte Folded Spill -; DISABLE-NEXT: stp x24, x23, [sp, #32] ; 16-byte Folded Spill -; DISABLE-NEXT: stp x22, x21, [sp, #48] ; 16-byte Folded Spill -; DISABLE-NEXT: stp x20, x19, [sp, #64] ; 16-byte Folded Spill -; DISABLE-NEXT: stp x29, x30, [sp, #80] ; 16-byte Folded Spill -; DISABLE-NEXT: add x29, sp, #80 ; =80 +; DISABLE-NEXT: stp x29, x30, [sp, #-96]! ; 16-byte Folded Spill +; DISABLE-NEXT: stp x28, x27, [sp, #16] ; 16-byte Folded Spill +; DISABLE-NEXT: stp x26, x25, [sp, #32] ; 16-byte Folded Spill +; DISABLE-NEXT: stp x24, x23, [sp, #48] ; 16-byte Folded Spill +; DISABLE-NEXT: stp x22, x21, [sp, #64] ; 16-byte Folded Spill +; DISABLE-NEXT: stp x20, x19, [sp, #80] ; 16-byte Folded Spill +; DISABLE-NEXT: mov x29, sp ; DISABLE-NEXT: sub x9, sp, #32 ; =32 ; DISABLE-NEXT: and sp, x9, #0xffffffffffffffe0 -; DISABLE-NEXT: .cfi_def_cfa w29, 16 -; DISABLE-NEXT: .cfi_offset w30, -8 -; DISABLE-NEXT: .cfi_offset w29, -16 -; DISABLE-NEXT: .cfi_offset w19, -24 -; DISABLE-NEXT: .cfi_offset w20, -32 -; DISABLE-NEXT: .cfi_offset w21, -40 -; DISABLE-NEXT: .cfi_offset w22, -48 -; DISABLE-NEXT: .cfi_offset w23, -56 -; DISABLE-NEXT: .cfi_offset w24, -64 -; DISABLE-NEXT: .cfi_offset w25, -72 -; DISABLE-NEXT: .cfi_offset w26, -80 -; DISABLE-NEXT: .cfi_offset w27, -88 -; DISABLE-NEXT: .cfi_offset w28, -96 +; DISABLE-NEXT: .cfi_def_cfa w29, 96 +; DISABLE-NEXT: .cfi_offset w30, -88 +; DISABLE-NEXT: .cfi_offset w29, -96 +; DISABLE-NEXT: .cfi_offset w19, -8 +; DISABLE-NEXT: .cfi_offset w20, -16 +; DISABLE-NEXT: .cfi_offset w21, -24 +; DISABLE-NEXT: .cfi_offset w22, -32 +; DISABLE-NEXT: .cfi_offset w23, -40 +; DISABLE-NEXT: .cfi_offset w24, -48 +; DISABLE-NEXT: .cfi_offset w25, -56 +; DISABLE-NEXT: .cfi_offset w26, -64 +; DISABLE-NEXT: .cfi_offset w27, -72 +; DISABLE-NEXT: .cfi_offset w28, -80 ; DISABLE-NEXT: lsl w8, w0, w1 ; DISABLE-NEXT: lsl w9, w1, w0 ; DISABLE-NEXT: lsr w10, w0, w1 @@ -1119,13 +1119,13 @@ ; DISABLE-NEXT: stp w0, w1, [x2, #4] ; DISABLE-NEXT: stp w16, w11, [x2, #12] ; DISABLE-NEXT: stp w13, w14, [x2, #20] -; DISABLE-NEXT: sub sp, x29, #80 ; =80 -; DISABLE-NEXT: ldp x29, x30, [sp, #80] ; 16-byte Folded Reload -; DISABLE-NEXT: ldp x20, x19, [sp, #64] ; 16-byte Folded Reload -; DISABLE-NEXT: ldp x22, x21, [sp, #48] ; 16-byte Folded Reload -; DISABLE-NEXT: ldp x24, x23, [sp, #32] ; 16-byte Folded Reload -; DISABLE-NEXT: ldp x26, x25, [sp, #16] ; 16-byte Folded Reload -; DISABLE-NEXT: ldp x28, x27, [sp], #96 ; 16-byte Folded Reload +; DISABLE-NEXT: mov sp, x29 +; DISABLE-NEXT: ldp x20, x19, [sp, #80] ; 16-byte Folded Reload +; DISABLE-NEXT: ldp x22, x21, [sp, #64] ; 16-byte Folded Reload +; DISABLE-NEXT: ldp x24, x23, [sp, #48] ; 16-byte Folded Reload +; DISABLE-NEXT: ldp x26, x25, [sp, #32] ; 16-byte Folded Reload +; DISABLE-NEXT: ldp x28, x27, [sp, #16] ; 16-byte Folded Reload +; DISABLE-NEXT: ldp x29, x30, [sp], #96 ; 16-byte Folded Reload ; DISABLE-NEXT: ret %tmp = alloca i32, align 32 %tmp1 = shl i32 %a, %b Index: test/CodeGen/AArch64/cgp-usubo.ll =================================================================== --- test/CodeGen/AArch64/cgp-usubo.ll +++ test/CodeGen/AArch64/cgp-usubo.ll @@ -151,10 +151,10 @@ define i1 @usubo_ult_cmp_dominates_i64(i64 %x, i64 %y, i64* %p, i1 %cond) nounwind { ; CHECK-LABEL: usubo_ult_cmp_dominates_i64: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: str x22, [sp, #-48]! // 8-byte Folded Spill -; CHECK-NEXT: stp x21, x20, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #-48]! // 8-byte Folded Spill +; CHECK-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: mov w20, w3 -; CHECK-NEXT: stp x19, x30, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: stp x22, x21, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: tbz w3, #0, .LBB8_3 ; CHECK-NEXT: // %bb.1: // %t ; CHECK-NEXT: cmp x0, x1 @@ -172,9 +172,9 @@ ; CHECK-NEXT: .LBB8_3: // %f ; CHECK-NEXT: and w0, w20, #0x1 ; CHECK-NEXT: .LBB8_4: // %f -; CHECK-NEXT: ldp x19, x30, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: ldp x21, x20, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: ldr x22, [sp], #48 // 8-byte Folded Reload +; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: ldp x22, x21, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp], #48 // 8-byte Folded Reload ; CHECK-NEXT: ret entry: br i1 %cond, label %t, label %f Index: test/CodeGen/AArch64/fast-isel-sp-adjust.ll =================================================================== --- test/CodeGen/AArch64/fast-isel-sp-adjust.ll +++ test/CodeGen/AArch64/fast-isel-sp-adjust.ll @@ -13,6 +13,7 @@ ; CHECK-ERRORS: LLVM ERROR: FastISel missed call ; CHECK-LABEL: foo: +; CHECK: sub ; CHECK-DAG: mov x[[SP:[0-9]+]], sp ; CHECK-DAG: mov [[TMP:w[0-9]+]], #4104 ; CHECK: mov w[[OFFSET:[0-9]+]], [[TMP]] Index: test/CodeGen/AArch64/irg_sp_tagp.ll =================================================================== --- test/CodeGen/AArch64/irg_sp_tagp.ll +++ test/CodeGen/AArch64/irg_sp_tagp.ll @@ -35,7 +35,7 @@ define void @realign() { entry: ; CHECK-LABEL: realign: -; CHECK: add x29, sp, #16 +; CHECK: mov x29, sp ; CHECK: and sp, x{{[0-9]*}}, #0xffffffffffffffc0 ; CHECK: irg [[R:x[0-9]+]], sp{{$}} ; CHECK: addg x0, [[R]], #0, #1 Index: test/CodeGen/AArch64/regress-w29-reserved-with-fp.ll =================================================================== --- test/CodeGen/AArch64/regress-w29-reserved-with-fp.ll +++ test/CodeGen/AArch64/regress-w29-reserved-with-fp.ll @@ -5,7 +5,7 @@ define void @test_w29_reserved() { ; CHECK-LABEL: test_w29_reserved: -; CHECK: add x29, sp, #{{[0-9]+}} +; CHECK: mov x29, sp %val1 = load volatile i32, i32* @var %val2 = load volatile i32, i32* @var Index: test/CodeGen/AArch64/reverse-csr-restore-seq.mir =================================================================== --- test/CodeGen/AArch64/reverse-csr-restore-seq.mir +++ test/CodeGen/AArch64/reverse-csr-restore-seq.mir @@ -97,10 +97,10 @@ B %bb.1 bb.1: - ; CHECK: $x20, $lr = frame-destroy LDPXi $sp, 2 - ; BEFORELDSTOPT-NEXT: $x21 = frame-destroy LDRXui $sp, 0 + ; CHECK: $x21, $x20 = frame-destroy LDPXi $sp, 2 + ; BEFORELDSTOPT-NEXT: $lr = frame-destroy LDRXui $sp, 0 ; BEFORELDSTOPT-NEXT: $sp = frame-destroy ADDXri $sp, 32, 0 - ; AFTERLDSTOPT-NEXT: early-clobber $sp, $x21 = frame-destroy LDRXpost $sp, 32 + ; AFTERLDSTOPT-NEXT: early-clobber $sp, $lr = frame-destroy LDRXpost $sp, 32 RET_ReallyLR ... Index: test/CodeGen/AArch64/seh-finally.ll =================================================================== --- test/CodeGen/AArch64/seh-finally.ll +++ test/CodeGen/AArch64/seh-finally.ll @@ -86,7 +86,7 @@ define void @stack_realign() #0 personality i8* bitcast (i32 (...)* @__C_specific_handler to i8*) { entry: ; CHECK-LABEL: stack_realign -; CHECK: add x29, sp, #16 +; CHECK: mov x29, sp ; CHECK: sub x9, sp, #64 ; CHECK: and sp, x9, #0xffffffffffffffe0 ; CHECK: mov x19, sp @@ -205,15 +205,15 @@ define void @vla_and_realign(i32 %n) #0 personality i8* bitcast (i32 (...)* @__C_specific_handler to i8*) { entry: ; CHECK-LABEL: vla_and_realign -; CHECK: add x29, sp, #16 +; CHECK: mov x29, sp ; CHECK: sub x9, sp, #64 ; CHECK: and sp, x9, #0xffffffffffffffe0 ; CHECK: mov x19, sp ; CHECK: mov x1, #-2 ; CHECK: stur x1, [x19] ; CHECK: .set .Lvla_and_realign$frame_escape_0, 32 -; CHECK: stur w0, [x29, #-4] -; CHECK: ldur w8, [x29, #-4] +; CHECK: str w0, [x29, #28] +; CHECK: ldr w8, [x29, #28] ; CHECK: mov x9, sp ; CHECK: str x9, [x19, #24] ; CHECK: str x8, [x19, #16] Index: test/CodeGen/AArch64/shadow-call-stack.ll =================================================================== --- test/CodeGen/AArch64/shadow-call-stack.ll +++ test/CodeGen/AArch64/shadow-call-stack.ll @@ -41,7 +41,7 @@ %res12 = add i32 %res1, %res2 %res34 = add i32 %res3, %res4 %res1234 = add i32 %res12, %res34 - ; CHECK: ldp {{.*}}x30, [sp + ; CHECK: ldp x30,{{.*}}, [sp ; CHECK: ldr x30, [x18, #-8]! ; CHECK: ret ret i32 %res1234 Index: test/CodeGen/AArch64/sink-copy-for-shrink-wrap.ll =================================================================== --- test/CodeGen/AArch64/sink-copy-for-shrink-wrap.ll +++ test/CodeGen/AArch64/sink-copy-for-shrink-wrap.ll @@ -4,7 +4,7 @@ ; CHECK-NOT: stp ; CHECK-NOT: mov w{{[0-9]+}}, w0 ; CHECK-LABEL: %bb.1: -; CHECK: stp x19 +; CHECK: stp {{.*}}x19 ; CHECK: mov w{{[0-9]+}}, w0 define i32 @shrinkwrapme(i32 %paramAcrossCall, i32 %paramNotAcrossCall) { Index: test/CodeGen/AArch64/spill-stack-realignment.mir =================================================================== --- test/CodeGen/AArch64/spill-stack-realignment.mir +++ test/CodeGen/AArch64/spill-stack-realignment.mir @@ -13,7 +13,7 @@ # CHECK: id: 0, name: '', type: default, offset: -64, size: 4, alignment: 64 # CHECK-NEXT: stack-id: default # CHECK-NEXT: local-offset: -64 -# CHECK: id: 1, name: '', type: default, offset: -20, size: 4, alignment: 4 +# CHECK: id: 1, name: '', type: default, offset: -4, size: 4, alignment: 4 # CHECK-NEXT: stack-id: default # CHECK-NEXT: local-offset: -68 stack: @@ -23,7 +23,7 @@ # CHECK: body: # CHECK: $sp = ANDXri killed ${{x[0-9]+}}, 7865 # CHECK: STRSui $s0, $sp, 0 -# CHECK: STURSi $s0, $fp, -4 +# CHECK: STRSui $s0, $fp, 7 body: | bb.0.entry: liveins: $s0 Index: test/CodeGen/AArch64/sponentry.ll =================================================================== --- test/CodeGen/AArch64/sponentry.ll +++ test/CodeGen/AArch64/sponentry.ll @@ -38,8 +38,8 @@ ; CHECK: foo: ; CHECK: sub sp, sp, #448 -; CHECK: add x29, sp, #432 -; CHECK: add x1, x29, #16 +; CHECK: add x29, sp, #416 +; CHECK: add x1, x29, #32 ; CHECK: bl _setjmpex ; NOFP: sub sp, sp, #432 Index: test/CodeGen/AArch64/stack-guard-reassign.ll =================================================================== --- test/CodeGen/AArch64/stack-guard-reassign.ll +++ test/CodeGen/AArch64/stack-guard-reassign.ll @@ -5,4 +5,4 @@ ; CHECK-LABEL: fn: ; CHECK: adrp x8, __stack_chk_guard ; CHECK-NEXT: ldr x8, [x8, :lo12:__stack_chk_guard] -; CHECK-NEXT: stur x8, [x29, #-24] +; CHECK-NEXT: stur x8, [x29, #-8] Index: test/CodeGen/AArch64/stack-guard-vaarg.ll =================================================================== --- test/CodeGen/AArch64/stack-guard-vaarg.ll +++ test/CodeGen/AArch64/stack-guard-vaarg.ll @@ -9,7 +9,7 @@ ; CHECK: ldr [[GUARD:x[0-9]+]]{{.*}}:lo12:__stack_chk_guard] ; Make sure the canary is placed relative to the frame pointer, not ; the stack pointer. -; CHECK: stur [[GUARD]], [x29, #-24] +; CHECK: stur [[GUARD]], [x29, #-8] define void @test(i8* %i, ...) #0 { entry: %buf = alloca [10 x i8], align 1 Index: test/CodeGen/AArch64/swifterror.ll =================================================================== --- test/CodeGen/AArch64/swifterror.ll +++ test/CodeGen/AArch64/swifterror.ll @@ -420,13 +420,13 @@ } ; CHECK-APPLE-LABEL: params_in_reg ; Save callee saved registers and swifterror since it will be clobbered by the first call to params_in_reg2. -; CHECK-APPLE: stp x21, x28, [sp +; CHECK-APPLE: stp x29, x30, [sp +; CHECK-APPLE: str x28, [sp ; CHECK-APPLE: stp x27, x26, [sp ; CHECK-APPLE: stp x25, x24, [sp ; CHECK-APPLE: stp x23, x22, [sp ; CHECK-APPLE: stp x20, x19, [sp -; CHECK-APPLE: stp x29, x30, [sp -; CHECK-APPLE: str x20, [sp +; CHECK-APPLE: stp x20, x21, [sp ; Store argument registers. ; CHECK-APPLE: mov x23, x7 ; CHECK-APPLE: mov x24, x6 @@ -462,8 +462,6 @@ ; CHECK-APPLE: bl _params_in_reg2 ; Restore calle save registers but don't clober swifterror x21. ; CHECK-APPLE-NOT: x21 -; CHECK-APPLE: ldp x29, x30, [sp -; CHECK-APPLE-NOT: x21 ; CHECK-APPLE: ldp x20, x19, [sp ; CHECK-APPLE-NOT: x21 ; CHECK-APPLE: ldp x23, x22, [sp @@ -474,6 +472,8 @@ ; CHECK-APPLE-NOT: x21 ; CHECK-APPLE: ldr x28, [sp ; CHECK-APPLE-NOT: x21 +; CHECK-APPLE: ldp x29, x30, [sp +; CHECK-APPLE-NOT: x21 ; CHECK-APPLE: ret define swiftcc void @params_in_reg(i64, i64, i64, i64, i64, i64, i64, i64, i8* swiftself, %swift_error** nocapture swifterror %err) { %error_ptr_ref = alloca swifterror %swift_error*, align 8 @@ -486,12 +486,12 @@ ; CHECK-APPLE-LABEL: params_and_return_in_reg ; Store callee saved registers. -; CHECK-APPLE: stp x20, x28, [sp, #24 +; CHECK-APPLE: stp x29, x30, [sp, #32 +; CHECK-APPLE: str x28, [sp ; CHECK-APPLE: stp x27, x26, [sp ; CHECK-APPLE: stp x25, x24, [sp ; CHECK-APPLE: stp x23, x22, [sp ; CHECK-APPLE: stp x20, x19, [sp -; CHECK-APPLE: stp x29, x30, [sp ; Save original arguments. ; CHECK-APPLE: mov x23, x21 ; CHECK-APPLE: str x7, [sp, #16] @@ -524,7 +524,7 @@ ; CHECK-APPLE: mov x4, x26 ; CHECK-APPLE: mov x5, x25 ; CHECK-APPLE: mov x6, x24 -; CHECK-APPLE: ldp x7, x20, [sp, #16] +; CHECK-APPLE: ldr x7, [sp, #16] ; CHECK-APPLE: mov x21, x23 ; CHECK-APPLE: bl _params_and_return_in_reg2 ; Store return values. @@ -537,7 +537,7 @@ ; CHECK-APPLE: mov x28, x6 ; CHECK-APPLE: mov x23, x7 ; Save swifterror %err. -; CHECK-APPLE: str x21, [sp, #24] +; CHECK-APPLE: stur x21, [x29, #-8] ; Setup call. ; CHECK-APPLE: mov w0, #1 ; CHECK-APPLE: mov w1, #2 @@ -561,12 +561,12 @@ ; CHECK-APPLE: mov x6, x28 ; CHECK-APPLE: mov x7, x23 ; Restore swifterror %err and callee save registers. -; CHECK-APPLE: ldp x21, x28, [sp, #24 -; CHECK-APPLE: ldp x29, x30, [sp ; CHECK-APPLE: ldp x20, x19, [sp ; CHECK-APPLE: ldp x23, x22, [sp ; CHECK-APPLE: ldp x25, x24, [sp ; CHECK-APPLE: ldp x27, x26, [sp +; CHECK-APPLE: ldr x28, [sp +; CHECK-APPLE: ldp x29, x30, [sp, #32 ; CHECK-APPLE: ret define swiftcc { i64, i64, i64, i64, i64, i64, i64, i64 } @params_and_return_in_reg(i64, i64, i64, i64, i64, i64, i64, i64, i8* swiftself, %swift_error** nocapture swifterror %err) { %error_ptr_ref = alloca swifterror %swift_error*, align 8 Index: test/CodeGen/AArch64/unfold-masked-merge-scalar-constmask-innerouter.ll =================================================================== --- test/CodeGen/AArch64/unfold-masked-merge-scalar-constmask-innerouter.ll +++ test/CodeGen/AArch64/unfold-masked-merge-scalar-constmask-innerouter.ll @@ -202,16 +202,16 @@ define i32 @in_multiuse_A_constmask(i32 %x, i32 %y, i32 %z) nounwind { ; CHECK-LABEL: in_multiuse_A_constmask: ; CHECK: // %bb.0: -; CHECK-NEXT: str x20, [sp, #-32]! // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #-32]! // 8-byte Folded Spill ; CHECK-NEXT: eor w8, w0, w1 +; CHECK-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: and w20, w8, #0xffff00 ; CHECK-NEXT: mov w0, w20 -; CHECK-NEXT: stp x19, x30, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: mov w19, w1 ; CHECK-NEXT: bl use32 ; CHECK-NEXT: eor w0, w20, w19 -; CHECK-NEXT: ldp x19, x30, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: ldr x20, [sp], #32 // 8-byte Folded Reload +; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp], #32 // 8-byte Folded Reload ; CHECK-NEXT: ret %n0 = xor i32 %x, %y %n1 = and i32 %n0, 16776960 @@ -223,15 +223,15 @@ define i32 @in_multiuse_B_constmask(i32 %x, i32 %y, i32 %z) nounwind { ; CHECK-LABEL: in_multiuse_B_constmask: ; CHECK: // %bb.0: -; CHECK-NEXT: str x20, [sp, #-32]! // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #-32]! // 8-byte Folded Spill ; CHECK-NEXT: eor w0, w0, w1 -; CHECK-NEXT: stp x19, x30, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: mov w19, w1 ; CHECK-NEXT: and w20, w0, #0xffff00 ; CHECK-NEXT: bl use32 ; CHECK-NEXT: eor w0, w20, w19 -; CHECK-NEXT: ldp x19, x30, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: ldr x20, [sp], #32 // 8-byte Folded Reload +; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp], #32 // 8-byte Folded Reload ; CHECK-NEXT: ret %n0 = xor i32 %x, %y %n1 = and i32 %n0, 16776960 Index: test/CodeGen/AArch64/unfold-masked-merge-scalar-constmask-interleavedbits.ll =================================================================== --- test/CodeGen/AArch64/unfold-masked-merge-scalar-constmask-interleavedbits.ll +++ test/CodeGen/AArch64/unfold-masked-merge-scalar-constmask-interleavedbits.ll @@ -208,16 +208,16 @@ define i32 @in_multiuse_A_constmask(i32 %x, i32 %y, i32 %z) nounwind { ; CHECK-LABEL: in_multiuse_A_constmask: ; CHECK: // %bb.0: -; CHECK-NEXT: str x20, [sp, #-32]! // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #-32]! // 8-byte Folded Spill ; CHECK-NEXT: eor w8, w0, w1 +; CHECK-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: and w20, w8, #0x55555555 ; CHECK-NEXT: mov w0, w20 -; CHECK-NEXT: stp x19, x30, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: mov w19, w1 ; CHECK-NEXT: bl use32 ; CHECK-NEXT: eor w0, w20, w19 -; CHECK-NEXT: ldp x19, x30, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: ldr x20, [sp], #32 // 8-byte Folded Reload +; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp], #32 // 8-byte Folded Reload ; CHECK-NEXT: ret %n0 = xor i32 %x, %y %n1 = and i32 %n0, 1431655765 @@ -229,15 +229,15 @@ define i32 @in_multiuse_B_constmask(i32 %x, i32 %y, i32 %z) nounwind { ; CHECK-LABEL: in_multiuse_B_constmask: ; CHECK: // %bb.0: -; CHECK-NEXT: str x20, [sp, #-32]! // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #-32]! // 8-byte Folded Spill ; CHECK-NEXT: eor w0, w0, w1 -; CHECK-NEXT: stp x19, x30, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: mov w19, w1 ; CHECK-NEXT: and w20, w0, #0x55555555 ; CHECK-NEXT: bl use32 ; CHECK-NEXT: eor w0, w20, w19 -; CHECK-NEXT: ldp x19, x30, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: ldr x20, [sp], #32 // 8-byte Folded Reload +; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp], #32 // 8-byte Folded Reload ; CHECK-NEXT: ret %n0 = xor i32 %x, %y %n1 = and i32 %n0, 1431655765 Index: test/CodeGen/AArch64/unfold-masked-merge-scalar-constmask-interleavedbytehalves.ll =================================================================== --- test/CodeGen/AArch64/unfold-masked-merge-scalar-constmask-interleavedbytehalves.ll +++ test/CodeGen/AArch64/unfold-masked-merge-scalar-constmask-interleavedbytehalves.ll @@ -204,16 +204,16 @@ define i32 @in_multiuse_A_constmask(i32 %x, i32 %y, i32 %z) nounwind { ; CHECK-LABEL: in_multiuse_A_constmask: ; CHECK: // %bb.0: -; CHECK-NEXT: str x20, [sp, #-32]! // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #-32]! // 8-byte Folded Spill ; CHECK-NEXT: eor w8, w0, w1 +; CHECK-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: and w20, w8, #0xf0f0f0f ; CHECK-NEXT: mov w0, w20 -; CHECK-NEXT: stp x19, x30, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: mov w19, w1 ; CHECK-NEXT: bl use32 ; CHECK-NEXT: eor w0, w20, w19 -; CHECK-NEXT: ldp x19, x30, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: ldr x20, [sp], #32 // 8-byte Folded Reload +; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp], #32 // 8-byte Folded Reload ; CHECK-NEXT: ret %n0 = xor i32 %x, %y %n1 = and i32 %n0, 252645135 @@ -225,15 +225,15 @@ define i32 @in_multiuse_B_constmask(i32 %x, i32 %y, i32 %z) nounwind { ; CHECK-LABEL: in_multiuse_B_constmask: ; CHECK: // %bb.0: -; CHECK-NEXT: str x20, [sp, #-32]! // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #-32]! // 8-byte Folded Spill ; CHECK-NEXT: eor w0, w0, w1 -; CHECK-NEXT: stp x19, x30, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: mov w19, w1 ; CHECK-NEXT: and w20, w0, #0xf0f0f0f ; CHECK-NEXT: bl use32 ; CHECK-NEXT: eor w0, w20, w19 -; CHECK-NEXT: ldp x19, x30, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: ldr x20, [sp], #32 // 8-byte Folded Reload +; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp], #32 // 8-byte Folded Reload ; CHECK-NEXT: ret %n0 = xor i32 %x, %y %n1 = and i32 %n0, 252645135 Index: test/CodeGen/AArch64/unfold-masked-merge-scalar-constmask-lowhigh.ll =================================================================== --- test/CodeGen/AArch64/unfold-masked-merge-scalar-constmask-lowhigh.ll +++ test/CodeGen/AArch64/unfold-masked-merge-scalar-constmask-lowhigh.ll @@ -197,16 +197,16 @@ define i32 @in_multiuse_A_constmask(i32 %x, i32 %y, i32 %z) nounwind { ; CHECK-LABEL: in_multiuse_A_constmask: ; CHECK: // %bb.0: -; CHECK-NEXT: str x20, [sp, #-32]! // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #-32]! // 8-byte Folded Spill ; CHECK-NEXT: eor w8, w0, w1 +; CHECK-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: and w20, w8, #0xffff ; CHECK-NEXT: mov w0, w20 -; CHECK-NEXT: stp x19, x30, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: mov w19, w1 ; CHECK-NEXT: bl use32 ; CHECK-NEXT: eor w0, w20, w19 -; CHECK-NEXT: ldp x19, x30, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: ldr x20, [sp], #32 // 8-byte Folded Reload +; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp], #32 // 8-byte Folded Reload ; CHECK-NEXT: ret %n0 = xor i32 %x, %y %n1 = and i32 %n0, 65535 @@ -218,15 +218,15 @@ define i32 @in_multiuse_B_constmask(i32 %x, i32 %y, i32 %z) nounwind { ; CHECK-LABEL: in_multiuse_B_constmask: ; CHECK: // %bb.0: -; CHECK-NEXT: str x20, [sp, #-32]! // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #-32]! // 8-byte Folded Spill ; CHECK-NEXT: eor w0, w0, w1 -; CHECK-NEXT: stp x19, x30, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: mov w19, w1 ; CHECK-NEXT: and w20, w0, #0xffff ; CHECK-NEXT: bl use32 ; CHECK-NEXT: eor w0, w20, w19 -; CHECK-NEXT: ldp x19, x30, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: ldr x20, [sp], #32 // 8-byte Folded Reload +; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp], #32 // 8-byte Folded Reload ; CHECK-NEXT: ret %n0 = xor i32 %x, %y %n1 = and i32 %n0, 65535 Index: test/CodeGen/AArch64/unfold-masked-merge-scalar-variablemask.ll =================================================================== --- test/CodeGen/AArch64/unfold-masked-merge-scalar-variablemask.ll +++ test/CodeGen/AArch64/unfold-masked-merge-scalar-variablemask.ll @@ -552,16 +552,16 @@ define i32 @in_multiuse_A(i32 %x, i32 %y, i32 %z, i32 %mask) nounwind { ; CHECK-LABEL: in_multiuse_A: ; CHECK: // %bb.0: -; CHECK-NEXT: str x20, [sp, #-32]! // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #-32]! // 8-byte Folded Spill ; CHECK-NEXT: eor w8, w0, w1 +; CHECK-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: and w20, w8, w3 ; CHECK-NEXT: mov w0, w20 -; CHECK-NEXT: stp x19, x30, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: mov w19, w1 ; CHECK-NEXT: bl use32 ; CHECK-NEXT: eor w0, w20, w19 -; CHECK-NEXT: ldp x19, x30, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: ldr x20, [sp], #32 // 8-byte Folded Reload +; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp], #32 // 8-byte Folded Reload ; CHECK-NEXT: ret %n0 = xor i32 %x, %y %n1 = and i32 %n0, %mask @@ -572,15 +572,15 @@ define i32 @in_multiuse_B(i32 %x, i32 %y, i32 %z, i32 %mask) nounwind { ; CHECK-LABEL: in_multiuse_B: ; CHECK: // %bb.0: -; CHECK-NEXT: str x20, [sp, #-32]! // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #-32]! // 8-byte Folded Spill ; CHECK-NEXT: eor w0, w0, w1 -; CHECK-NEXT: stp x19, x30, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: mov w19, w1 ; CHECK-NEXT: and w20, w0, w3 ; CHECK-NEXT: bl use32 ; CHECK-NEXT: eor w0, w20, w19 -; CHECK-NEXT: ldp x19, x30, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: ldr x20, [sp], #32 // 8-byte Folded Reload +; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp], #32 // 8-byte Folded Reload ; CHECK-NEXT: ret %n0 = xor i32 %x, %y %n1 = and i32 %n0, %mask Index: test/CodeGen/AArch64/unreachable-emergency-spill-slot.mir =================================================================== --- test/CodeGen/AArch64/unreachable-emergency-spill-slot.mir +++ test/CodeGen/AArch64/unreachable-emergency-spill-slot.mir @@ -14,11 +14,11 @@ body: | bb.0: STRXui undef $x8, %stack.0, 0 - ; CHECK: STURXi undef $x8, $fp, -24 + ; CHECK: STRXui undef $x8, $fp, 0 B %bb.1 bb.1: - liveins: $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7, $x8, $x9, $x10, $x11, $x12, $x13, $x14, $x15, $x16, $x17, $x18, $x19, $x20, $x21, $x22, $x23, $x24, $x25, $x26, $x27, $x28, $fp, $lr - RET_ReallyLR implicit $x21, implicit $x22, implicit $x23, implicit $x24, implicit $x25, implicit $x26, implicit $x27, implicit $x28 + liveins: $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7, $x8, $x9, $x10, $x11, $x12, $x13, $x14, $x15, $x16, $x17, $x18, $x19, $x20, $x21, $x22, $x23, $x24, $x25, $x26, $x27, $x28 + RET_ReallyLR implicit $x19, implicit $x20, implicit $x21, implicit $x22, implicit $x23, implicit $x24, implicit $x25, implicit $x26, implicit $x27, implicit $x28 ... --- name: fpDoesNotFit Index: test/CodeGen/AArch64/win64_vararg.ll =================================================================== --- test/CodeGen/AArch64/win64_vararg.ll +++ test/CodeGen/AArch64/win64_vararg.ll @@ -103,21 +103,21 @@ declare i64* @__local_stdio_printf_options() local_unnamed_addr #4 ; CHECK-LABEL: fp -; CHECK: str x21, [sp, #-96]! -; CHECK: stp x19, x20, [sp, #16] -; CHECK: stp x29, x30, [sp, #32] -; CHECK: add x29, sp, #32 -; CHECK: add x8, x29, #24 +; CHECK: stp x29, x30, [sp, #-96] +; CHECK: str x21, [sp, #16] +; CHECK: stp x19, x20, [sp, #32] +; CHECK: mov x29, sp +; CHECK: add x8, x29, #56 ; CHECK: mov x19, x2 ; CHECK: mov x20, x1 ; CHECK: mov x21, x0 -; CHECK: stp x3, x4, [x29, #24] -; CHECK: stp x5, x6, [x29, #40] -; CHECK: str x7, [x29, #56] -; CHECK: str x8, [sp, #8] +; CHECK: stp x3, x4, [x29, #56] +; CHECK: stp x5, x6, [x29, #72] +; CHECK: str x7, [x29, #88] +; CHECK: str x8, [x29, #24] ; CHECK: bl __local_stdio_printf_options ; CHECK: ldr x8, [x0] -; CHECK: add x5, x29, #24 +; CHECK: add x5, x29, #56 ; CHECK: mov x1, x21 ; CHECK: mov x2, x20 ; CHECK: orr x0, x8, #0x2 @@ -126,9 +126,9 @@ ; CHECK: bl __stdio_common_vsprintf ; CHECK: cmp w0, #0 ; CHECK: csinv w0, w0, wzr, ge -; CHECK: ldp x29, x30, [sp, #32] -; CHECK: ldp x19, x20, [sp, #16] -; CHECK: ldr x21, [sp], #96 +; CHECK: ldp x19, x20, [sp, #32] +; CHECK: ldr x21, [sp, #16] +; CHECK: ldp x29, x30, [sp], #96 ; CHECK: ret define i32 @fp(i8*, i64, i8*, ...) local_unnamed_addr #6 { %4 = alloca i8*, align 8 @@ -150,26 +150,26 @@ attributes #6 = { "no-frame-pointer-elim"="true" } ; CHECK-LABEL: vla -; CHECK: str x23, [sp, #-112]! -; CHECK: stp x21, x22, [sp, #16] -; CHECK: stp x19, x20, [sp, #32] -; CHECK: stp x29, x30, [sp, #48] -; CHECK: add x29, sp, #48 -; CHECK: add x8, x29, #16 -; CHECK: stur x8, [x29, #-40] +; CHECK: stp x29, x30, [sp, #-112]! +; CHECK: str x23, [sp, #16] +; CHECK: stp x21, x22, [sp, #32] +; CHECK: stp x19, x20, [sp, #48] +; CHECK: mov x29, sp +; CHECK: add x8, x29, #64 +; CHECK: str x8, [x29, #24] ; CHECK: mov w8, w0 ; CHECK: add x8, x8, #15 ; CHECK: lsr x15, x8, #4 ; CHECK: mov x19, x1 ; CHECK: mov [[REG2:x[0-9]+]], sp -; CHECK: stp x2, x3, [x29, #16] -; CHECK: stp x4, x5, [x29, #32] -; CHECK: stp x6, x7, [x29, #48] +; CHECK: stp x2, x3, [x29, #64] +; CHECK: stp x4, x5, [x29, #80] +; CHECK: stp x6, x7, [x29, #96] ; CHECK: bl __chkstk ; CHECK: mov x8, sp ; CHECK: sub [[REG:x[0-9]+]], x8, x15, lsl #4 ; CHECK: mov sp, [[REG]] -; CHECK: ldur [[REG3:x[0-9]+]], [x29, #-40] +; CHECK: ldr [[REG3:x[0-9]+]], [x29, #24] ; CHECK: sxtw [[REG4:x[0-9]+]], w0 ; CHECK: bl __local_stdio_printf_options ; CHECK: ldr x8, [x0] @@ -181,11 +181,11 @@ ; CHECK: mov x5, [[REG3]] ; CHECK: bl __stdio_common_vsprintf ; CHECK: mov sp, [[REG2]] -; CHECK: sub sp, x29, #48 -; CHECK: ldp x29, x30, [sp, #48] -; CHECK: ldp x19, x20, [sp, #32] -; CHECK: ldp x21, x22, [sp, #16] -; CHECK: ldr x23, [sp], #112 +; CHECK: mov sp, x29 +; CHECK: ldp x19, x20, [sp, #48] +; CHECK: ldp x21, x22, [sp, #32] +; CHECK: ldr x23, [sp, #16] +; CHECK: ldp x29, x30, [sp], #112 ; CHECK: ret define void @vla(i32, i8*, ...) local_unnamed_addr { %3 = alloca i8*, align 8 @@ -212,9 +212,9 @@ ; CHECK-LABEL: snprintf ; CHECK-DAG: sub sp, sp, #96 -; CHECK-DAG: str x21, [sp, #16] -; CHECK-DAG: stp x19, x20, [sp, #24] -; CHECK-DAG: str x30, [sp, #40] +; CHECK-DAG: str x30, [sp, #16] +; CHECK-DAG: str x21, [sp, #24] +; CHECK-DAG: stp x19, x20, [sp, #32] ; CHECK-DAG: add x8, sp, #56 ; CHECK-DAG: mov x19, x2 ; CHECK-DAG: mov x20, x1 @@ -232,9 +232,9 @@ ; CHECK-DAG: mov x3, x19 ; CHECK-DAG: mov x4, xzr ; CHECK-DAG: bl __stdio_common_vsprintf -; CHECK-DAG: ldr x30, [sp, #40] -; CHECK-DAG: ldp x19, x20, [sp, #24] -; CHECK-DAG: ldr x21, [sp, #16] +; CHECK-DAG: ldr x30, [sp, #16] +; CHECK-DAG: ldr x21, [sp, #24] +; CHECK-DAG: ldp x19, x20, [sp, #32] ; CHECK-DAG: cmp w0, #0 ; CHECK-DAG: csinv w0, w0, wzr, ge ; CHECK-DAG: add sp, sp, #96 Index: test/CodeGen/AArch64/wineh-frame5.mir =================================================================== --- test/CodeGen/AArch64/wineh-frame5.mir +++ test/CodeGen/AArch64/wineh-frame5.mir @@ -3,12 +3,10 @@ # Check multiple epilogues, save_reg, save_reg_x. # CHECK-LABEL: bb.0.entry: -# CHECK: early-clobber $sp = frame-setup STRXpre killed $x28, $sp, -32 -# CHECK-NEXT: frame-setup SEH_SaveReg_X 28, -32 -# CHECK-NEXT: frame-setup STRXui killed $x19, $sp, 1 -# CHECK-NEXT: frame-setup SEH_SaveReg 19, 8 -# CHECK-NEXT: frame-setup STRXui killed $lr, $sp, 2 -# CHECK-NEXT: frame-setup SEH_SaveReg 30, 16 +# CHECK: early-clobber $sp = frame-setup STPXpre killed $fp, killed $lr, $sp, -4 +# CHECK-NEXT: frame-setup SEH_SaveFPLR_X -32 +# CHECK-NEXT: frame-setup STRXui killed $x19, $sp, 2 +# CHECK-NEXT: frame-setup SEH_SaveReg 19, 16 # CHECK-NEXT: $sp = frame-setup SUBXri $sp, 496, 0 # CHECK-NEXT: frame-setup SEH_StackAlloc 496 # CHECK-NEXT: frame-setup SEH_PrologEnd @@ -17,12 +15,10 @@ # CHECK: frame-destroy SEH_EpilogStart # CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 496, 0 # CHECK-NEXT: frame-destroy SEH_StackAlloc 496 -# CHECK-NEXT: $lr = frame-destroy LDRXui $sp, 2 -# CHECK-NEXT: frame-destroy SEH_SaveReg 30, 16 -# CHECK-NEXT: $x19 = frame-destroy LDRXui $sp, 1 -# CHECK-NEXT: frame-destroy SEH_SaveReg 19, 8 -# CHECK-NEXT: early-clobber $sp, $x28 = frame-destroy LDRXpost $sp, 32 -# CHECK-NEXT: frame-destroy SEH_SaveReg_X 28, -32 +# CHECK-NEXT: $x19 = frame-destroy LDRXui $sp, 2 +# CHECK-NEXT: frame-destroy SEH_SaveReg 19, 16 +# CHECK-NEXT: early-clobber $sp, $fp, $lr = frame-destroy LDPXpost $sp, 4 +# CHECK-NEXT: frame-destroy SEH_SaveFPLR_X -32 # CHECK-NEXT: frame-destroy SEH_EpilogEnd # CHECK-NEXT: TCRETURNdi @"?func2@@YAHXZ", 0, csr_aarch64_aapcs, implicit $sp Index: test/CodeGen/AArch64/wineh-frame7.mir =================================================================== --- test/CodeGen/AArch64/wineh-frame7.mir +++ test/CodeGen/AArch64/wineh-frame7.mir @@ -3,10 +3,8 @@ # Test that stack probe results in Nop unwind codes in the prologue. Test # save_fplr, save_reg_x and stack_alloc with multiple updates. -# CHECK: early-clobber $sp = frame-setup STRXpre killed $x28, $sp, -32 -# CHECK-NEXT: frame-setup SEH_SaveReg_X 28, -32 -# CHECK-NEXT: frame-setup STPXi killed $fp, killed $lr, $sp, 2 -# CHECK-NEXT: frame-setup SEH_SaveFPLR 16 +# CHECK: early-clobber $sp = frame-setup STPXpre killed $fp, killed $lr, $sp, -2 +# CHECK-NEXT: frame-setup SEH_SaveFPLR_X -16 # CHECK-NEXT: $x15 = frame-setup MOVZXi 56009, 0 # CHECK-NEXT: frame-setup SEH_Nop # CHECK-NEXT: $x15 = frame-setup MOVKXi $x15, 2, 16 @@ -21,10 +19,8 @@ # CHECK-NEXT: frame-destroy SEH_StackAlloc 2990080 # CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 3216, 0 # CHECK-NEXT: frame-destroy SEH_StackAlloc 3216 -# CHECK-NEXT: $fp, $lr = frame-destroy LDPXi $sp, 2 -# CHECK-NEXT: frame-destroy SEH_SaveFPLR 16 -# CHECK-NEXT: early-clobber $sp, $x28 = frame-destroy LDRXpost $sp, 32 -# CHECK-NEXT: frame-destroy SEH_SaveReg_X 28, -32 +# CHECK-NEXT: early-clobber $sp, $fp, $lr = frame-destroy LDPXpost $sp, 2 +# CHECK-NEXT: frame-destroy SEH_SaveFPLR_X -16 # CHECK-NEXT: frame-destroy SEH_EpilogEnd # CHECK-NEXT: RET_ReallyLR implicit killed $w0 --- | Index: test/CodeGen/AArch64/wineh-try-catch-realign.ll =================================================================== --- test/CodeGen/AArch64/wineh-try-catch-realign.ll +++ test/CodeGen/AArch64/wineh-try-catch-realign.ll @@ -9,17 +9,17 @@ ; it shouldn't access the parent's frame via sp, and the prologue and ; epilogue should be symmetrical. ; CHECK-LABEL: "?catch$2@?0??a@@YAXXZ@4HA": -; CHECK: str x28, [sp, #-32]! -; CHECK-NEXT: str x19, [sp, #8] -; CHECK-NEXT: stp x29, x30, [sp, #16] +; CHECK: stp x29, x30, [sp, #-32]! +; CHECK-NEXT: str x28, [sp, #16] +; CHECK-NEXT: str x19, [sp, #24] ; CHECK-NEXT: add x0, x19, #64 ; CHECK-NEXT: mov w1, wzr ; CHECK-NEXT: bl "?bb@@YAXPEAHH@Z" ; CHECK-NEXT: adrp x0, .LBB0_1 ; CHECK-NEXT: add x0, x0, .LBB0_1 -; CHECK-NEXT: ldp x29, x30, [sp, #16] -; CHECK-NEXT: ldr x19, [sp, #8] -; CHECK-NEXT: ldr x28, [sp], #32 +; CHECK-NEXT: ldr x19, [sp, #24] +; CHECK-NEXT: ldr x28, [sp, #16] +; CHECK-NEXT: ldp x29, x30, [sp], #32 ; CHECK-NEXT: ret Index: test/CodeGen/AArch64/wineh-try-catch.ll =================================================================== --- test/CodeGen/AArch64/wineh-try-catch.ll +++ test/CodeGen/AArch64/wineh-try-catch.ll @@ -15,11 +15,11 @@ ; on-entry sp - 672. We check this offset in the table later on. ; CHECK-LABEL: "?func@@YAHXZ": -; CHECK: str x28, [sp, #-48]! -; CHECK: str x21, [sp, #8] -; CHECK: stp x19, x20, [sp, #16] -; CHECK: stp x29, x30, [sp, #32] -; CHECK: add x29, sp, #32 +; CHECK: stp x29, x30, [sp, #-48]! +; CHECK: str x28, [sp, #16] +; CHECK: str x21, [sp, #24] +; CHECK: stp x19, x20, [sp, #32] +; CHECK: mov x29, sp ; CHECK: sub sp, sp, #624 ; CHECK: mov x19, sp ; CHECK: mov x0, #-2 @@ -47,10 +47,10 @@ ; CHECK-LABEL: "?catch$2@?0??func@@YAHXZ@4HA": ; Check that the stack space is allocated only for the callee saved registers. -; CHECK: str x28, [sp, #-48]! -; CHECK: str x21, [sp, #8] -; CHECK: stp x19, x20, [sp, #16] -; CHECK: stp x29, x30, [sp, #32] +; CHECK: stp x29, x30, [sp, #-48]! +; CHECK: str x28, [sp, #16] +; CHECK: str x21, [sp, #24] +; CHECK: stp x19, x20, [sp, #32] ; CHECK: add x20, x19, #12 ; Check that there are no further stack updates. @@ -87,18 +87,18 @@ ; UNWIND: Prologue [ ; UNWIND-NEXT: ; nop ; UNWIND-NEXT: ; sub sp, #624 -; UNWIND-NEXT: ; add fp, sp, #32 -; UNWIND-NEXT: ; stp x29, x30, [sp, #32] -; UNWIND-NEXT: ; stp x19, x20, [sp, #16] -; UNWIND-NEXT: ; str x21, [sp, #8] -; UNWIND-NEXT: ; str x28, [sp, #48]! +; UNWIND-NEXT: ; mov fp, sp +; UNWIND-NEXT: ; stp x19, x20, [sp, #32] +; UNWIND-NEXT: ; str x21, [sp, #24] +; UNWIND-NEXT: ; str x28, [sp, #16] +; UNWIND-NEXT: ; stp x29, x30, [sp, #-48]! ; UNWIND-NEXT: ; end ; UNWIND: Function: ?catch$2@?0??func@@YAHXZ@4HA ; UNWIND: Prologue [ -; UNWIND-NEXT: ; stp x29, x30, [sp, #32] -; UNWIND-NEXT: ; stp x19, x20, [sp, #16] -; UNWIND-NEXT: ; str x21, [sp, #8] -; UNWIND-NEXT: ; str x28, [sp, #48]! +; UNWIND-NEXT: ; stp x19, x20, [sp, #32] +; UNWIND-NEXT: ; str x21, [sp, #24] +; UNWIND-NEXT: ; str x28, [sp, #16] +; UNWIND-NEXT: ; stp x29, x30, [sp, #-48]! ; UNWIND-NEXT: ; end target datalayout = "e-m:w-p:64:64-i32:32-i64:64-i128:128-n32:64-S128" Index: test/CodeGen/AArch64/wineh_shrinkwrap.mir =================================================================== --- test/CodeGen/AArch64/wineh_shrinkwrap.mir +++ test/CodeGen/AArch64/wineh_shrinkwrap.mir @@ -9,14 +9,14 @@ # The same test gets shrink wrapped on Linux ARM64. # WIN64-LABEL: bb.0.entry: -# WIN64: early-clobber $sp = frame-setup STRXpre killed $x28, $sp, -32 +# WIN64: early-clobber $sp = frame-setup STPXpre killed $fp, killed $lr, $sp, -4 # WIN64-LABEL: bb.1: # WIN64-LABEL: bb.2.if.then: # LINUX-LABEL: bb.0.entry: # LINUX-LABEL: bb.1: # LINUX-LABEL: bb.2.if.then: -# LINUX: early-clobber $sp = frame-setup STRXpre killed $x28, $sp, -32 +# LINUX: early-clobber $sp = frame-setup STRXpre killed $fp, $sp, -32 --- | ; ModuleID = 'shrink.cpp' target datalayout = "e-m:w-p:64:64-i32:32-i64:64-i128:128-n32:64-S128"