Index: llvm/lib/Target/AArch64/AArch64FrameLowering.h
===================================================================
--- llvm/lib/Target/AArch64/AArch64FrameLowering.h
+++ llvm/lib/Target/AArch64/AArch64FrameLowering.h
@@ -150,6 +150,10 @@
                                    MachineBasicBlock::iterator MBBI) const;
   void emitCalleeSavedSVELocations(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator MBBI) const;
+  void emitCalleeSavedGPRRestores(MachineBasicBlock &MBB,
+                                  MachineBasicBlock::iterator MBBI) const;
+  void emitCalleeSavedSVERestores(MachineBasicBlock &MBB,
+                                  MachineBasicBlock::iterator MBBI) const;
 };
 
 } // End llvm namespace
Index: llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -579,6 +579,49 @@
   emitCalleeSavedSVELocations(MBB, MBBI);
 }
 
+static void emitCalleeSavedRestores(MachineBasicBlock &MBB,
+                                    MachineBasicBlock::iterator MBBI,
+                                    bool SVE) {
+  MachineFunction &MF = *MBB.getParent();
+  MachineFrameInfo &MFI = MF.getFrameInfo();
+
+  const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
+  if (CSI.empty())
+    return;
+
+  const TargetSubtargetInfo &STI = MF.getSubtarget();
+  const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
+  const TargetInstrInfo &TII = *STI.getInstrInfo();
+  DebugLoc DL = MBB.findDebugLoc(MBBI);
+
+  for (const auto &Info : CSI) {
+    if (SVE !=
+        (MFI.getStackID(Info.getFrameIdx()) == TargetStackID::ScalableVector))
+      continue;
+
+    unsigned Reg = Info.getReg();
+    if (SVE &&
+        !static_cast<const AArch64RegisterInfo &>(TRI).regNeedsCFI(Reg, Reg))
+      continue;
+
+    unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createRestore(
+        nullptr, TRI.getDwarfRegNum(Info.getReg(), true)));
+    BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
+        .addCFIIndex(CFIIndex)
+        .setMIFlags(MachineInstr::FrameDestroy);
+  }
+}
+
+void AArch64FrameLowering::emitCalleeSavedGPRRestores(
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const {
+  emitCalleeSavedRestores(MBB, MBBI, false);
+}
+
+void AArch64FrameLowering::emitCalleeSavedSVERestores(
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const {
+  emitCalleeSavedRestores(MBB, MBBI, true);
+}
+
 // Find a scratch register that we can use at the start of the prologue to
 // re-align the stack pointer. We avoid using callee-save registers since they
 // may appear to be free when this is called from canUseAsPrologue (during
@@ -886,7 +929,9 @@
 static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec(
     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
     const DebugLoc &DL, const TargetInstrInfo *TII, int CSStackSizeInc,
-    bool NeedsWinCFI, bool *HasWinCFI, bool EmitCFI, bool InProlog = true) {
+    bool NeedsWinCFI, bool *HasWinCFI, bool EmitCFI,
+    MachineInstr::MIFlag FrameFlag = MachineInstr::FrameSetup,
+    int CFAOffset = 0) {
   unsigned NewOpc;
   switch (MBBI->getOpcode()) {
   default:
@@ -949,10 +994,9 @@
   if (MBBI->getOperand(MBBI->getNumOperands() - 1).getImm() != 0 ||
       CSStackSizeInc < MinOffset || CSStackSizeInc > MaxOffset) {
     emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP,
-                    StackOffset::getFixed(CSStackSizeInc), TII,
-                    InProlog ? MachineInstr::FrameSetup
-                             : MachineInstr::FrameDestroy,
-                    false, false, nullptr, EmitCFI && InProlog);
+                    StackOffset::getFixed(CSStackSizeInc), TII, FrameFlag,
+                    false, false, nullptr, EmitCFI,
+                    StackOffset::getFixed(CFAOffset));
     return std::prev(MBBI);
   }
 
@@ -980,16 +1024,15 @@
   // Generate a new SEH code that corresponds to the new instruction.
   if (NeedsWinCFI) {
     *HasWinCFI = true;
-    InsertSEH(*MIB, *TII,
-              InProlog ? MachineInstr::FrameSetup : MachineInstr::FrameDestroy);
+    InsertSEH(*MIB, *TII, FrameFlag);
   }
 
-  if (EmitCFI && InProlog) {
+  if (EmitCFI) {
     unsigned CFIIndex = MF.addFrameInst(
-        MCCFIInstruction::cfiDefCfaOffset(nullptr, -CSStackSizeInc));
+        MCCFIInstruction::cfiDefCfaOffset(nullptr, CFAOffset - CSStackSizeInc));
     BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
         .addCFIIndex(CFIIndex)
-        .setMIFlags(MachineInstr::FrameSetup);
+        .setMIFlags(FrameFlag);
   }
 
   return std::prev(MBB.erase(MBBI));
@@ -1128,6 +1171,14 @@
       .addReg(AArch64::X18)
       .addImm(-8)
       .setMIFlag(MachineInstr::FrameDestroy);
+
+  if (MF.getInfo<AArch64FunctionInfo>()->needsAsyncUnwindInfo()) {
+    unsigned CFIIndex =
+        MF.addFrameInst(MCCFIInstruction::createRestore(nullptr, 18));
+    BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
+        .addCFIIndex(CFIIndex)
+        .setMIFlags(MachineInstr::FrameDestroy);
+  }
 }
 
 void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
@@ -1163,7 +1214,6 @@
                    MFnI.needsUnwindInfo());
 
   if (MFnI.shouldSignReturnAddress()) {
-    unsigned PACI;
     if (MFnI.shouldSignWithBKey()) {
       BuildMI(MBB, MBBI, DL, TII->get(AArch64::EMITBKEY))
@@ -1657,6 +1707,7 @@
   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
   DebugLoc DL;
   bool NeedsWinCFI = needsWinCFI(MF);
+  bool EmitCFI = MF.getInfo<AArch64FunctionInfo>()->needsAsyncUnwindInfo();
   bool HasWinCFI = false;
   bool IsFunclet = false;
   auto WinCFI = make_scope_exit([&]() { assert(HasWinCFI == MF.hasWinCFI()); });
@@ -1666,9 +1717,11 @@
     IsFunclet = isFuncletReturnInstr(*MBBI);
   }
 
-  auto ShadowStackEpilogue = make_scope_exit([&]() {
+  auto ShadowStackEpilogueAndCSRRestores = make_scope_exit([&]() {
     if (needsShadowCallStackPrologueEpilogue(MF))
       emitShadowCallStackEpilogue(*TII, MF, MBB, MBB.getFirstTerminator(), DL);
+    if (EmitCFI)
+      emitCalleeSavedGPRRestores(MBB, MBB.getFirstTerminator());
   });
 
   int64_t NumBytes = IsFunclet ? getWinEHFuncletFrameSize(MF)
@@ -1684,33 +1737,6 @@
   // to restore in this particular epilogue.
   int64_t ArgumentStackToRestore = getArgumentStackToRestore(MF, MBB);
 
-  // The stack frame should be like below,
-  //
-  //      ----------------------                     ---
-  //      |                    |                      |
-  //      | BytesInStackArgArea|              CalleeArgStackSize
-  //      | (NumReusableBytes) |                (of tail call)
-  //      |                    |                     ---
-  //      |                    |                      |
-  //      ---------------------|          ---         |
-  //      |                    |           |          |
-  //      |   CalleeSavedReg   |           |          |
-  //      | (CalleeSavedStackSize)|        |          |
-  //      |                    |           |          |
-  //      ---------------------|           |       NumBytes
-  //      |                    |       StackSize  (StackAdjustUp)
-  //      |   LocalStackSize   |           |          |
-  //      | (covering callee   |           |          |
-  //      |       args)        |           |          |
-  //      |                    |           |          |
-  //      ----------------------          ---        ---
-  //
-  // So NumBytes = StackSize + BytesInStackArgArea - CalleeArgStackSize
-  //             = StackSize + ArgumentPopSize
-  //
-  // AArch64TargetLowering::LowerCall figures out ArgumentPopSize and keeps
-  // it as the 2nd argument of AArch64ISD::TC_RETURN.
-
   auto Cleanup = make_scope_exit([&] { InsertReturnAddressAuth(MF, MBB); });
 
   bool IsWin64 =
@@ -1747,9 +1773,11 @@
   bool CombineSPBump = shouldCombineCSRLocalStackBumpInEpilogue(MBB, NumBytes);
   // Assume we can't combine the last pop with the sp restore.
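
(For concreteness, the epilogue CFI this change is after: a sketch assembled
from the test updates further down, assuming a frame that spills only x30 in
a 16-byte callee-save area:

    ldr x30, [sp], #16        // post-index reload folds the final SP bump
    .cfi_def_cfa_offset 0     // the CFA is back at the incoming SP
    .cfi_restore w30          // x30 again holds the caller's value
    ret

The .cfi_restore directives come from emitCalleeSavedGPRRestores above; the
.cfi_def_cfa_offset comes from the CFAOffset plumbing added here.)
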
+  bool CombineAfterCSRBump = false;
   if (!CombineSPBump && PrologueSaveSize != 0) {
     MachineBasicBlock::iterator Pop = std::prev(MBB.getFirstTerminator());
-    while (AArch64InstrInfo::isSEHInstruction(*Pop))
+    while (Pop->getOpcode() == TargetOpcode::CFI_INSTRUCTION ||
+           AArch64InstrInfo::isSEHInstruction(*Pop))
       Pop = std::prev(Pop);
     // Converting the last ldp to a post-index ldp is valid only if the last
     // ldp's offset is 0.
@@ -1757,16 +1785,17 @@
     // If the offset is 0 and the AfterCSR pop is not actually trying to
     // allocate more stack for arguments (in space that an untimely interrupt
     // may clobber), convert it to a post-index ldp.
-    if (OffsetOp.getImm() == 0 && AfterCSRPopSize >= 0)
+    if (OffsetOp.getImm() == 0 && AfterCSRPopSize >= 0) {
       convertCalleeSaveRestoreToSPPrePostIncDec(
           MBB, Pop, DL, TII, PrologueSaveSize, NeedsWinCFI, &HasWinCFI,
-          false, false);
-    else {
+          EmitCFI, MachineInstr::FrameDestroy, PrologueSaveSize);
+    } else {
       // If not, make sure to emit an add after the last ldp.
       // We're doing this by transferring the size to be restored from the
       // adjustment *before* the CSR pops to the adjustment *after* the CSR
       // pops.
       AfterCSRPopSize += PrologueSaveSize;
+      CombineAfterCSRBump = true;
     }
   }
@@ -1815,14 +1844,30 @@
   // If there is a single SP update, insert it before the ret and we're done.
   if (CombineSPBump) {
     assert(!SVEStackSize && "Cannot combine SP bump with SVE");
+
+    // When we are about to restore the CSRs, the CFA register is SP again.
+    if (EmitCFI && hasFP(MF)) {
+      const AArch64RegisterInfo &RegInfo = *Subtarget.getRegisterInfo();
+      unsigned Reg = RegInfo.getDwarfRegNum(AArch64::SP, true);
+      unsigned CFIIndex = MF.addFrameInst(
+          MCCFIInstruction::cfiDefCfa(nullptr, Reg, NumBytes));
+      BuildMI(MBB, LastPopI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
+          .addCFIIndex(CFIIndex)
+          .setMIFlags(MachineInstr::FrameDestroy);
+    }
+
     emitFrameOffset(MBB, MBB.getFirstTerminator(), DL, AArch64::SP, AArch64::SP,
                     StackOffset::getFixed(NumBytes + (int64_t)AfterCSRPopSize),
                     TII, MachineInstr::FrameDestroy, false, NeedsWinCFI,
-                    &HasWinCFI);
+                    &HasWinCFI, EmitCFI,
+                    StackOffset::getFixed(NumBytes));
     if (HasWinCFI)
       BuildMI(MBB, MBB.getFirstTerminator(), DL,
               TII->get(AArch64::SEH_EpilogEnd))
           .setMIFlag(MachineInstr::FrameDestroy);
     return;
   }
@@ -1851,29 +1896,40 @@
   // Deallocate the SVE area.
   if (SVEStackSize) {
     if (AFI->isStackRealigned()) {
-      if (int64_t CalleeSavedSize = AFI->getSVECalleeSavedStackSize())
+      if (int64_t CalleeSavedSize = AFI->getSVECalleeSavedStackSize()) {
         // Set SP to start of SVE callee-save area from which they can
         // be reloaded. The code below will deallocate the stack space
        // by moving FP -> SP.
         emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, AArch64::FP,
                         StackOffset::getScalable(-CalleeSavedSize), TII,
                         MachineInstr::FrameDestroy);
+      }
     } else {
       if (AFI->getSVECalleeSavedStackSize()) {
         // Deallocate the non-SVE locals first before we can deallocate (and
         // restore callee saves) from the SVE area.
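
(In the !hasFP(MF) case, each staged bump below is also handed the CFA's
current distance from SP, so the unwind info stays exact after every
instruction. Before the first bump that distance is SVEStackSize + NumBytes +
PrologueSaveSize; with, say, 32 bytes of locals and a 16-byte callee-save
area, that is SVEStackSize plus 48 bytes, and it shrinks step by step until
only the callee-save area remains.)
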
-        emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, AArch64::SP,
-                        StackOffset::getFixed(NumBytes), TII,
-                        MachineInstr::FrameDestroy);
+        emitFrameOffset(
+            MBB, RestoreBegin, DL, AArch64::SP, AArch64::SP,
+            StackOffset::getFixed(NumBytes), TII, MachineInstr::FrameDestroy,
+            false, false, nullptr, EmitCFI && !hasFP(MF),
+            SVEStackSize + StackOffset::getFixed(NumBytes + PrologueSaveSize));
         NumBytes = 0;
       }
 
       emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, AArch64::SP,
-                      DeallocateBefore, TII, MachineInstr::FrameDestroy);
+                      DeallocateBefore, TII, MachineInstr::FrameDestroy, false,
+                      false, nullptr, EmitCFI && !hasFP(MF),
+                      SVEStackSize +
+                          StackOffset::getFixed(NumBytes + PrologueSaveSize));
 
       emitFrameOffset(MBB, RestoreEnd, DL, AArch64::SP, AArch64::SP,
-                      DeallocateAfter, TII, MachineInstr::FrameDestroy);
+                      DeallocateAfter, TII, MachineInstr::FrameDestroy, false,
+                      false, nullptr, EmitCFI && !hasFP(MF),
+                      DeallocateAfter +
+                          StackOffset::getFixed(NumBytes + PrologueSaveSize));
     }
+    if (EmitCFI)
+      emitCalleeSavedSVERestores(MBB, RestoreEnd);
   }
 
   if (!hasFP(MF)) {
@@ -1883,14 +1939,21 @@
     if (RedZone && AfterCSRPopSize == 0)
       return;
 
+    // Pop the local variables off the stack. If there are no callee-saved
+    // registers, it means we are actually positioned at the terminator and can
+    // combine stack increment for the locals and the stack increment for
+    // callee-popped arguments into (possibly) a single instruction and be done.
     bool NoCalleeSaveRestore = PrologueSaveSize == 0;
     int64_t StackRestoreBytes = RedZone ? 0 : NumBytes;
     if (NoCalleeSaveRestore)
       StackRestoreBytes += AfterCSRPopSize;
 
-    emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP,
-                    StackOffset::getFixed(StackRestoreBytes), TII,
-                    MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI);
+    emitFrameOffset(
+        MBB, LastPopI, DL, AArch64::SP, AArch64::SP,
+        StackOffset::getFixed(StackRestoreBytes), TII,
+        MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI, EmitCFI,
+        StackOffset::getFixed((RedZone ? 0 : NumBytes) + PrologueSaveSize));
+
     // If we were able to combine the local stack pop with the argument pop,
     // then we're done.
     if (NoCalleeSaveRestore || AfterCSRPopSize == 0) {
       if (HasWinCFI) {
         BuildMI(MBB, MBB.getFirstTerminator(), DL,
                 TII->get(AArch64::SEH_EpilogEnd))
             .setMIFlag(MachineInstr::FrameDestroy);
       }
       return;
     }
 
@@ -1919,6 +1986,17 @@
                     StackOffset::getFixed(NumBytes), TII,
                     MachineInstr::FrameDestroy, false, NeedsWinCFI);
 
+  // When we are about to restore the CSRs, the CFA register is SP again.
+  if (EmitCFI && hasFP(MF)) {
+    const AArch64RegisterInfo &RegInfo = *Subtarget.getRegisterInfo();
+    unsigned Reg = RegInfo.getDwarfRegNum(AArch64::SP, true);
+    unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfa(
+        nullptr, Reg, PrologueSaveSize));
+    BuildMI(MBB, LastPopI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
+        .addCFIIndex(CFIIndex)
+        .setMIFlags(MachineInstr::FrameDestroy);
+  }
+
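
(An abridged sketch of the frame-pointer case, condensed from the
byval-call.ll update below, for a 288-byte frame whose CSR pair sits at
sp+256:

    .cfi_def_cfa wsp, 288     // CFA re-derived from SP before the CSR reloads
    ldp x29, x30, [sp, #256]  // 16-byte Folded Reload
    add sp, sp, #288
    .cfi_def_cfa_offset 0
    .cfi_restore w30
    .cfi_restore w29
    ret

Until the .cfi_def_cfa, the unwinder was deriving the CFA from x29; switching
back to SP first keeps the description valid while x29 is being reloaded.)
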
   // This must be placed after the callee-save restore code because that code
   // assumes the SP is at the same location as it was after the callee-save
   // save code in the prologue.
@@ -1928,11 +2006,14 @@
     emitFrameOffset(MBB, MBB.getFirstTerminator(), DL, AArch64::SP, AArch64::SP,
                     StackOffset::getFixed(AfterCSRPopSize), TII,
-                    MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI);
+                    MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI,
+                    EmitCFI,
+                    StackOffset::getFixed(CombineAfterCSRBump ? PrologueSaveSize
+                                                              : 0));
   }
   if (HasWinCFI)
     BuildMI(MBB, MBB.getFirstTerminator(), DL, TII->get(AArch64::SEH_EpilogEnd))
         .setMIFlag(MachineInstr::FrameDestroy);
 }
 
 /// getFrameIndexReference - Provide a base+offset reference to an FI slot for
@@ -2626,6 +2707,7 @@
                 MachineMemOperand::MOLoad, Size, Alignment));
         if (NeedsWinCFI)
           InsertSEH(MIB, TII, MachineInstr::FrameDestroy);
+        return MIB->getIterator();
       };
Index: llvm/lib/Target/AArch64/AArch64InstrInfo.h
===================================================================
--- llvm/lib/Target/AArch64/AArch64InstrInfo.h
+++ llvm/lib/Target/AArch64/AArch64InstrInfo.h
@@ -364,7 +364,8 @@
                               const TargetRegisterInfo *TRI);
 
 MCCFIInstruction createDefCFA(const TargetRegisterInfo &TRI, unsigned FrameReg,
-                              unsigned Reg, const StackOffset &Offset);
+                              unsigned Reg, const StackOffset &Offset,
+                              bool LastAdjustmentWasScalable = true);
 MCCFIInstruction createCFAOffset(const TargetRegisterInfo &MRI, unsigned Reg,
                                  const StackOffset &OffsetFromDefCFA);
Index: llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -4011,11 +4011,12 @@
 
 MCCFIInstruction llvm::createDefCFA(const TargetRegisterInfo &TRI,
                                     unsigned FrameReg, unsigned Reg,
-                                    const StackOffset &Offset) {
+                                    const StackOffset &Offset,
+                                    bool LastAdjustmentWasScalable) {
   if (Offset.getScalable())
     return createDefCFAExpression(TRI, Reg, Offset);
 
-  if (FrameReg == Reg)
+  if (FrameReg == Reg && !LastAdjustmentWasScalable)
     return MCCFIInstruction::cfiDefCfaOffset(nullptr, int(Offset.getFixed()));
 
   unsigned DwarfReg = TRI.getDwarfRegNum(Reg, true);
@@ -4147,7 +4148,7 @@
   const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
 
   unsigned CFIIndex =
-      MF.addFrameInst(createDefCFA(TRI, FrameReg, DestReg, CFAOffset));
+      MF.addFrameInst(createDefCFA(TRI, FrameReg, DestReg, CFAOffset, VScale != 1));
   BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
       .addCFIIndex(CFIIndex)
       .setMIFlags(Flag);
Index: llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp
+++ llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp
@@ -127,9 +127,17 @@
 
 bool AArch64FunctionInfo::needsAsyncUnwindInfo() const {
   if (!NeedsAsyncUnwindInfo.hasValue())
-    NeedsAsyncUnwindInfo = MF.needsFrameMoves() &&
-                           !MF.getTarget().getMCAsmInfo()->usesWindowsCFI() &&
-                           !MF.getSubtarget<AArch64Subtarget>().isTargetMachO();
+    NeedsAsyncUnwindInfo =
+        MF.needsFrameMoves() &&
+        !MF.getTarget().getMCAsmInfo()->usesWindowsCFI() &&
+        !MF.getSubtarget<AArch64Subtarget>().isTargetMachO() &&
+        !MF.getFunction()
+             .hasMinSize(); // TODO: this is to prevent epilogue unwind info
+                            // from being emitted for homogeneous epilogues,
+                            // outlined functions, and functions outlined from.
+                            // Alternatively, we could disable those
+                            // optimisations. Or even better, add async unwind
+                            // support to them!
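+                            // (Note: hasMinSize() corresponds to the minsize
+                            // attribute, as set by -Oz, which is what enables
+                            // the homogeneous-epilogue and outlining
+                            // optimisations mentioned above by default.)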
assert((!NeedsAsyncUnwindInfo.getValue() || needsUnwindInfo()) && "Async unwind info must imply unwind info"); Index: llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic-128.ll =================================================================== --- llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic-128.ll +++ llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic-128.ll @@ -82,6 +82,7 @@ ; CHECK-CAS-O0-NEXT: mov v0.d[1], x8 ; CHECK-CAS-O0-NEXT: str q0, [x0] ; CHECK-CAS-O0-NEXT: add sp, sp, #16 +; CHECK-CAS-O0-NEXT: .cfi_def_cfa_offset 0 ; CHECK-CAS-O0-NEXT: ret %pair = cmpxchg i128* %p, i128 %oldval, i128 %newval acquire acquire @@ -167,6 +168,7 @@ ; CHECK-CAS-O0-NEXT: mov v0.d[1], x8 ; CHECK-CAS-O0-NEXT: str q0, [x0] ; CHECK-CAS-O0-NEXT: add sp, sp, #16 +; CHECK-CAS-O0-NEXT: .cfi_def_cfa_offset 0 ; CHECK-CAS-O0-NEXT: ret %pair = cmpxchg i128* %p, i128 %oldval, i128 %newval monotonic seq_cst @@ -252,6 +254,7 @@ ; CHECK-CAS-O0-NEXT: mov v0.d[1], x8 ; CHECK-CAS-O0-NEXT: str q0, [x0] ; CHECK-CAS-O0-NEXT: add sp, sp, #16 +; CHECK-CAS-O0-NEXT: .cfi_def_cfa_offset 0 ; CHECK-CAS-O0-NEXT: ret %pair = cmpxchg i128* %p, i128 %oldval, i128 %newval release acquire @@ -337,6 +340,7 @@ ; CHECK-CAS-O0-NEXT: mov v0.d[1], x8 ; CHECK-CAS-O0-NEXT: str q0, [x0] ; CHECK-CAS-O0-NEXT: add sp, sp, #16 +; CHECK-CAS-O0-NEXT: .cfi_def_cfa_offset 0 ; CHECK-CAS-O0-NEXT: ret %pair = cmpxchg i128* %p, i128 %oldval, i128 %newval release acquire %val = extractvalue { i128, i1 } %pair, 0 Index: llvm/test/CodeGen/AArch64/GlobalISel/byval-call.ll =================================================================== --- llvm/test/CodeGen/AArch64/GlobalISel/byval-call.ll +++ llvm/test/CodeGen/AArch64/GlobalISel/byval-call.ll @@ -15,6 +15,8 @@ ; CHECK-NEXT: bl byval_i32 ; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload ; CHECK-NEXT: add sp, sp, #32 +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w30 ; CHECK-NEXT: ret call void @byval_i32(i32* byval(i32) %incoming) ret void @@ -67,9 +69,14 @@ ; CHECK-NEXT: ldr q0, [x0, #240] ; CHECK-NEXT: str q0, [sp, #240] ; CHECK-NEXT: bl byval_a64i32 +; CHECK-NEXT: .cfi_def_cfa wsp, 288 ; CHECK-NEXT: ldr x28, [sp, #272] // 8-byte Folded Reload ; CHECK-NEXT: ldp x29, x30, [sp, #256] // 16-byte Folded Reload ; CHECK-NEXT: add sp, sp, #288 +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w28 +; CHECK-NEXT: .cfi_restore w30 +; CHECK-NEXT: .cfi_restore w29 ; CHECK-NEXT: ret call void @byval_a64i32([64 x i32]* byval([64 x i32]) %incoming) ret void Index: llvm/test/CodeGen/AArch64/aarch64-be-bv.ll =================================================================== --- llvm/test/CodeGen/AArch64/aarch64-be-bv.ll +++ llvm/test/CodeGen/AArch64/aarch64-be-bv.ll @@ -545,6 +545,8 @@ ; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8 ; CHECK-NEXT: bl f_v2i64 ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w30 ; CHECK-NEXT: ret call i8 @f_v8i8(<8 x i8> ) call i16 @f_v4i16(<4 x i16> ) @@ -591,6 +593,8 @@ ; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8 ; CHECK-NEXT: bl f_v2i64 ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w30 ; CHECK-NEXT: ret call i8 @f_v8i8(<8 x i8> ) call i16 @f_v4i16(<4 x i16> ) @@ -637,6 +641,8 @@ ; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8 ; CHECK-NEXT: bl f_v2i64 ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w30 ; CHECK-NEXT: ret call i8 
@f_v8i8(<8 x i8> ) call i16 @f_v4i16(<4 x i16> ) @@ -683,6 +689,8 @@ ; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8 ; CHECK-NEXT: bl f_v2i64 ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w30 ; CHECK-NEXT: ret call i8 @f_v8i8(<8 x i8> ) call i16 @f_v4i16(<4 x i16> ) @@ -729,6 +737,8 @@ ; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8 ; CHECK-NEXT: bl f_v2i64 ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w30 ; CHECK-NEXT: ret call i8 @f_v8i8(<8 x i8> ) call i16 @f_v4i16(<4 x i16> ) @@ -775,6 +785,8 @@ ; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8 ; CHECK-NEXT: bl f_v2i64 ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w30 ; CHECK-NEXT: ret call i8 @f_v8i8(<8 x i8> ) call i16 @f_v4i16(<4 x i16> ) @@ -821,6 +833,8 @@ ; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8 ; CHECK-NEXT: bl f_v2i64 ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w30 ; CHECK-NEXT: ret call i8 @f_v8i8(<8 x i8> ) call i16 @f_v4i16(<4 x i16> ) @@ -867,6 +881,8 @@ ; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8 ; CHECK-NEXT: bl f_v2i64 ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w30 ; CHECK-NEXT: ret call i8 @f_v8i8(<8 x i8> ) call i16 @f_v4i16(<4 x i16> ) @@ -908,6 +924,8 @@ ; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8 ; CHECK-NEXT: bl f_v4i32 ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w30 ; CHECK-NEXT: ret call i8 @f_v8i8(<8 x i8> ) call i16 @f_v4i16(<4 x i16> ) @@ -947,6 +965,8 @@ ; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8 ; CHECK-NEXT: bl f_v4i32 ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w30 ; CHECK-NEXT: ret call i8 @f_v8i8(<8 x i8> ) call i16 @f_v4i16(<4 x i16> ) @@ -991,6 +1011,8 @@ ; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8 ; CHECK-NEXT: bl f_v2i64 ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w30 ; CHECK-NEXT: ret call i8 @f_v8i8(<8 x i8> ) call i16 @f_v4i16(<4 x i16> ) @@ -1023,6 +1045,8 @@ ; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8 ; CHECK-NEXT: bl f_v4i32 ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w30 ; CHECK-NEXT: ret call i8 @f_v16i8(<16 x i8> ) call i16 @f_v8i16(<8 x i16> ) Index: llvm/test/CodeGen/AArch64/aarch64-load-ext.ll =================================================================== --- llvm/test/CodeGen/AArch64/aarch64-load-ext.ll +++ llvm/test/CodeGen/AArch64/aarch64-load-ext.ll @@ -453,6 +453,7 @@ ; CHECK-LE-NEXT: ushll v0.8h, v0.8b, #0 ; CHECK-LE-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-LE-NEXT: add sp, sp, #16 +; CHECK-LE-NEXT: .cfi_def_cfa_offset 0 ; CHECK-LE-NEXT: ret ; ; CHECK-BE-LABEL: bitcast: @@ -465,6 +466,7 @@ ; CHECK-BE-NEXT: ushll v0.8h, v0.8b, #0 ; CHECK-BE-NEXT: rev64 v0.4h, v0.4h ; CHECK-BE-NEXT: add sp, sp, #16 +; CHECK-BE-NEXT: .cfi_def_cfa_offset 0 ; CHECK-BE-NEXT: ret %2 = bitcast i32 %0 to <4 x i8> ret <4 x i8> %2 Index: llvm/test/CodeGen/AArch64/addsub-constant-folding.ll =================================================================== --- 
llvm/test/CodeGen/AArch64/addsub-constant-folding.ll +++ llvm/test/CodeGen/AArch64/addsub-constant-folding.ll @@ -28,6 +28,9 @@ ; CHECK-NEXT: bl use ; CHECK-NEXT: add w0, w19, #10 ; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w19 +; CHECK-NEXT: .cfi_restore w30 ; CHECK-NEXT: ret %t0 = add i32 %arg, 8 call void @use(i32 %t0) @@ -62,6 +65,8 @@ ; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload ; CHECK-NEXT: add v0.4s, v1.4s, v0.4s ; CHECK-NEXT: add sp, sp, #32 +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w30 ; CHECK-NEXT: ret %t0 = add <4 x i32> %arg, call void @vec_use(<4 x i32> %t0) @@ -105,6 +110,9 @@ ; CHECK-NEXT: bl use ; CHECK-NEXT: add w0, w19, #6 ; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w19 +; CHECK-NEXT: .cfi_restore w30 ; CHECK-NEXT: ret %t0 = add i32 %arg, 8 call void @use(i32 %t0) @@ -139,6 +147,8 @@ ; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload ; CHECK-NEXT: add v0.4s, v1.4s, v0.4s ; CHECK-NEXT: add sp, sp, #32 +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w30 ; CHECK-NEXT: ret %t0 = add <4 x i32> %arg, call void @vec_use(<4 x i32> %t0) @@ -184,6 +194,9 @@ ; CHECK-NEXT: mov w8, #-6 ; CHECK-NEXT: sub w0, w8, w19 ; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w19 +; CHECK-NEXT: .cfi_restore w30 ; CHECK-NEXT: ret %t0 = add i32 %arg, 8 call void @use(i32 %t0) @@ -218,6 +231,8 @@ ; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload ; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s ; CHECK-NEXT: add sp, sp, #32 +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w30 ; CHECK-NEXT: ret %t0 = add <4 x i32> %arg, call void @vec_use(<4 x i32> %t0) @@ -261,6 +276,9 @@ ; CHECK-NEXT: bl use ; CHECK-NEXT: sub w0, w19, #6 ; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w19 +; CHECK-NEXT: .cfi_restore w30 ; CHECK-NEXT: ret %t0 = sub i32 %arg, 8 call void @use(i32 %t0) @@ -295,6 +313,8 @@ ; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload ; CHECK-NEXT: add v0.4s, v1.4s, v0.4s ; CHECK-NEXT: add sp, sp, #32 +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w30 ; CHECK-NEXT: ret %t0 = sub <4 x i32> %arg, call void @vec_use(<4 x i32> %t0) @@ -338,6 +358,9 @@ ; CHECK-NEXT: bl use ; CHECK-NEXT: sub w0, w19, #10 ; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w19 +; CHECK-NEXT: .cfi_restore w30 ; CHECK-NEXT: ret %t0 = sub i32 %arg, 8 call void @use(i32 %t0) @@ -372,6 +395,8 @@ ; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload ; CHECK-NEXT: sub v0.4s, v1.4s, v0.4s ; CHECK-NEXT: add sp, sp, #32 +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w30 ; CHECK-NEXT: ret %t0 = sub <4 x i32> %arg, call void @vec_use(<4 x i32> %t0) @@ -417,6 +442,9 @@ ; CHECK-NEXT: mov w8, #10 ; CHECK-NEXT: sub w0, w8, w19 ; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w19 +; CHECK-NEXT: .cfi_restore w30 ; CHECK-NEXT: ret %t0 = sub i32 %arg, 8 call void @use(i32 %t0) @@ -451,6 +479,8 @@ ; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload ; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s ; CHECK-NEXT: add sp, sp, #32 +; CHECK-NEXT: .cfi_def_cfa_offset 0 
+; CHECK-NEXT: .cfi_restore w30 ; CHECK-NEXT: ret %t0 = sub <4 x i32> %arg, call void @vec_use(<4 x i32> %t0) @@ -497,6 +527,9 @@ ; CHECK-NEXT: mov w8, #10 ; CHECK-NEXT: sub w0, w8, w19 ; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w19 +; CHECK-NEXT: .cfi_restore w30 ; CHECK-NEXT: ret %t0 = sub i32 8, %arg call void @use(i32 %t0) @@ -531,6 +564,8 @@ ; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload ; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s ; CHECK-NEXT: add sp, sp, #32 +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w30 ; CHECK-NEXT: ret %t0 = sub <4 x i32> , %arg call void @vec_use(<4 x i32> %t0) @@ -577,6 +612,9 @@ ; CHECK-NEXT: mov w8, #6 ; CHECK-NEXT: sub w0, w8, w19 ; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w19 +; CHECK-NEXT: .cfi_restore w30 ; CHECK-NEXT: ret %t0 = sub i32 8, %arg call void @use(i32 %t0) @@ -611,6 +649,8 @@ ; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload ; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s ; CHECK-NEXT: add sp, sp, #32 +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w30 ; CHECK-NEXT: ret %t0 = sub <4 x i32> , %arg call void @vec_use(<4 x i32> %t0) @@ -656,6 +696,9 @@ ; CHECK-NEXT: mov w8, #2 ; CHECK-NEXT: sub w0, w8, w19 ; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w19 +; CHECK-NEXT: .cfi_restore w30 ; CHECK-NEXT: ret %t0 = sub i32 8, %arg call void @use(i32 %t0) @@ -690,6 +733,8 @@ ; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload ; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s ; CHECK-NEXT: add sp, sp, #32 +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w30 ; CHECK-NEXT: ret %t0 = sub <4 x i32> , %arg call void @vec_use(<4 x i32> %t0) Index: llvm/test/CodeGen/AArch64/argument-blocks-array-of-struct.ll =================================================================== --- llvm/test/CodeGen/AArch64/argument-blocks-array-of-struct.ll +++ llvm/test/CodeGen/AArch64/argument-blocks-array-of-struct.ll @@ -392,6 +392,8 @@ ; CHECK-NEXT: str d4, [x8, #32] ; CHECK-NEXT: str d5, [x8, #40] ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w30 ; CHECK-NEXT: ret %1 = call %T_IN_BLOCK @return_in_block() store %T_IN_BLOCK %1, %T_IN_BLOCK* @in_block_store @@ -427,6 +429,8 @@ ; CHECK-NEXT: ldp d0, d1, [x8] ; CHECK-NEXT: bl callee_in_block ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w30 ; CHECK-NEXT: ret %1 = load %T_IN_BLOCK, %T_IN_BLOCK* @in_block_store call void @callee_in_block(%T_IN_BLOCK %1) @@ -469,6 +473,8 @@ ; CHECK-NEXT: stp q3, q2, [x8, #32] ; CHECK-NEXT: str d4, [x8, #64] ; CHECK-NEXT: add sp, sp, #96 +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w30 ; CHECK-NEXT: ret %1 = call %T_IN_MEMORY @return_in_memory() store %T_IN_MEMORY %1, %T_IN_MEMORY* @in_memory_store @@ -510,6 +516,8 @@ ; CHECK-NEXT: bl callee_in_memory ; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload ; CHECK-NEXT: add sp, sp, #96 +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w30 ; CHECK-NEXT: ret %1 = load %T_IN_MEMORY, %T_IN_MEMORY* @in_memory_store call void @callee_in_memory(%T_IN_MEMORY %1) @@ -545,6 +553,8 @@ ; CHECK-NEXT: str d1, [x8, #16] ; CHECK-NEXT: str w1, [x8, #24] ; CHECK-NEXT: ldr x30, [sp], #16 
// 8-byte Folded Reload +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w30 ; CHECK-NEXT: ret %1 = call %T_NO_BLOCK @return_no_block() store %T_NO_BLOCK %1, %T_NO_BLOCK* @no_block_store @@ -579,6 +589,8 @@ ; CHECK-NEXT: ldr d0, [x8] ; CHECK-NEXT: bl callee_no_block ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w30 ; CHECK-NEXT: ret %1 = load %T_NO_BLOCK, %T_NO_BLOCK* @no_block_store call void @callee_no_block(%T_NO_BLOCK %1) Index: llvm/test/CodeGen/AArch64/arm64-atomic-128.ll =================================================================== --- llvm/test/CodeGen/AArch64/arm64-atomic-128.ll +++ llvm/test/CodeGen/AArch64/arm64-atomic-128.ll @@ -39,6 +39,8 @@ ; OUTLINE-NEXT: mov x4, x8 ; OUTLINE-NEXT: bl __aarch64_cas16_acq ; OUTLINE-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-NEXT: .cfi_def_cfa_offset 0 +; OUTLINE-NEXT: .cfi_restore w30 ; OUTLINE-NEXT: ret ; ; LSE-LABEL: val_compare_and_swap: @@ -90,6 +92,8 @@ ; OUTLINE-NEXT: mov x4, x8 ; OUTLINE-NEXT: bl __aarch64_cas16_acq_rel ; OUTLINE-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-NEXT: .cfi_def_cfa_offset 0 +; OUTLINE-NEXT: .cfi_restore w30 ; OUTLINE-NEXT: ret ; ; LSE-LABEL: val_compare_and_swap_seqcst: @@ -141,6 +145,8 @@ ; OUTLINE-NEXT: mov x4, x8 ; OUTLINE-NEXT: bl __aarch64_cas16_rel ; OUTLINE-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-NEXT: .cfi_def_cfa_offset 0 +; OUTLINE-NEXT: .cfi_restore w30 ; OUTLINE-NEXT: ret ; ; LSE-LABEL: val_compare_and_swap_release: @@ -192,6 +198,8 @@ ; OUTLINE-NEXT: mov x4, x8 ; OUTLINE-NEXT: bl __aarch64_cas16_relax ; OUTLINE-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-NEXT: .cfi_def_cfa_offset 0 +; OUTLINE-NEXT: .cfi_restore w30 ; OUTLINE-NEXT: ret ; ; LSE-LABEL: val_compare_and_swap_monotonic: Index: llvm/test/CodeGen/AArch64/arm64-fp128.ll =================================================================== --- llvm/test/CodeGen/AArch64/arm64-fp128.ll +++ llvm/test/CodeGen/AArch64/arm64-fp128.ll @@ -90,6 +90,8 @@ ; CHECK-NEXT: str x0, [x8, :lo12:var64] ; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload ; CHECK-NEXT: add sp, sp, #32 +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w30 ; CHECK-NEXT: ret %val = load fp128, fp128* @lhs, align 16 @@ -121,6 +123,8 @@ ; CHECK-NEXT: str x0, [x8, :lo12:var64] ; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload ; CHECK-NEXT: add sp, sp, #32 +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w30 ; CHECK-NEXT: ret %val = load fp128, fp128* @lhs, align 16 @@ -150,6 +154,9 @@ ; CHECK-NEXT: bl __floatditf ; CHECK-NEXT: str q0, [x19, :lo12:lhs] ; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w19 +; CHECK-NEXT: .cfi_restore w30 ; CHECK-NEXT: ret %src32 = load i32, i32* @var32 @@ -180,6 +187,9 @@ ; CHECK-NEXT: bl __floatunditf ; CHECK-NEXT: str q0, [x19, :lo12:lhs] ; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w19 +; CHECK-NEXT: .cfi_restore w30 ; CHECK-NEXT: ret %src32 = load i32, i32* @var32 @@ -207,6 +217,8 @@ ; CHECK-NEXT: cmp w0, #0 ; CHECK-NEXT: cset w0, le ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w30 ; CHECK-NEXT: ret %lhs = load fp128, fp128* @lhs, align 16 @@ -233,6 +245,8 @@ ; CHECK-NEXT: cmp w0, #0 ; CHECK-NEXT: cset w0, gt 
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w30 ; CHECK-NEXT: ret %lhs = load fp128, fp128* @lhs, align 16 @@ -266,6 +280,9 @@ ; CHECK-NEXT: orr w0, w8, w19 ; CHECK-NEXT: ldp x30, x19, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: add sp, sp, #48 +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w19 +; CHECK-NEXT: .cfi_restore w30 ; CHECK-NEXT: ret %lhs = load fp128, fp128* @lhs, align 16 @@ -293,10 +310,14 @@ ; CHECK-NEXT: // %bb.1: // %iftrue ; CHECK-NEXT: mov w0, #42 ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w30 ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB11_2: // %iffalse ; CHECK-NEXT: mov w0, #29 ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w30 ; CHECK-NEXT: ret %lhs = load fp128, fp128* @lhs, align 16 @@ -356,6 +377,8 @@ ; CHECK-NEXT: str d0, [x8, :lo12:vardouble] ; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload ; CHECK-NEXT: add sp, sp, #32 +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w30 ; CHECK-NEXT: ret %val = load fp128, fp128* @lhs, align 16 @@ -393,6 +416,9 @@ ; CHECK-NEXT: bl __extenddftf2 ; CHECK-NEXT: str q0, [x19, :lo12:lhs] ; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w19 +; CHECK-NEXT: .cfi_restore w30 ; CHECK-NEXT: ret %val = load fp128, fp128* @lhs, align 16 @@ -421,6 +447,7 @@ ; CHECK-NEXT: eor w8, w8, #0x80 ; CHECK-NEXT: strb w8, [sp, #15] ; CHECK-NEXT: ldr q0, [sp], #16 +; CHECK-NEXT: .cfi_def_cfa_offset 0 ; CHECK-NEXT: ret ;; We convert this to fneg, and target-independent code expands it with Index: llvm/test/CodeGen/AArch64/arm64-memset-inline.ll =================================================================== --- llvm/test/CodeGen/AArch64/arm64-memset-inline.ll +++ llvm/test/CodeGen/AArch64/arm64-memset-inline.ll @@ -69,6 +69,8 @@ ; CHECK-NEXT: str wzr, [sp, #12] ; CHECK-NEXT: bl something ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w30 ; CHECK-NEXT: ret %buf = alloca [4 x i8], align 1 %cast = bitcast [4 x i8]* %buf to i8* @@ -86,6 +88,8 @@ ; CHECK-NEXT: add x0, sp, #8 ; CHECK-NEXT: bl something ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w30 ; CHECK-NEXT: ret %buf = alloca [8 x i8], align 1 %cast = bitcast [8 x i8]* %buf to i8* @@ -107,6 +111,8 @@ ; CHECK-NEXT: bl something ; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload ; CHECK-NEXT: add sp, sp, #32 +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w30 ; CHECK-NEXT: ret %buf = alloca [12 x i8], align 1 %cast = bitcast [12 x i8]* %buf to i8* @@ -127,6 +133,8 @@ ; CHECK-NEXT: bl something ; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload ; CHECK-NEXT: add sp, sp, #32 +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w30 ; CHECK-NEXT: ret %buf = alloca [16 x i8], align 1 %cast = bitcast [16 x i8]* %buf to i8* @@ -148,6 +156,8 @@ ; CHECK-NEXT: bl something ; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload ; CHECK-NEXT: add sp, sp, #48 +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w30 ; CHECK-NEXT: ret %buf = alloca [20 x i8], align 1 %cast = bitcast [20 x i8]* %buf to i8* @@ -170,6 +180,8 @@ ; CHECK-NEXT: bl something ; CHECK-NEXT: ldr 
x30, [sp, #32] // 8-byte Folded Reload ; CHECK-NEXT: add sp, sp, #48 +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w30 ; CHECK-NEXT: ret %buf = alloca [26 x i8], align 1 %cast = bitcast [26 x i8]* %buf to i8* @@ -191,6 +203,8 @@ ; CHECK-NEXT: bl something ; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload ; CHECK-NEXT: add sp, sp, #48 +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w30 ; CHECK-NEXT: ret %buf = alloca [32 x i8], align 1 %cast = bitcast [32 x i8]* %buf to i8* @@ -213,6 +227,8 @@ ; CHECK-NEXT: bl something ; CHECK-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload ; CHECK-NEXT: add sp, sp, #64 +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w30 ; CHECK-NEXT: ret %buf = alloca [40 x i8], align 1 %cast = bitcast [40 x i8]* %buf to i8* @@ -235,6 +251,8 @@ ; CHECK-NEXT: bl something ; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload ; CHECK-NEXT: add sp, sp, #80 +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w30 ; CHECK-NEXT: ret %buf = alloca [64 x i8], align 1 %cast = bitcast [64 x i8]* %buf to i8* @@ -258,6 +276,8 @@ ; CHECK-NEXT: bl something ; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload ; CHECK-NEXT: add sp, sp, #96 +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w30 ; CHECK-NEXT: ret %buf = alloca [72 x i8], align 1 %cast = bitcast [72 x i8]* %buf to i8* @@ -282,6 +302,8 @@ ; CHECK-NEXT: bl something ; CHECK-NEXT: ldr x30, [sp, #128] // 8-byte Folded Reload ; CHECK-NEXT: add sp, sp, #144 +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w30 ; CHECK-NEXT: ret %buf = alloca [128 x i8], align 1 %cast = bitcast [128 x i8]* %buf to i8* @@ -311,6 +333,9 @@ ; CHECK-NEXT: bl something ; CHECK-NEXT: ldp x29, x30, [sp, #256] // 16-byte Folded Reload ; CHECK-NEXT: add sp, sp, #272 +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w30 +; CHECK-NEXT: .cfi_restore w29 ; CHECK-NEXT: ret %buf = alloca [256 x i8], align 1 %cast = bitcast [256 x i8]* %buf to i8* @@ -330,6 +355,8 @@ ; CHECK-NEXT: str w8, [sp, #12] ; CHECK-NEXT: bl something ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w30 ; CHECK-NEXT: ret %buf = alloca [4 x i8], align 1 %cast = bitcast [4 x i8]* %buf to i8* @@ -348,6 +375,8 @@ ; CHECK-NEXT: add x0, sp, #8 ; CHECK-NEXT: bl something ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w30 ; CHECK-NEXT: ret %buf = alloca [8 x i8], align 1 %cast = bitcast [8 x i8]* %buf to i8* @@ -370,6 +399,8 @@ ; CHECK-NEXT: bl something ; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload ; CHECK-NEXT: add sp, sp, #32 +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w30 ; CHECK-NEXT: ret %buf = alloca [12 x i8], align 1 %cast = bitcast [12 x i8]* %buf to i8* @@ -391,6 +422,8 @@ ; CHECK-NEXT: bl something ; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload ; CHECK-NEXT: add sp, sp, #32 +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w30 ; CHECK-NEXT: ret %buf = alloca [16 x i8], align 1 %cast = bitcast [16 x i8]* %buf to i8* @@ -413,6 +446,8 @@ ; CHECK-NEXT: bl something ; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload ; CHECK-NEXT: add sp, sp, #48 +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w30 ; CHECK-NEXT: ret %buf = alloca [20 x i8], align 1 %cast = bitcast [20 x i8]* %buf to i8* @@ -436,6 +471,8 @@ ; CHECK-NEXT: bl something ; CHECK-NEXT: 
ldr x30, [sp, #32] // 8-byte Folded Reload ; CHECK-NEXT: add sp, sp, #48 +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w30 ; CHECK-NEXT: ret %buf = alloca [26 x i8], align 1 %cast = bitcast [26 x i8]* %buf to i8* @@ -457,6 +494,8 @@ ; CHECK-NEXT: bl something ; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload ; CHECK-NEXT: add sp, sp, #48 +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w30 ; CHECK-NEXT: ret %buf = alloca [32 x i8], align 1 %cast = bitcast [32 x i8]* %buf to i8* @@ -480,6 +519,8 @@ ; CHECK-NEXT: bl something ; CHECK-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload ; CHECK-NEXT: add sp, sp, #64 +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w30 ; CHECK-NEXT: ret %buf = alloca [40 x i8], align 1 %cast = bitcast [40 x i8]* %buf to i8* @@ -502,6 +543,8 @@ ; CHECK-NEXT: bl something ; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload ; CHECK-NEXT: add sp, sp, #80 +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w30 ; CHECK-NEXT: ret %buf = alloca [64 x i8], align 1 %cast = bitcast [64 x i8]* %buf to i8* @@ -526,6 +569,8 @@ ; CHECK-NEXT: bl something ; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload ; CHECK-NEXT: add sp, sp, #96 +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w30 ; CHECK-NEXT: ret %buf = alloca [72 x i8], align 1 %cast = bitcast [72 x i8]* %buf to i8* @@ -550,6 +595,8 @@ ; CHECK-NEXT: bl something ; CHECK-NEXT: ldr x30, [sp, #128] // 8-byte Folded Reload ; CHECK-NEXT: add sp, sp, #144 +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w30 ; CHECK-NEXT: ret %buf = alloca [128 x i8], align 1 %cast = bitcast [128 x i8]* %buf to i8* @@ -579,6 +626,9 @@ ; CHECK-NEXT: bl something ; CHECK-NEXT: ldp x29, x30, [sp, #256] // 16-byte Folded Reload ; CHECK-NEXT: add sp, sp, #272 +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w30 +; CHECK-NEXT: .cfi_restore w29 ; CHECK-NEXT: ret %buf = alloca [256 x i8], align 1 %cast = bitcast [256 x i8]* %buf to i8* Index: llvm/test/CodeGen/AArch64/arm64-neon-copy.ll =================================================================== --- llvm/test/CodeGen/AArch64/arm64-neon-copy.ll +++ llvm/test/CodeGen/AArch64/arm64-neon-copy.ll @@ -1239,6 +1239,7 @@ ; CHECK-NEXT: mov v1.h[3], v0.h[3] ; CHECK-NEXT: fmov d0, d1 ; CHECK-NEXT: add sp, sp, #16 +; CHECK-NEXT: .cfi_def_cfa_offset 0 ; CHECK-NEXT: ret %tmp = extractelement <8 x i16> %x, i32 %idx %tmp2 = insertelement <4 x i16> undef, i16 %tmp, i32 0 @@ -1267,6 +1268,7 @@ ; CHECK-NEXT: mov v1.h[3], v0.h[3] ; CHECK-NEXT: fmov d0, d1 ; CHECK-NEXT: add sp, sp, #16 +; CHECK-NEXT: .cfi_def_cfa_offset 0 ; CHECK-NEXT: ret %tmp = extractelement <8 x i16> %x, i32 0 %tmp2 = insertelement <4 x i16> undef, i16 %tmp, i32 %idx Index: llvm/test/CodeGen/AArch64/arm64-neon-mul-div.ll =================================================================== --- llvm/test/CodeGen/AArch64/arm64-neon-mul-div.ll +++ llvm/test/CodeGen/AArch64/arm64-neon-mul-div.ll @@ -904,6 +904,15 @@ ; CHECK-NEXT: mov v2.b[15], w8 ; CHECK-NEXT: mov v0.16b, v2.16b ; CHECK-NEXT: ldp x26, x25, [sp], #64 // 16-byte Folded Reload +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w19 +; CHECK-NEXT: .cfi_restore w20 +; CHECK-NEXT: .cfi_restore w21 +; CHECK-NEXT: .cfi_restore w22 +; CHECK-NEXT: .cfi_restore w23 +; CHECK-NEXT: .cfi_restore w24 +; CHECK-NEXT: .cfi_restore w25 +; CHECK-NEXT: .cfi_restore w26 ; CHECK-NEXT: ret %tmp3 = srem <16 x i8> %A, %B; ret <16 x i8> %tmp3 @@ -1268,6 +1277,15 @@ ; 
CHECK-NEXT: mov v2.b[15], w8 ; CHECK-NEXT: mov v0.16b, v2.16b ; CHECK-NEXT: ldp x26, x25, [sp], #64 // 16-byte Folded Reload +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w19 +; CHECK-NEXT: .cfi_restore w20 +; CHECK-NEXT: .cfi_restore w21 +; CHECK-NEXT: .cfi_restore w22 +; CHECK-NEXT: .cfi_restore w23 +; CHECK-NEXT: .cfi_restore w24 +; CHECK-NEXT: .cfi_restore w25 +; CHECK-NEXT: .cfi_restore w26 ; CHECK-NEXT: ret %tmp3 = urem <16 x i8> %A, %B; ret <16 x i8> %tmp3 @@ -1489,6 +1507,8 @@ ; CHECK-NEXT: mov v0.s[1], v1.s[0] ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: add sp, sp, #64 +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w30 ; CHECK-NEXT: ret %tmp3 = frem <2 x float> %A, %B; ret <2 x float> %tmp3 @@ -1532,6 +1552,8 @@ ; CHECK-NEXT: mov v1.s[3], v0.s[0] ; CHECK-NEXT: mov v0.16b, v1.16b ; CHECK-NEXT: add sp, sp, #64 +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w30 ; CHECK-NEXT: ret %tmp3 = frem <4 x float> %A, %B; ret <4 x float> %tmp3 @@ -1545,6 +1567,8 @@ ; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: bl fmod ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w30 ; CHECK-NEXT: ret %tmp3 = frem <1 x double> %A, %B; ret <1 x double> %tmp3 @@ -1571,6 +1595,8 @@ ; CHECK-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload ; CHECK-NEXT: mov v0.d[1], v1.d[0] ; CHECK-NEXT: add sp, sp, #64 +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w30 ; CHECK-NEXT: ret %tmp3 = frem <2 x double> %A, %B; ret <2 x double> %tmp3 Index: llvm/test/CodeGen/AArch64/atomicrmw-O0.ll =================================================================== --- llvm/test/CodeGen/AArch64/atomicrmw-O0.ll +++ llvm/test/CodeGen/AArch64/atomicrmw-O0.ll @@ -41,6 +41,7 @@ ; NOLSE-NEXT: .LBB0_5: // %atomicrmw.end ; NOLSE-NEXT: ldr w0, [sp, #12] // 4-byte Folded Reload ; NOLSE-NEXT: add sp, sp, #32 +; NOLSE-NEXT: .cfi_def_cfa_offset 0 ; NOLSE-NEXT: ret ; ; LSE-LABEL: test_rmw_add_8: @@ -90,6 +91,7 @@ ; NOLSE-NEXT: .LBB1_5: // %atomicrmw.end ; NOLSE-NEXT: ldr w0, [sp, #12] // 4-byte Folded Reload ; NOLSE-NEXT: add sp, sp, #32 +; NOLSE-NEXT: .cfi_def_cfa_offset 0 ; NOLSE-NEXT: ret ; ; LSE-LABEL: test_rmw_add_16: @@ -139,6 +141,7 @@ ; NOLSE-NEXT: .LBB2_5: // %atomicrmw.end ; NOLSE-NEXT: ldr w0, [sp, #12] // 4-byte Folded Reload ; NOLSE-NEXT: add sp, sp, #32 +; NOLSE-NEXT: .cfi_def_cfa_offset 0 ; NOLSE-NEXT: ret ; ; LSE-LABEL: test_rmw_add_32: @@ -188,6 +191,7 @@ ; NOLSE-NEXT: .LBB3_5: // %atomicrmw.end ; NOLSE-NEXT: ldr x0, [sp, #8] // 8-byte Folded Reload ; NOLSE-NEXT: add sp, sp, #32 +; NOLSE-NEXT: .cfi_def_cfa_offset 0 ; NOLSE-NEXT: ret ; ; LSE-LABEL: test_rmw_add_64: @@ -254,6 +258,7 @@ ; NOLSE-NEXT: ldr x1, [sp, #8] // 8-byte Folded Reload ; NOLSE-NEXT: ldr x0, [sp, #16] // 8-byte Folded Reload ; NOLSE-NEXT: add sp, sp, #48 +; NOLSE-NEXT: .cfi_def_cfa_offset 0 ; NOLSE-NEXT: ret ; ; LSE-LABEL: test_rmw_add_128: @@ -296,6 +301,7 @@ ; LSE-NEXT: ldr x1, [sp, #40] // 8-byte Folded Reload ; LSE-NEXT: ldr x0, [sp, #48] // 8-byte Folded Reload ; LSE-NEXT: add sp, sp, #80 +; LSE-NEXT: .cfi_def_cfa_offset 0 ; LSE-NEXT: ret entry: %res = atomicrmw add i128* %dst, i128 1 seq_cst @@ -339,6 +345,7 @@ ; NOLSE-NEXT: .LBB5_5: // %atomicrmw.end ; NOLSE-NEXT: ldr w0, [sp, #12] // 4-byte Folded Reload ; NOLSE-NEXT: add sp, sp, #32 +; NOLSE-NEXT: .cfi_def_cfa_offset 0 ; NOLSE-NEXT: ret ; ; LSE-LABEL: test_rmw_nand_8: @@ -368,6 +375,7 @@ ; LSE-NEXT: .LBB5_2: // %atomicrmw.end ; LSE-NEXT: 
ldr w0, [sp, #12] // 4-byte Folded Reload ; LSE-NEXT: add sp, sp, #32 +; LSE-NEXT: .cfi_def_cfa_offset 0 ; LSE-NEXT: ret entry: %res = atomicrmw nand i8* %dst, i8 1 seq_cst @@ -412,6 +420,7 @@ ; NOLSE-NEXT: .LBB6_5: // %atomicrmw.end ; NOLSE-NEXT: ldr w0, [sp, #12] // 4-byte Folded Reload ; NOLSE-NEXT: add sp, sp, #32 +; NOLSE-NEXT: .cfi_def_cfa_offset 0 ; NOLSE-NEXT: ret ; ; LSE-LABEL: test_rmw_nand_16: @@ -441,6 +450,7 @@ ; LSE-NEXT: .LBB6_2: // %atomicrmw.end ; LSE-NEXT: ldr w0, [sp, #12] // 4-byte Folded Reload ; LSE-NEXT: add sp, sp, #32 +; LSE-NEXT: .cfi_def_cfa_offset 0 ; LSE-NEXT: ret entry: %res = atomicrmw nand i16* %dst, i16 1 seq_cst @@ -485,6 +495,7 @@ ; NOLSE-NEXT: .LBB7_5: // %atomicrmw.end ; NOLSE-NEXT: ldr w0, [sp, #12] // 4-byte Folded Reload ; NOLSE-NEXT: add sp, sp, #32 +; NOLSE-NEXT: .cfi_def_cfa_offset 0 ; NOLSE-NEXT: ret ; ; LSE-LABEL: test_rmw_nand_32: @@ -514,6 +525,7 @@ ; LSE-NEXT: .LBB7_2: // %atomicrmw.end ; LSE-NEXT: ldr w0, [sp, #12] // 4-byte Folded Reload ; LSE-NEXT: add sp, sp, #32 +; LSE-NEXT: .cfi_def_cfa_offset 0 ; LSE-NEXT: ret entry: %res = atomicrmw nand i32* %dst, i32 1 seq_cst @@ -561,6 +573,7 @@ ; NOLSE-NEXT: .LBB8_5: // %atomicrmw.end ; NOLSE-NEXT: ldr x0, [sp, #8] // 8-byte Folded Reload ; NOLSE-NEXT: add sp, sp, #32 +; NOLSE-NEXT: .cfi_def_cfa_offset 0 ; NOLSE-NEXT: ret ; ; LSE-LABEL: test_rmw_nand_64: @@ -593,6 +606,7 @@ ; LSE-NEXT: .LBB8_2: // %atomicrmw.end ; LSE-NEXT: ldr x0, [sp, #8] // 8-byte Folded Reload ; LSE-NEXT: add sp, sp, #32 +; LSE-NEXT: .cfi_def_cfa_offset 0 ; LSE-NEXT: ret entry: %res = atomicrmw nand i64* %dst, i64 1 seq_cst @@ -655,6 +669,7 @@ ; NOLSE-NEXT: ldr x1, [sp, #8] // 8-byte Folded Reload ; NOLSE-NEXT: ldr x0, [sp, #16] // 8-byte Folded Reload ; NOLSE-NEXT: add sp, sp, #48 +; NOLSE-NEXT: .cfi_def_cfa_offset 0 ; NOLSE-NEXT: ret ; ; LSE-LABEL: test_rmw_nand_128: @@ -700,6 +715,7 @@ ; LSE-NEXT: ldr x1, [sp, #40] // 8-byte Folded Reload ; LSE-NEXT: ldr x0, [sp, #48] // 8-byte Folded Reload ; LSE-NEXT: add sp, sp, #80 +; LSE-NEXT: .cfi_def_cfa_offset 0 ; LSE-NEXT: ret entry: %res = atomicrmw nand i128* %dst, i128 1 seq_cst Index: llvm/test/CodeGen/AArch64/atomicrmw-xchg-fp.ll =================================================================== --- llvm/test/CodeGen/AArch64/atomicrmw-xchg-fp.ll +++ llvm/test/CodeGen/AArch64/atomicrmw-xchg-fp.ll @@ -90,6 +90,7 @@ ; NOLSE-NEXT: // %bb.2: // %atomicrmw.end ; NOLSE-NEXT: stp x11, x10, [sp] ; NOLSE-NEXT: ldr q0, [sp], #32 +; NOLSE-NEXT: .cfi_def_cfa_offset 0 ; NOLSE-NEXT: ret ; ; LSE-LABEL: test_rmw_xchg_f128: @@ -106,6 +107,7 @@ ; LSE-NEXT: // %bb.2: // %atomicrmw.end ; LSE-NEXT: stp x11, x10, [sp] ; LSE-NEXT: ldr q0, [sp], #32 +; LSE-NEXT: .cfi_def_cfa_offset 0 ; LSE-NEXT: ret %res = atomicrmw xchg fp128* %dst, fp128 %new seq_cst ret fp128 %res Index: llvm/test/CodeGen/AArch64/bcmp-inline-small.ll =================================================================== --- llvm/test/CodeGen/AArch64/bcmp-inline-small.ll +++ llvm/test/CodeGen/AArch64/bcmp-inline-small.ll @@ -29,6 +29,8 @@ ; CHECKS-NEXT: cmp w0, #0 ; CHECKS-NEXT: cset w0, eq ; CHECKS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECKS-NEXT: .cfi_def_cfa_offset 0 +; CHECKS-NEXT: .cfi_restore w30 ; CHECKS-NEXT: ret entry: %bcmp = call i32 @bcmp(i8* %s1, i8* %s2, i64 15) @@ -61,6 +63,8 @@ ; CHECKS-NEXT: cmp w0, #0 ; CHECKS-NEXT: cset w0, eq ; CHECKS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECKS-NEXT: .cfi_def_cfa_offset 0 +; CHECKS-NEXT: .cfi_restore w30 ; CHECKS-NEXT: ret entry: %bcmp = call i32 
@bcmp(i8* %s1, i8* %s2, i64 15) @@ -98,6 +102,8 @@ ; CHECKS-NEXT: cmp w0, #0 ; CHECKS-NEXT: cset w0, eq ; CHECKS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECKS-NEXT: .cfi_def_cfa_offset 0 +; CHECKS-NEXT: .cfi_restore w30 ; CHECKS-NEXT: ret entry: %memcmp = call i32 @memcmp(i8* %s1, i8* %s2, i64 31) Index: llvm/test/CodeGen/AArch64/build-one-lane.ll =================================================================== --- llvm/test/CodeGen/AArch64/build-one-lane.ll +++ llvm/test/CodeGen/AArch64/build-one-lane.ll @@ -327,6 +327,7 @@ ; CHECK-NEXT: stp q0, q1, [sp] ; CHECK-NEXT: strb w10, [x8, x9] ; CHECK-NEXT: ldp q0, q1, [sp], #32 +; CHECK-NEXT: .cfi_def_cfa_offset 0 ; CHECK-NEXT: ret %b = insertelement <32 x i8> %a, i8 30, i32 %x ret <32 x i8> %b Index: llvm/test/CodeGen/AArch64/call-rv-marker.ll =================================================================== --- llvm/test/CodeGen/AArch64/call-rv-marker.ll +++ llvm/test/CodeGen/AArch64/call-rv-marker.ll @@ -44,6 +44,8 @@ ; CHECK-NEXT: bl foo0 ; SELDAG-NEXT: mov x29, x29 ; CHECK-NEXT: ldr x30, [sp], #16 +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w30 ; CHECK-NEXT: b foo2 ; entry: Index: llvm/test/CodeGen/AArch64/cmp-select-sign.ll =================================================================== --- llvm/test/CodeGen/AArch64/cmp-select-sign.ll +++ llvm/test/CodeGen/AArch64/cmp-select-sign.ll @@ -186,6 +186,8 @@ ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload ; CHECK-NEXT: add sp, sp, #32 +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w30 ; CHECK-NEXT: ret %c = icmp sgt <4 x i32> %a, %res = select <4 x i1> %c, <4 x i32> , <4 x i32> Index: llvm/test/CodeGen/AArch64/combine-comparisons-by-cse.ll =================================================================== --- llvm/test/CodeGen/AArch64/combine-comparisons-by-cse.ll +++ llvm/test/CodeGen/AArch64/combine-comparisons-by-cse.ll @@ -444,6 +444,12 @@ ; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp x22, x21, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: ldr x30, [sp], #48 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w19 +; CHECK-NEXT: .cfi_restore w20 +; CHECK-NEXT: .cfi_restore w21 +; CHECK-NEXT: .cfi_restore w22 +; CHECK-NEXT: .cfi_restore w30 ; CHECK-NEXT: ret entry: %size = getelementptr inbounds %struct.Struct, %struct.Struct* %hdCall, i64 0, i32 0 @@ -513,6 +519,10 @@ ; CHECK-NEXT: .LBB7_8: // %return ; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: ldr x30, [sp], #32 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w19 +; CHECK-NEXT: .cfi_restore w20 +; CHECK-NEXT: .cfi_restore w30 ; CHECK-NEXT: ret entry: %0 = load i32, i32* @a, align 4 @@ -589,10 +599,16 @@ ; CHECK-NEXT: // %bb.5: ; CHECK-NEXT: mov w0, #123 ; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w19 +; CHECK-NEXT: .cfi_restore w30 ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB8_6: // %if.end ; CHECK-NEXT: mov w0, wzr ; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w19 +; CHECK-NEXT: .cfi_restore w30 ; CHECK-NEXT: ret entry: %0 = load i32, i32* @a, align 4 @@ -676,6 +692,10 @@ ; CHECK-NEXT: .LBB9_4: // %return ; CHECK-NEXT: ldp x30, x19, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: ldr d8, [sp], #32 // 8-byte Folded 
Reload +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w19 +; CHECK-NEXT: .cfi_restore w30 +; CHECK-NEXT: .cfi_restore b8 ; CHECK-NEXT: ret ; CHECK-LABEL-DAG: .LBB9_3 @@ -728,6 +748,8 @@ ; CHECK-NEXT: csel w0, w9, w8, ge ; CHECK-NEXT: bl zoo ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w30 ; CHECK-NEXT: ret ; [...] Index: llvm/test/CodeGen/AArch64/cond-br-tuning.ll =================================================================== --- llvm/test/CodeGen/AArch64/cond-br-tuning.ll +++ llvm/test/CodeGen/AArch64/cond-br-tuning.ll @@ -193,6 +193,9 @@ ; CHECK-NEXT: cbnz w19, .LBB9_2 ; CHECK-NEXT: // %bb.1: // %if.end ; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w19 +; CHECK-NEXT: .cfi_restore w30 ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB9_2: // %if.then ; CHECK-NEXT: bl foo Index: llvm/test/CodeGen/AArch64/csr-split.ll =================================================================== --- llvm/test/CodeGen/AArch64/csr-split.ll +++ llvm/test/CodeGen/AArch64/csr-split.ll @@ -19,12 +19,18 @@ ; CHECK-NEXT: b.eq .LBB0_2 ; CHECK-NEXT: // %bb.1: // %if.end ; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w19 +; CHECK-NEXT: .cfi_restore w30 ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB0_2: // %if.then ; CHECK-NEXT: mov x19, x0 ; CHECK-NEXT: bl callVoid ; CHECK-NEXT: mov x0, x19 ; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w19 +; CHECK-NEXT: .cfi_restore w30 ; CHECK-NEXT: b callNonVoid ; ; CHECK-APPLE-LABEL: test1: @@ -93,10 +99,16 @@ ; CHECK-NEXT: bl callVoid ; CHECK-NEXT: mov x0, x19 ; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w19 +; CHECK-NEXT: .cfi_restore w30 ; CHECK-NEXT: b callNonVoid ; CHECK-NEXT: .LBB1_3: // %return ; CHECK-NEXT: mov w0, wzr ; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w19 +; CHECK-NEXT: .cfi_restore w30 ; CHECK-NEXT: ret ; ; CHECK-APPLE-LABEL: test2: @@ -171,6 +183,10 @@ ; CHECK-NEXT: mov x0, x19 ; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: ldr x30, [sp], #32 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w19 +; CHECK-NEXT: .cfi_restore w20 +; CHECK-NEXT: .cfi_restore w30 ; CHECK-NEXT: ret ; ; CHECK-APPLE-LABEL: test3: Index: llvm/test/CodeGen/AArch64/emutls.ll =================================================================== --- llvm/test/CodeGen/AArch64/emutls.ll +++ llvm/test/CodeGen/AArch64/emutls.ll @@ -15,6 +15,7 @@ ; ARM64-NEXT: ldr x0, [x0, :got_lo12:my_emutls_v_xyz] ; ARM64-NEXT: bl my_emutls_get_address ; ARM64-NEXT: ldr w0, [x0] +; ARM64-NEXT: .cfi_def_cfa wsp, 16 ; ARM64-NEXT: ldp x29, x30, [sp] entry: @@ -38,6 +39,7 @@ ; ARM64-NEXT: ldr x0, [x0, :got_lo12:__emutls_v.i1] ; ARM64-NEXT: bl __emutls_get_address ; ARM64-NEXT: ldr w0, [x0] +; ARM64-NEXT: .cfi_def_cfa wsp, 16 ; ARM64-NEXT: ldp x29, x30, [sp] entry: @@ -50,6 +52,7 @@ ; ARM64: adrp x0, :got:__emutls_v.i1 ; ARM64-NEXT: ldr x0, [x0, :got_lo12:__emutls_v.i1] ; ARM64-NEXT: bl __emutls_get_address +; ARM64-NEXT: .cfi_def_cfa wsp, 16 ; ARM64-NEXT: ldp x29, x30, [sp] entry: @@ -73,6 +76,7 @@ ; ARM64: adrp x0, __emutls_v.i3 ; ARM64: add x0, x0, 
:lo12:__emutls_v.i3 ; ARM64-NEXT: bl __emutls_get_address +; ARM64-NEXT: .cfi_def_cfa wsp, 16 ; ARM64-NEXT: ldp x29, x30, [sp] entry: Index: llvm/test/CodeGen/AArch64/fastcc.ll =================================================================== --- llvm/test/CodeGen/AArch64/fastcc.ll +++ llvm/test/CodeGen/AArch64/fastcc.ll @@ -49,11 +49,17 @@ ret void ; CHECK: ldp x29, x30, [sp, #32] ; CHECK-NEXT: add sp, sp, #48 +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w30 +; CHECK-NEXT: .cfi_restore w29 ; CHECK-NEXT: ret ; CHECK-TAIL: ldp x29, x30, [sp, #32] ; CHECK-TAIL-NEXT: add sp, sp, #48 +; CHECK-TAIL-NEXT: .cfi_def_cfa_offset 0 +; CHECK-TAIL-NEXT: .cfi_restore w30 +; CHECK-TAIL-NEXT: .cfi_restore w29 ; CHECK-TAIL-NEXT: ret } @@ -100,13 +106,20 @@ ; CHECK-TAIL-NOT: sub sp, sp ret void +; CHECK-NEXT: .cfi_def_cfa wsp, 48 ; CHECK-NEXT: ldp x29, x30, [sp, #32] -; CHECK: add sp, sp, #48 +; CHECK-NEXT: add sp, sp, #48 +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w30 +; CHECK-NEXT: .cfi_restore w29 ; CHECK-NEXT: ret ; CHECK-TAIL: ldp x29, x30, [sp, #32] ; CHECK-TAIL-NEXT: add sp, sp, #64 +; CHECK-TAIL-NEXT: .cfi_def_cfa_offset -16 +; CHECK-TAIL-NEXT: .cfi_restore w30 +; CHECK-TAIL-NEXT: .cfi_restore w29 ; CHECK-TAIL-NEXT: ret } @@ -146,12 +159,19 @@ ; CHECK-TAIL-NOT: sub sp, sp ret void -; CHECK: ldp x29, x30, [sp, #32] +; CHECK: .cfi_def_cfa wsp, 48 +; CHECK-NEXT: ldp x29, x30, [sp, #32] ; CHECK-NEXT: add sp, sp, #48 +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w30 +; CHECK-NEXT: .cfi_restore w29 ; CHECK-NEXT: ret ; CHECK-TAIL: ldp x29, x30, [sp, #32] ; CHECK-TAIL-NEXT: add sp, sp, #80 +; CHECK-TAIL-NEXT: .cfi_def_cfa_offset -32 +; CHECK-TAIL-NEXT: .cfi_restore w30 +; CHECK-TAIL-NEXT: .cfi_restore w29 ; CHECK-TAIL-NEXT: ret } @@ -162,6 +182,8 @@ ; CHECK: nop ; CHECK-NEXT: //NO_APP ; CHECK-NEXT: ldr x20, [sp], #16 +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w20 ; CHECK-NEXT: ret ; CHECK-TAIL-LABEL: func_stack32_leaf: @@ -169,7 +191,10 @@ ; CHECK-TAIL: nop ; CHECK-TAIL-NEXT: //NO_APP ; CHECK-TAIL-NEXT: ldr x20, [sp], #16 -; CHECK-TAIL-NEXT: add sp, sp, #32 +; CHECK-TAIL-NEXT: .cfi_def_cfa_offset 0 +; CHECK-TAIL-NEXT: add sp, sp, #32 +; CHECK-TAIL-NEXT: .cfi_def_cfa_offset -32 +; CHECK-TAIL-NEXT: .cfi_restore w20 ; CHECK-TAIL-NEXT: ret ; CHECK-TAIL-RZ-LABEL: func_stack32_leaf: @@ -178,7 +203,10 @@ ; CHECK-TAIL-RZ: nop ; CHECK-TAIL-RZ-NEXT: //NO_APP ; CHECK-TAIL-RZ-NEXT: ldr x20, [sp], #16 -; CHECK-TAIL-RZ-NEXT: add sp, sp, #32 +; CHECK-TAIL-RZ-NEXT: .cfi_def_cfa_offset 0 +; CHECK-TAIL-RZ-NEXT: add sp, sp, #32 +; CHECK-TAIL-RZ-NEXT: .cfi_def_cfa_offset -32 +; CHECK-TAIL-RZ-NEXT: .cfi_restore w20 ; CHECK-TAIL-RZ-NEXT: ret ; Make sure there is a callee-save register to save/restore. 
@@ -196,6 +224,8 @@ ; CHECK-NEXT: //NO_APP ; CHECK-NEXT: ldr x20, [sp, #16] ; CHECK-NEXT: add sp, sp, #32 +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w20 ; CHECK-NEXT: ret ; CHECK-TAIL-LABEL: func_stack32_leaf_local: @@ -206,6 +236,8 @@ ; CHECK-TAIL-NEXT: //NO_APP ; CHECK-TAIL-NEXT: ldr x20, [sp, #16] ; CHECK-TAIL-NEXT: add sp, sp, #64 +; CHECK-TAIL-NEXT: .cfi_def_cfa_offset -32 +; CHECK-TAIL-NEXT: .cfi_restore w20 ; CHECK-TAIL-NEXT: ret ; CHECK-TAIL-RZ-LABEL: func_stack32_leaf_local: @@ -214,7 +246,10 @@ ; CHECK-TAIL-RZ: nop ; CHECK-TAIL-RZ-NEXT: //NO_APP ; CHECK-TAIL-RZ-NEXT: ldr x20, [sp], #16 -; CHECK-TAIL-RZ-NEXT: add sp, sp, #32 +; CHECK-TAIL-RZ-NEXT: .cfi_def_cfa_offset 0 +; CHECK-TAIL-RZ-NEXT: add sp, sp, #32 +; CHECK-TAIL-RZ-NEXT: .cfi_def_cfa_offset -32 +; CHECK-TAIL-RZ-NEXT: .cfi_restore w20 ; CHECK-TAIL-RZ-NEXT: ret %val0 = alloca [2 x i64], align 8 @@ -228,16 +263,21 @@ define fastcc void @func_stack32_leaf_local_nocs([8 x i64], i128 %stacked0, i128 %stacked1) { ; CHECK-LABEL: func_stack32_leaf_local_nocs: ; CHECK: sub sp, sp, #16 -; CHECK: add sp, sp, #16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: add sp, sp, #16 +; CHECK-NEXT: .cfi_def_cfa_offset 0 ; CHECK-NEXT: ret ; CHECK-TAIL-LABEL: func_stack32_leaf_local_nocs: ; CHECK-TAIL: sub sp, sp, #16 -; CHECK-TAIL: add sp, sp, #48 +; CHECK-TAIL-NEXT: .cfi_def_cfa_offset 16 +; CHECK-TAIL-NEXT: add sp, sp, #48 +; CHECK-TAIL-NEXT: .cfi_def_cfa_offset -32 ; CHECK-TAIL-NEXT: ret ; CHECK-TAIL-RZ-LABEL: func_stack32_leaf_local_nocs: ; CHECK-TAIL-RZ: add sp, sp, #32 +; CHECK-TAIL-RZ-NEXT: .cfi_def_cfa_offset -32 ; CHECK-TAIL-RZ-NEXT: ret %val0 = alloca [2 x i64], align 8 Index: llvm/test/CodeGen/AArch64/fcopysign.ll =================================================================== --- llvm/test/CodeGen/AArch64/fcopysign.ll +++ llvm/test/CodeGen/AArch64/fcopysign.ll @@ -25,6 +25,7 @@ ; CHECK-NEXT: bfxil w8, w9, #0, #7 ; CHECK-NEXT: strb w8, [sp, #15] ; CHECK-NEXT: ldr q0, [sp], #16 +; CHECK-NEXT: .cfi_def_cfa_offset 0 ; CHECK-NEXT: ret entry: %v = load double, double* @val_double, align 8 @@ -48,6 +49,7 @@ ; CHECK-NEXT: bfxil w8, w9, #0, #7 ; CHECK-NEXT: strb w8, [sp, #15] ; CHECK-NEXT: ldr q0, [sp], #16 +; CHECK-NEXT: .cfi_def_cfa_offset 0 ; CHECK-NEXT: ret entry: %v0 = load fp128, fp128* @val_fp128, align 16 Index: llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll =================================================================== --- llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll +++ llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll @@ -277,6 +277,9 @@ ; CHECK-NEXT: ldp x30, x19, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: fmov s0, w8 ; CHECK-NEXT: add sp, sp, #32 +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w19 +; CHECK-NEXT: .cfi_restore w30 ; CHECK-NEXT: ret %x = call <1 x i32> @llvm.fptosi.sat.v1f128.v1i32(<1 x fp128> %f) ret <1 x i32> %x @@ -346,6 +349,12 @@ ; CHECK-NEXT: ldp x22, x21, [sp, #80] // 16-byte Folded Reload ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: add sp, sp, #112 +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w19 +; CHECK-NEXT: .cfi_restore w20 +; CHECK-NEXT: .cfi_restore w21 +; CHECK-NEXT: .cfi_restore w22 +; CHECK-NEXT: .cfi_restore w30 ; CHECK-NEXT: ret %x = call <2 x i32> @llvm.fptosi.sat.v2f128.v2i32(<2 x fp128> %f) ret <2 x i32> %x @@ -436,6 +445,12 @@ ; CHECK-NEXT: ldp x22, x21, [sp, #96] // 16-byte Folded Reload ; CHECK-NEXT: mov v0.s[2], w8 ; CHECK-NEXT: add sp, sp, #128 +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; 
CHECK-NEXT: .cfi_restore w19 +; CHECK-NEXT: .cfi_restore w20 +; CHECK-NEXT: .cfi_restore w21 +; CHECK-NEXT: .cfi_restore w22 +; CHECK-NEXT: .cfi_restore w30 ; CHECK-NEXT: ret %x = call <3 x i32> @llvm.fptosi.sat.v3f128.v3i32(<3 x fp128> %f) ret <3 x i32> %x @@ -546,6 +561,12 @@ ; CHECK-NEXT: ldp x22, x21, [sp, #112] // 16-byte Folded Reload ; CHECK-NEXT: mov v0.s[3], w8 ; CHECK-NEXT: add sp, sp, #144 +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w19 +; CHECK-NEXT: .cfi_restore w20 +; CHECK-NEXT: .cfi_restore w21 +; CHECK-NEXT: .cfi_restore w22 +; CHECK-NEXT: .cfi_restore w30 ; CHECK-NEXT: ret %x = call <4 x i32> @llvm.fptosi.sat.v4f128.v4i32(<4 x fp128> %f) ret <4 x i32> %x @@ -866,6 +887,15 @@ ; CHECK-NEXT: mov v0.d[1], x1 ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: add sp, sp, #80 +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w19 +; CHECK-NEXT: .cfi_restore w20 +; CHECK-NEXT: .cfi_restore w21 +; CHECK-NEXT: .cfi_restore w22 +; CHECK-NEXT: .cfi_restore w30 +; CHECK-NEXT: .cfi_restore b8 +; CHECK-NEXT: .cfi_restore b9 +; CHECK-NEXT: .cfi_restore b10 ; CHECK-NEXT: ret %x = call <2 x i100> @llvm.fptosi.sat.v2f32.v2i100(<2 x float> %f) ret <2 x i100> %x @@ -933,6 +963,15 @@ ; CHECK-NEXT: mov v0.d[1], x1 ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: add sp, sp, #80 +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w19 +; CHECK-NEXT: .cfi_restore w20 +; CHECK-NEXT: .cfi_restore w21 +; CHECK-NEXT: .cfi_restore w22 +; CHECK-NEXT: .cfi_restore w30 +; CHECK-NEXT: .cfi_restore b8 +; CHECK-NEXT: .cfi_restore b9 +; CHECK-NEXT: .cfi_restore b10 ; CHECK-NEXT: ret %x = call <2 x i128> @llvm.fptosi.sat.v2f32.v2i128(<2 x float> %f) ret <2 x i128> %x @@ -1178,6 +1217,19 @@ ; CHECK-NEXT: mov v0.d[1], x1 ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: add sp, sp, #128 +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w19 +; CHECK-NEXT: .cfi_restore w20 +; CHECK-NEXT: .cfi_restore w21 +; CHECK-NEXT: .cfi_restore w22 +; CHECK-NEXT: .cfi_restore w23 +; CHECK-NEXT: .cfi_restore w24 +; CHECK-NEXT: .cfi_restore w25 +; CHECK-NEXT: .cfi_restore w26 +; CHECK-NEXT: .cfi_restore w30 +; CHECK-NEXT: .cfi_restore b8 +; CHECK-NEXT: .cfi_restore b9 +; CHECK-NEXT: .cfi_restore b10 ; CHECK-NEXT: ret %x = call <4 x i100> @llvm.fptosi.sat.v4f32.v4i100(<4 x float> %f) ret <4 x i100> %x @@ -1283,6 +1335,19 @@ ; CHECK-NEXT: mov v0.d[1], x1 ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: add sp, sp, #128 +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w19 +; CHECK-NEXT: .cfi_restore w20 +; CHECK-NEXT: .cfi_restore w21 +; CHECK-NEXT: .cfi_restore w22 +; CHECK-NEXT: .cfi_restore w23 +; CHECK-NEXT: .cfi_restore w24 +; CHECK-NEXT: .cfi_restore w25 +; CHECK-NEXT: .cfi_restore w26 +; CHECK-NEXT: .cfi_restore w30 +; CHECK-NEXT: .cfi_restore b8 +; CHECK-NEXT: .cfi_restore b9 +; CHECK-NEXT: .cfi_restore b10 ; CHECK-NEXT: ret %x = call <4 x i128> @llvm.fptosi.sat.v4f32.v4i128(<4 x float> %f) ret <4 x i128> %x @@ -1527,6 +1592,15 @@ ; CHECK-NEXT: mov v0.d[1], x1 ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: add sp, sp, #80 +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w19 +; CHECK-NEXT: .cfi_restore w20 +; CHECK-NEXT: .cfi_restore w21 +; CHECK-NEXT: .cfi_restore w22 +; CHECK-NEXT: .cfi_restore w30 +; CHECK-NEXT: .cfi_restore b8 +; CHECK-NEXT: .cfi_restore b9 +; CHECK-NEXT: .cfi_restore b10 ; CHECK-NEXT: ret %x = call <2 x i100> @llvm.fptosi.sat.v2f64.v2i100(<2 x double> %f) ret <2 x i100> %x @@ -1593,6 +1667,15 @@ ; CHECK-NEXT: mov v0.d[1], x1 ; CHECK-NEXT: fmov x0, d0 ; 
CHECK-NEXT: add sp, sp, #80 +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w19 +; CHECK-NEXT: .cfi_restore w20 +; CHECK-NEXT: .cfi_restore w21 +; CHECK-NEXT: .cfi_restore w22 +; CHECK-NEXT: .cfi_restore w30 +; CHECK-NEXT: .cfi_restore b8 +; CHECK-NEXT: .cfi_restore b9 +; CHECK-NEXT: .cfi_restore b10 ; CHECK-NEXT: ret %x = call <2 x i128> @llvm.fptosi.sat.v2f64.v2i128(<2 x double> %f) ret <2 x i128> %x @@ -1935,6 +2018,19 @@ ; CHECK-NEXT: ldp d9, d8, [sp, #24] // 16-byte Folded Reload ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: add sp, sp, #112 +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w19 +; CHECK-NEXT: .cfi_restore w20 +; CHECK-NEXT: .cfi_restore w21 +; CHECK-NEXT: .cfi_restore w22 +; CHECK-NEXT: .cfi_restore w23 +; CHECK-NEXT: .cfi_restore w24 +; CHECK-NEXT: .cfi_restore w25 +; CHECK-NEXT: .cfi_restore w26 +; CHECK-NEXT: .cfi_restore w30 +; CHECK-NEXT: .cfi_restore b8 +; CHECK-NEXT: .cfi_restore b9 +; CHECK-NEXT: .cfi_restore b10 ; CHECK-NEXT: ret %x = call <4 x i100> @llvm.fptosi.sat.v4f16.v4i100(<4 x half> %f) ret <4 x i100> %x @@ -2043,6 +2139,19 @@ ; CHECK-NEXT: ldp d9, d8, [sp, #24] // 16-byte Folded Reload ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: add sp, sp, #112 +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w19 +; CHECK-NEXT: .cfi_restore w20 +; CHECK-NEXT: .cfi_restore w21 +; CHECK-NEXT: .cfi_restore w22 +; CHECK-NEXT: .cfi_restore w23 +; CHECK-NEXT: .cfi_restore w24 +; CHECK-NEXT: .cfi_restore w25 +; CHECK-NEXT: .cfi_restore w26 +; CHECK-NEXT: .cfi_restore w30 +; CHECK-NEXT: .cfi_restore b8 +; CHECK-NEXT: .cfi_restore b9 +; CHECK-NEXT: .cfi_restore b10 ; CHECK-NEXT: ret %x = call <4 x i128> @llvm.fptosi.sat.v4f16.v4i128(<4 x half> %f) ret <4 x i128> %x @@ -2791,6 +2900,22 @@ ; CHECK-NEXT: ldp d9, d8, [sp, #80] // 16-byte Folded Reload ; CHECK-NEXT: ldr d10, [sp, #64] // 8-byte Folded Reload ; CHECK-NEXT: add sp, sp, #192 +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w19 +; CHECK-NEXT: .cfi_restore w20 +; CHECK-NEXT: .cfi_restore w21 +; CHECK-NEXT: .cfi_restore w22 +; CHECK-NEXT: .cfi_restore w23 +; CHECK-NEXT: .cfi_restore w24 +; CHECK-NEXT: .cfi_restore w25 +; CHECK-NEXT: .cfi_restore w26 +; CHECK-NEXT: .cfi_restore w27 +; CHECK-NEXT: .cfi_restore w28 +; CHECK-NEXT: .cfi_restore w30 +; CHECK-NEXT: .cfi_restore w29 +; CHECK-NEXT: .cfi_restore b8 +; CHECK-NEXT: .cfi_restore b9 +; CHECK-NEXT: .cfi_restore b10 ; CHECK-NEXT: ret %x = call <8 x i100> @llvm.fptosi.sat.v8f16.v8i100(<8 x half> %f) ret <8 x i100> %x @@ -2974,6 +3099,22 @@ ; CHECK-NEXT: ldp d9, d8, [sp, #80] // 16-byte Folded Reload ; CHECK-NEXT: ldr d10, [sp, #64] // 8-byte Folded Reload ; CHECK-NEXT: add sp, sp, #192 +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w19 +; CHECK-NEXT: .cfi_restore w20 +; CHECK-NEXT: .cfi_restore w21 +; CHECK-NEXT: .cfi_restore w22 +; CHECK-NEXT: .cfi_restore w23 +; CHECK-NEXT: .cfi_restore w24 +; CHECK-NEXT: .cfi_restore w25 +; CHECK-NEXT: .cfi_restore w26 +; CHECK-NEXT: .cfi_restore w27 +; CHECK-NEXT: .cfi_restore w28 +; CHECK-NEXT: .cfi_restore w30 +; CHECK-NEXT: .cfi_restore w29 +; CHECK-NEXT: .cfi_restore b8 +; CHECK-NEXT: .cfi_restore b9 +; CHECK-NEXT: .cfi_restore b10 ; CHECK-NEXT: ret %x = call <8 x i128> @llvm.fptosi.sat.v8f16.v8i128(<8 x half> %f) ret <8 x i128> %x Index: llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll =================================================================== --- llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll +++ llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll @@ 
-270,6 +270,9 @@ ; CHECK-NEXT: ldp x30, x19, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: fmov s0, w8 ; CHECK-NEXT: add sp, sp, #32 +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w19 +; CHECK-NEXT: .cfi_restore w30 ; CHECK-NEXT: ret %x = call <1 x i32> @llvm.fptoui.sat.v1f128.v1i32(<1 x fp128> %f) ret <1 x i32> %x @@ -323,6 +326,10 @@ ; CHECK-NEXT: ldp x20, x19, [sp, #80] // 16-byte Folded Reload ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: add sp, sp, #96 +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w19 +; CHECK-NEXT: .cfi_restore w20 +; CHECK-NEXT: .cfi_restore w30 ; CHECK-NEXT: ret %x = call <2 x i32> @llvm.fptoui.sat.v2f128.v2i32(<2 x fp128> %f) ret <2 x i32> %x @@ -391,6 +398,10 @@ ; CHECK-NEXT: ldp x20, x19, [sp, #96] // 16-byte Folded Reload ; CHECK-NEXT: mov v0.s[2], w8 ; CHECK-NEXT: add sp, sp, #112 +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w19 +; CHECK-NEXT: .cfi_restore w20 +; CHECK-NEXT: .cfi_restore w30 ; CHECK-NEXT: ret %x = call <3 x i32> @llvm.fptoui.sat.v3f128.v3i32(<3 x fp128> %f) ret <3 x i32> %x @@ -475,6 +486,10 @@ ; CHECK-NEXT: ldp x20, x19, [sp, #112] // 16-byte Folded Reload ; CHECK-NEXT: mov v0.s[3], w8 ; CHECK-NEXT: add sp, sp, #128 +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w19 +; CHECK-NEXT: .cfi_restore w20 +; CHECK-NEXT: .cfi_restore w30 ; CHECK-NEXT: ret %x = call <4 x i32> @llvm.fptoui.sat.v4f128.v4i32(<4 x fp128> %f) ret <4 x i32> %x @@ -765,6 +780,13 @@ ; CHECK-NEXT: mov v0.d[1], x1 ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: add sp, sp, #64 +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w19 +; CHECK-NEXT: .cfi_restore w20 +; CHECK-NEXT: .cfi_restore w21 +; CHECK-NEXT: .cfi_restore w30 +; CHECK-NEXT: .cfi_restore b8 +; CHECK-NEXT: .cfi_restore b9 ; CHECK-NEXT: ret %x = call <2 x i100> @llvm.fptoui.sat.v2f32.v2i100(<2 x float> %f) ret <2 x i100> %x @@ -815,6 +837,12 @@ ; CHECK-NEXT: mov v0.d[1], x1 ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: add sp, sp, #64 +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w19 +; CHECK-NEXT: .cfi_restore w20 +; CHECK-NEXT: .cfi_restore w30 +; CHECK-NEXT: .cfi_restore b8 +; CHECK-NEXT: .cfi_restore b9 ; CHECK-NEXT: ret %x = call <2 x i128> @llvm.fptoui.sat.v2f32.v2i128(<2 x float> %f) ret <2 x i128> %x @@ -1022,6 +1050,17 @@ ; CHECK-NEXT: ldp d9, d8, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: add sp, sp, #112 +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w19 +; CHECK-NEXT: .cfi_restore w20 +; CHECK-NEXT: .cfi_restore w21 +; CHECK-NEXT: .cfi_restore w22 +; CHECK-NEXT: .cfi_restore w23 +; CHECK-NEXT: .cfi_restore w24 +; CHECK-NEXT: .cfi_restore w25 +; CHECK-NEXT: .cfi_restore w30 +; CHECK-NEXT: .cfi_restore b8 +; CHECK-NEXT: .cfi_restore b9 ; CHECK-NEXT: ret %x = call <4 x i100> @llvm.fptoui.sat.v4f32.v4i100(<4 x float> %f) ret <4 x i100> %x @@ -1104,6 +1143,16 @@ ; CHECK-NEXT: mov v0.d[1], x1 ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: add sp, sp, #112 +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w19 +; CHECK-NEXT: .cfi_restore w20 +; CHECK-NEXT: .cfi_restore w21 +; CHECK-NEXT: .cfi_restore w22 +; CHECK-NEXT: .cfi_restore w23 +; CHECK-NEXT: .cfi_restore w24 +; CHECK-NEXT: .cfi_restore w30 +; CHECK-NEXT: .cfi_restore b8 +; CHECK-NEXT: .cfi_restore b9 ; CHECK-NEXT: ret %x = call <4 x i128> @llvm.fptoui.sat.v4f32.v4i128(<4 x float> %f) ret <4 x i128> %x @@ -1304,6 +1353,13 @@ ; CHECK-NEXT: mov v0.d[1], x1 ; CHECK-NEXT: fmov x0, d0 
; CHECK-NEXT: add sp, sp, #64 +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w19 +; CHECK-NEXT: .cfi_restore w20 +; CHECK-NEXT: .cfi_restore w21 +; CHECK-NEXT: .cfi_restore w30 +; CHECK-NEXT: .cfi_restore b8 +; CHECK-NEXT: .cfi_restore b9 ; CHECK-NEXT: ret %x = call <2 x i100> @llvm.fptoui.sat.v2f64.v2i100(<2 x double> %f) ret <2 x i100> %x @@ -1353,6 +1409,12 @@ ; CHECK-NEXT: mov v0.d[1], x1 ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: add sp, sp, #64 +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w19 +; CHECK-NEXT: .cfi_restore w20 +; CHECK-NEXT: .cfi_restore w30 +; CHECK-NEXT: .cfi_restore b8 +; CHECK-NEXT: .cfi_restore b9 ; CHECK-NEXT: ret %x = call <2 x i128> @llvm.fptoui.sat.v2f64.v2i128(<2 x double> %f) ret <2 x i128> %x @@ -1642,6 +1704,17 @@ ; CHECK-NEXT: ldp d9, d8, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: add sp, sp, #96 +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w19 +; CHECK-NEXT: .cfi_restore w20 +; CHECK-NEXT: .cfi_restore w21 +; CHECK-NEXT: .cfi_restore w22 +; CHECK-NEXT: .cfi_restore w23 +; CHECK-NEXT: .cfi_restore w24 +; CHECK-NEXT: .cfi_restore w25 +; CHECK-NEXT: .cfi_restore w30 +; CHECK-NEXT: .cfi_restore b8 +; CHECK-NEXT: .cfi_restore b9 ; CHECK-NEXT: ret %x = call <4 x i100> @llvm.fptoui.sat.v4f16.v4i100(<4 x half> %f) ret <4 x i100> %x @@ -1727,6 +1800,16 @@ ; CHECK-NEXT: ldp d9, d8, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: add sp, sp, #96 +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w19 +; CHECK-NEXT: .cfi_restore w20 +; CHECK-NEXT: .cfi_restore w21 +; CHECK-NEXT: .cfi_restore w22 +; CHECK-NEXT: .cfi_restore w23 +; CHECK-NEXT: .cfi_restore w24 +; CHECK-NEXT: .cfi_restore w30 +; CHECK-NEXT: .cfi_restore b8 +; CHECK-NEXT: .cfi_restore b9 ; CHECK-NEXT: ret %x = call <4 x i128> @llvm.fptoui.sat.v4f16.v4i128(<4 x half> %f) ret <4 x i128> %x @@ -2334,6 +2417,21 @@ ; CHECK-NEXT: ldp x29, x30, [sp, #80] // 16-byte Folded Reload ; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload ; CHECK-NEXT: add sp, sp, #176 +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w19 +; CHECK-NEXT: .cfi_restore w20 +; CHECK-NEXT: .cfi_restore w21 +; CHECK-NEXT: .cfi_restore w22 +; CHECK-NEXT: .cfi_restore w23 +; CHECK-NEXT: .cfi_restore w24 +; CHECK-NEXT: .cfi_restore w25 +; CHECK-NEXT: .cfi_restore w26 +; CHECK-NEXT: .cfi_restore w27 +; CHECK-NEXT: .cfi_restore w28 +; CHECK-NEXT: .cfi_restore w30 +; CHECK-NEXT: .cfi_restore w29 +; CHECK-NEXT: .cfi_restore b8 +; CHECK-NEXT: .cfi_restore b9 ; CHECK-NEXT: ret %x = call <8 x i100> @llvm.fptoui.sat.v8f16.v8i100(<8 x half> %f) ret <8 x i100> %x @@ -2480,6 +2578,21 @@ ; CHECK-NEXT: ldp x29, x30, [sp, #80] // 16-byte Folded Reload ; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload ; CHECK-NEXT: add sp, sp, #176 +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w19 +; CHECK-NEXT: .cfi_restore w20 +; CHECK-NEXT: .cfi_restore w21 +; CHECK-NEXT: .cfi_restore w22 +; CHECK-NEXT: .cfi_restore w23 +; CHECK-NEXT: .cfi_restore w24 +; CHECK-NEXT: .cfi_restore w25 +; CHECK-NEXT: .cfi_restore w26 +; CHECK-NEXT: .cfi_restore w27 +; CHECK-NEXT: .cfi_restore w28 +; CHECK-NEXT: .cfi_restore w30 +; CHECK-NEXT: .cfi_restore w29 +; CHECK-NEXT: .cfi_restore b8 +; CHECK-NEXT: .cfi_restore b9 ; CHECK-NEXT: ret %x = call <8 x i128> @llvm.fptoui.sat.v8f16.v8i128(<8 x half> %f) ret <8 x i128> %x Index: llvm/test/CodeGen/AArch64/framelayout-sve-calleesaves-fix.mir 
=================================================================== --- llvm/test/CodeGen/AArch64/framelayout-sve-calleesaves-fix.mir +++ llvm/test/CodeGen/AArch64/framelayout-sve-calleesaves-fix.mir @@ -18,10 +18,15 @@ ; CHECK-NEXT: // implicit-def: $z8 ; CHECK-NEXT: // implicit-def: $p4 ; CHECK-NEXT: addvl sp, sp, #1 + ; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG ; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr z8, [sp, #1, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: addvl sp, sp, #2 + ; CHECK-NEXT: .cfi_def_cfa wsp, 16 + ; CHECK-NEXT: .cfi_restore z8 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload + ; CHECK-NEXT: .cfi_def_cfa_offset 0 + ; CHECK-NEXT: .cfi_restore w29 ; CHECK-NEXT: ret ... name: fix_restorepoint_p4 Index: llvm/test/CodeGen/AArch64/framelayout-sve.mir =================================================================== --- llvm/test/CodeGen/AArch64/framelayout-sve.mir +++ llvm/test/CodeGen/AArch64/framelayout-sve.mir @@ -65,8 +65,12 @@ # CHECK-NEXT: frame-setup CFI_INSTRUCTION # CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 2 +# CHECK-NEXT: frame-destroy CFI_INSTRUCTION # CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 16, 0 +# CHECK-NEXT: frame-destroy CFI_INSTRUCTION # CHECK-NEXT: $sp, $[[SCRATCH]] = frame-destroy LDRXpost $sp, 16 +# CHECK-NEXT: frame-destroy CFI_INSTRUCTION +# CHECK-NEXT: frame-destroy CFI_INSTRUCTION # CHECK-NEXT: RET_ReallyLR # ASM-LABEL: test_allocate_sve: @@ -74,11 +78,21 @@ # ASM-NEXT: .cfi_offset w29, -16 # ASM: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG # ASM: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 32 + 16 * VG +# ASM: .cfi_def_cfa wsp, 32 +# ASM: .cfi_def_cfa_offset 16 +# ASM: .cfi_def_cfa_offset 0 +# ASM-NEXT: .cfi_restore w29 + # # UNWINDINFO: DW_CFA_def_cfa_offset: +16 # UNWINDINFO-NEXT: DW_CFA_offset: reg29 -16 # UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +16, DW_OP_plus, DW_OP_consts +16, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus # UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +32, DW_OP_plus, DW_OP_consts +16, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus +# UNWINDINFO: DW_CFA_def_cfa: reg31 +32 +# UNWINDINFO: DW_CFA_def_cfa_offset: +16 +# UNWINDINFO: DW_CFA_def_cfa_offset: +0 +# UNWINDINFO-NEXT: DW_CFA_restore: reg29 + name: test_allocate_sve stack: @@ -114,9 +128,12 @@ # CHECK-NEXT: $x20 = IMPLICIT_DEF # CHECK-NEXT: $x21 = IMPLICIT_DEF # CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 2 +# CHECK-NEXT: frame-destroy CFI_INSTRUCTION # CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 16, 0 +# CHECK-NEXT: frame-destroy CFI_INSTRUCTION # CHECK-NEXT: $x21, $x20 = frame-destroy LDPXi $sp, 2 # CHECK-NEXT: $sp, $[[SCRATCH]] = frame-destroy LDRXpost $sp, 32 +# CHECK-COUNT-4: frame-destroy CFI_INSTRUCTION # CHECK-NEXT: RET_ReallyLR # # ASM-LABEL: test_allocate_sve_gpr_callee_saves: @@ -126,6 +143,14 @@ # ASM-NEXT: .cfi_offset w29, -32 # ASM: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 32 + 16 * VG # ASM: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x30, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 48 + 16 * VG +# +# ASM: .cfi_def_cfa wsp, 48 +# ASM: .cfi_def_cfa_offset 32 +# ASM: .cfi_def_cfa_offset 0 +# ASM-NEXT: .cfi_restore w20 +# ASM-NEXT: .cfi_restore w21 +# ASM-NEXT: 
.cfi_restore w29 + # # UNWINDINFO: DW_CFA_def_cfa_offset: +32 # UNWINDINFO: DW_CFA_offset: reg20 -8 @@ -133,6 +158,14 @@ # UNWINDINFO-NEXT: DW_CFA_offset: reg29 -32 # UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +32, DW_OP_plus, DW_OP_consts +16, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus # UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +48, DW_OP_plus, DW_OP_consts +16, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus +# +# UNWINDINFO: DW_CFA_def_cfa: reg31 +48 +# UNWINDINFO: DW_CFA_def_cfa_offset: +32 +# UNWINDINFO: DW_CFA_def_cfa_offset: +0 +# UNWINDINFO-NEXT: DW_CFA_restore: reg20 +# UNWINDINFO-NEXT: DW_CFA_restore: reg21 +# UNWINDINFO-NEXT: DW_CFA_restore: reg29 + name: test_allocate_sve_gpr_callee_saves stack: - { id: 0, stack-id: scalable-vector, size: 18, alignment: 2 } @@ -164,7 +197,9 @@ # CHECK-NEXT: $[[TMP:x[0-9]+]] = frame-setup SUBXri $sp, 16, 0 # CHECK-NEXT: $sp = ANDXri killed $[[TMP]] # CHECK-NEXT: $sp = frame-destroy ADDXri $fp, 0, 0 +# CHECK-NEXT: frame-destroy CFI_INSTRUCTION # CHECK-NEXT: $sp, $fp, $lr = frame-destroy LDPXpost $sp, 2 +# CHECK-COUNT-3: frame-destroy CFI_INSTRUCTION # CHECK-NEXT: RET_ReallyLR # # ASM-LABEL: test_allocate_sve_gpr_realigned: @@ -172,11 +207,23 @@ # ASM: .cfi_def_cfa w29, 16 # ASM-NEXT: .cfi_offset w30, -8 # ASM-NEXT: .cfi_offset w29, -16 +# +# ASM: .cfi_def_cfa wsp, 16 +# ASM: .cfi_def_cfa_offset 0 +# ASM-NEXT: .cfi_restore w30 +# ASM-NEXT: .cfi_restore w29 + # # UNWINDINFO: DW_CFA_def_cfa_offset: +16 # UNWINDINFO: DW_CFA_def_cfa: reg29 +16 # UNWINDINFO-NEXT: DW_CFA_offset: reg30 -8 # UNWINDINFO-NEXT: DW_CFA_offset: reg29 -16 +# +# UNWINDINFO: DW_CFA_def_cfa: reg31 +16 +# UNWINDINFO: DW_CFA_def_cfa_offset: +0 +# UNWINDINFO-NEXT: DW_CFA_restore: reg30 +# UNWINDINFO-NEXT: DW_CFA_restore: reg29 + name: test_allocate_sve_gpr_realigned stack: @@ -216,8 +263,12 @@ # CHECK-NEXT: STR_PXI $p0, killed $[[TMP]], 7 # CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 3 +# CHECK-NEXT: frame-destroy CFI_INSTRUCTION # CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 16, 0 +# CHECK-NEXT: frame-destroy CFI_INSTRUCTION # CHECK-NEXT: $sp, $[[SCRATCH]] = frame-destroy LDRXpost $sp, 16 +# CHECK-NEXT: frame-destroy CFI_INSTRUCTION +# CHECK-NEXT: frame-destroy CFI_INSTRUCTION # CHECK-NEXT: RET_ReallyLR # # ASM-LABEL: test_address_sve: @@ -226,10 +277,21 @@ # ASM: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 24 * VG # ASM: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x18, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 32 + 24 * VG # +# ASM: .cfi_def_cfa wsp, 32 +# ASM: .cfi_def_cfa_offset 16 +# ASM: .cfi_def_cfa_offset 0 +# ASM-NEXT: .cfi_restore w29 +# # UNWINDINFO: DW_CFA_def_cfa_offset: +16 # UNWINDINFO-NEXT: DW_CFA_offset: reg29 -16 # UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +16, DW_OP_plus, DW_OP_consts +24, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus # UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +32, DW_OP_plus, DW_OP_consts +24, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus +# +# UNWINDINFO: DW_CFA_def_cfa: reg31 +32 +# UNWINDINFO: DW_CFA_def_cfa_offset: +16 +# UNWINDINFO: DW_CFA_def_cfa_offset: +0 +# UNWINDINFO-NEXT: DW_CFA_restore: reg29 + name: test_address_sve frameInfo: @@ -278,7 +340,9 @@ # CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 3 # CHECK: $sp = frame-destroy ADDXri $sp, 16, 0 +# CHECK-NEXT: frame-destroy CFI_INSTRUCTION # CHECK-NEXT: $sp, $fp, $lr = frame-destroy LDPXpost $sp, 2 +# 
CHECK-COUNT-3: frame-destroy CFI_INSTRUCTION # CHECK-NEXT: RET_ReallyLR # # ASM-LABEL: test_address_sve_fp: @@ -287,10 +351,21 @@ # ASM-NEXT: .cfi_offset w30, -8 # ASM-NEXT: .cfi_offset w29, -16 # +# ASM: .cfi_def_cfa wsp, 16 +# ASM: .cfi_def_cfa_offset 0 +# ASM-NEXT: .cfi_restore w30 +# ASM-NEXT: .cfi_restore w29 +# # UNWINDINFO: DW_CFA_def_cfa_offset: +16 # UNWINDINFO: DW_CFA_def_cfa: reg29 +16 # UNWINDINFO-NEXT: DW_CFA_offset: reg30 -8 # UNWINDINFO-NEXT: DW_CFA_offset: reg29 -16 +# +# UNWINDINFO: DW_CFA_def_cfa: reg31 +16 +# UNWINDINFO: DW_CFA_def_cfa_offset: +0 +# UNWINDINFO-NEXT: DW_CFA_restore: reg30 +# UNWINDINFO-NEXT: DW_CFA_restore: reg29 + name: test_address_sve_fp frameInfo: maxAlignment: 16 @@ -334,8 +409,12 @@ # CHECK-NEXT: $x0 = LDRXui killed $[[TMP]], 4 # CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 1 +# CHECK-NEXT: frame-destroy CFI_INSTRUCTION # CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 16, 0 +# CHECK-NEXT: frame-destroy CFI_INSTRUCTION # CHECK-NEXT: $sp, $[[SCRATCH]] = frame-destroy LDRXpost $sp, 16 +# CHECK-NEXT: frame-destroy CFI_INSTRUCTION +# CHECK-NEXT: frame-destroy CFI_INSTRUCTION # CHECK-NEXT: RET_ReallyLR # # ASM-LABEL: test_stack_arg_sve: @@ -344,10 +423,20 @@ # ASM: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG # ASM: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 32 + 8 * VG # +# ASM: .cfi_def_cfa wsp, 32 +# ASM: .cfi_def_cfa_offset 16 +# ASM: .cfi_def_cfa_offset 0 +# ASM-NEXT: .cfi_restore w29 + # UNWINDINFO: DW_CFA_def_cfa_offset: +16 # UNWINDINFO-NEXT: DW_CFA_offset: reg29 -16 # UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +16, DW_OP_plus, DW_OP_consts +8, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus # UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +32, DW_OP_plus, DW_OP_consts +8, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus +# +# UNWINDINFO: DW_CFA_def_cfa: reg31 +32 +# UNWINDINFO: DW_CFA_def_cfa_offset: +16 +# UNWINDINFO: DW_CFA_def_cfa_offset: +0 +# UNWINDINFO-NEXT: DW_CFA_restore: reg29 name: test_stack_arg_sve fixedStack: @@ -407,15 +496,26 @@ # CHECK-NEXT: STR_PXI $p0, killed $[[TMP2]], 255 # CHECK: $sp = frame-destroy ADDVL_XXI $sp, 31 +# CHECK-NEXT: frame-destroy CFI_INSTRUCTION # CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 31 +# CHECK-NEXT: frame-destroy CFI_INSTRUCTION # CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 31 +# CHECK-NEXT: frame-destroy CFI_INSTRUCTION # CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 31 +# CHECK-NEXT: frame-destroy CFI_INSTRUCTION # CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 31 +# CHECK-NEXT: frame-destroy CFI_INSTRUCTION # CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 31 +# CHECK-NEXT: frame-destroy CFI_INSTRUCTION # CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 31 +# CHECK-NEXT: frame-destroy CFI_INSTRUCTION # CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 31 +# CHECK-NEXT: frame-destroy CFI_INSTRUCTION # CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 9 +# CHECK-NEXT: frame-destroy CFI_INSTRUCTION # CHECK-NEXT: $sp, $[[SCRATCH]] = frame-destroy LDRXpost $sp, 16 +# CHECK-NEXT: frame-destroy CFI_INSTRUCTION +# CHECK-NEXT: frame-destroy CFI_INSTRUCTION # CHECK-NEXT: RET_ReallyLR # # ASM-LABEL: test_address_sve_out_of_range: @@ -431,6 +531,18 @@ # ASM: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x80, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 2048 * VG # ASM: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 
0x88, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 2056 * VG # +# ASM: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x90, 0x0e, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 1808 * VG +# ASM: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x98, 0x0c, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 1560 * VG +# ASM: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0xa0, 0x0a, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 1312 * VG +# ASM: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0xa8, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 1064 * VG +# ASM: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0xb0, 0x06, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 816 * VG +# ASM: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0xb8, 0x04, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 568 * VG +# ASM: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0xc0, 0x02, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 320 * VG +# ASM: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0xc8, 0x00, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 72 * VG +# ASM: .cfi_def_cfa wsp, 16 +# ASM: .cfi_def_cfa_offset 0 +# ASM-NEXT: .cfi_restore w29 + # UNWINDINFO: DW_CFA_def_cfa_offset: +16 # UNWINDINFO-NEXT: DW_CFA_offset: reg29 -16 # UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +16, DW_OP_plus, DW_OP_consts +256, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus @@ -442,6 +554,19 @@ # UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +16, DW_OP_plus, DW_OP_consts +1792, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus # UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +16, DW_OP_plus, DW_OP_consts +2048, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus # UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +16, DW_OP_plus, DW_OP_consts +2056, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus +# +# UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +16, DW_OP_plus, DW_OP_consts +1808, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus +# UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +16, DW_OP_plus, DW_OP_consts +1560, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus +# UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +16, DW_OP_plus, DW_OP_consts +1312, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus +# UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +16, DW_OP_plus, DW_OP_consts +1064, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus +# UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +16, DW_OP_plus, DW_OP_consts +816, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus +# UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +16, DW_OP_plus, DW_OP_consts +568, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus +# UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +16, DW_OP_plus, DW_OP_consts +320, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus +# UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +16, DW_OP_plus, DW_OP_consts +72, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus +# UNWINDINFO: DW_CFA_def_cfa: reg31 +16 +# UNWINDINFO: DW_CFA_def_cfa_offset: +0 +# UNWINDINFO-NEXT: DW_CFA_restore: reg29 + name: test_address_sve_out_of_range frameInfo: maxAlignment: 16 @@ -484,11 +609,24 @@ # ASM-NEXT: .cfi_offset w30, -24 # ASM-NEXT: .cfi_offset w29, -32 # +# ASM: .cfi_def_cfa wsp, 32 +# ASM: .cfi_def_cfa_offset 0 +# ASM-NEXT: .cfi_restore w19 +# ASM-NEXT: .cfi_restore w30 +# ASM-NEXT: 
.cfi_restore w29 + # UNWINDINFO: DW_CFA_def_cfa_offset: +32 # UNWINDINFO: DW_CFA_def_cfa: reg29 +32 # UNWINDINFO-NEXT: DW_CFA_offset: reg19 -16 # UNWINDINFO-NEXT: DW_CFA_offset: reg30 -24 # UNWINDINFO-NEXT: DW_CFA_offset: reg29 -32 +# +# UNWINDINFO: DW_CFA_def_cfa: reg31 +32 +# UNWINDINFO: DW_CFA_def_cfa_offset: +0 +# UNWINDINFO-NEXT: DW_CFA_restore: reg19 +# UNWINDINFO-NEXT: DW_CFA_restore: reg30 +# UNWINDINFO-NEXT: DW_CFA_restore: reg29 + name: test_address_gpr_vla frameInfo: maxAlignment: 16 @@ -528,10 +666,21 @@ # ASM: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG # ASM: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x30, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 48 + 8 * VG # +# ASM: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG +# ASM: .cfi_def_cfa wsp, 16 +# ASM: .cfi_def_cfa_offset 0 +# ASM-NEXT: .cfi_restore w29 + # UNWINDINFO: DW_CFA_def_cfa_offset: +16 # UNWINDINFO: DW_CFA_offset: reg29 -16 # UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +16, DW_OP_plus, DW_OP_consts +8, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus # UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +48, DW_OP_plus, DW_OP_consts +8, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus +# +# UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +16, DW_OP_plus, DW_OP_consts +8, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus +# UNWINDINFO: DW_CFA_def_cfa: reg31 +16 +# UNWINDINFO: DW_CFA_def_cfa_offset: +0 +# UNWINDINFO-NEXT: DW_CFA_restore: reg29 + name: save_restore_pregs_sve stack: - { id: 0, stack-id: default, size: 32, alignment: 16 } @@ -558,11 +707,15 @@ # CHECK-NEXT: frame-setup CFI_INSTRUCTION # CHECK: $sp = frame-destroy ADDXri $sp, 32, 0 +# CHECK-NEXT: frame-destroy CFI_INSTRUCTION # CHECK-NEXT: $z10 = frame-destroy LDR_ZXI $sp, 0 # CHECK-NEXT: $z9 = frame-destroy LDR_ZXI $sp, 1 # CHECK-NEXT: $z8 = frame-destroy LDR_ZXI $sp, 2 # CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 3 +# CHECK-COUNT-4: frame-destroy CFI_INSTRUCTION # CHECK-NEXT: $sp, $fp = frame-destroy LDRXpost $sp, 16 +# CHECK-NEXT: frame-destroy CFI_INSTRUCTION +# CHECK-NEXT: frame-destroy CFI_INSTRUCTION # CHECK-NEXT: RET_ReallyLR # # ASM-LABEL: save_restore_zregs_sve: @@ -573,6 +726,14 @@ # ASM-NEXT: .cfi_escape 0x10, 0x49, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 16 - 16 * VG # ASM-NEXT: .cfi_escape 0x10, 0x4a, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 16 - 24 * VG # ASM: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x30, 0x22, 0x11, 0x18, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 48 + 24 * VG +# +# ASM: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 24 * VG +# ASM: .cfi_def_cfa wsp, 16 +# ASM-NEXT: .cfi_restore z8 +# ASM-NEXT: .cfi_restore z9 +# ASM-NEXT: .cfi_restore z10 +# ASM: .cfi_def_cfa_offset 0 +# ASM-NEXT: .cfi_restore w29 # UNWINDINFO: DW_CFA_def_cfa_offset: +16 # UNWINDINFO-NEXT: DW_CFA_offset: reg29 -16 @@ -581,6 +742,14 @@ # UNWINDINFO-NEXT: DW_CFA_expression: reg73 DW_OP_consts -16, DW_OP_plus, DW_OP_consts -16, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus # UNWINDINFO-NEXT: DW_CFA_expression: reg74 DW_OP_consts -16, DW_OP_plus, DW_OP_consts -24, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus # UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +48, DW_OP_plus, DW_OP_consts +24, DW_OP_bregx 0x2e +0, 
DW_OP_mul, DW_OP_plus +# +# UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +16, DW_OP_plus, DW_OP_consts +24, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus +# UNWINDINFO: DW_CFA_def_cfa: reg31 +16 +# UNWINDINFO-NEXT: DW_CFA_restore_extended: reg104 +# UNWINDINFO-NEXT: DW_CFA_restore_extended: reg105 +# UNWINDINFO-NEXT: DW_CFA_restore_extended: reg106 +# UNWINDINFO: DW_CFA_def_cfa_offset: +0 +# UNWINDINFO-NEXT: DW_CFA_restore: reg29 name: save_restore_zregs_sve stack: @@ -623,7 +792,9 @@ # CHECK: frame-setup CFI_INSTRUCTION # CHECK: $sp = frame-destroy ADDXri $sp, 32, 0 +# CHECK-NEXT: frame-destroy CFI_INSTRUCTION # CHECK: $sp = frame-destroy ADDVL_XXI $sp, 1 +# CHECK-NEXT: frame-destroy CFI_INSTRUCTION # CHECK: $p15 = frame-destroy LDR_PXI $sp, 4 # CHECK: $p14 = frame-destroy LDR_PXI $sp, 5 # CHECK: $p5 = frame-destroy LDR_PXI $sp, 14 @@ -633,8 +804,10 @@ # CHECK: $z9 = frame-destroy LDR_ZXI $sp, 16 # CHECK: $z8 = frame-destroy LDR_ZXI $sp, 17 # CHECK: $sp = frame-destroy ADDVL_XXI $sp, 18 +# CHECK-COUNT-9: frame-destroy CFI_INSTRUCTION # CHECK: $x20, $x19 = frame-destroy LDPXi $sp, 2 # CHECK: $sp, ${{[a-z0-9]+}}, $x21 = frame-destroy LDPXpost $sp, 4 +# CHECK-COUNT-5: frame-destroy CFI_INSTRUCTION # CHECK: RET_ReallyLR # # ASM-LABEL: save_restore_sve: @@ -643,7 +816,7 @@ # ASM-NEXT: .cfi_offset w20, -16 # ASM-NEXT: .cfi_offset w21, -24 # ASM-NEXT: .cfi_offset w29, -32 -# ASM: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x90, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 32 + 144 * VG +# ASM: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x90, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 32 + 144 * VG # ASM: .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 32 - 8 * VG # ASM-NEXT: .cfi_escape 0x10, 0x49, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 32 - 16 * VG # ASM-NEXT: .cfi_escape 0x10, 0x4a, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 32 - 24 * VG @@ -655,6 +828,23 @@ # ASM: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x98, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 32 + 152 * VG # ASM: .cfi_escape 0x0f, 0x0e, 0x8f, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x98, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 64 + 152 * VG # +# ASM: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x98, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 32 + 152 * VG +# ASM: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x90, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 32 + 144 * VG +# ASM: .cfi_def_cfa wsp, 32 +# ASM-NEXT: .cfi_restore z8 +# ASM-NEXT: .cfi_restore z9 +# ASM-NEXT: .cfi_restore z10 +# ASM-NEXT: .cfi_restore z11 +# ASM-NEXT: .cfi_restore z12 +# ASM-NEXT: .cfi_restore z13 +# ASM-NEXT: .cfi_restore z14 +# ASM-NEXT: .cfi_restore z15 +# ASM: .cfi_def_cfa_offset 0 +# ASM-NEXT: .cfi_restore w19 +# ASM-NEXT: .cfi_restore w20 +# ASM-NEXT: .cfi_restore w21 +# ASM-NEXT: .cfi_restore w29 + # UNWINDINFO: DW_CFA_def_cfa_offset: +32 # UNWINDINFO: DW_CFA_offset: reg19 -8 # UNWINDINFO-NEXT: DW_CFA_offset: reg20 -16 @@ -671,6 +861,23 @@ # UNWINDINFO-NEXT: DW_CFA_expression: reg79 DW_OP_consts -32, DW_OP_plus, DW_OP_consts -64, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus # UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +32, DW_OP_plus, DW_OP_consts +152, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus # UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +64, DW_OP_plus, DW_OP_consts 
+152, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus +# +# UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +32, DW_OP_plus, DW_OP_consts +152, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus +# UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +32, DW_OP_plus, DW_OP_consts +144, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus +# UNWINDINFO: DW_CFA_def_cfa: reg31 +32 +# UNWINDINFO-NEXT: DW_CFA_restore_extended: reg104 +# UNWINDINFO-NEXT: DW_CFA_restore_extended: reg105 +# UNWINDINFO-NEXT: DW_CFA_restore_extended: reg106 +# UNWINDINFO-NEXT: DW_CFA_restore_extended: reg107 +# UNWINDINFO-NEXT: DW_CFA_restore_extended: reg108 +# UNWINDINFO-NEXT: DW_CFA_restore_extended: reg109 +# UNWINDINFO-NEXT: DW_CFA_restore_extended: reg110 +# UNWINDINFO-NEXT: DW_CFA_restore_extended: reg111 +# UNWINDINFO: DW_CFA_def_cfa_offset: +0 +# UNWINDINFO-NEXT: DW_CFA_restore: reg19 +# UNWINDINFO-NEXT: DW_CFA_restore: reg20 +# UNWINDINFO-NEXT: DW_CFA_restore: reg21 +# UNWINDINFO-NEXT: DW_CFA_restore: reg29 name: save_restore_sve stack: @@ -737,8 +944,11 @@ # CHECK-NEXT: $z22 = frame-destroy LDR_ZXI $sp, 3 # CHECK: $z9 = frame-destroy LDR_ZXI $sp, 16 # CHECK-NEXT: $z8 = frame-destroy LDR_ZXI $sp, 17 +# CHECK-COUNT-8: frame-destroy CFI_INSTRUCTION # CHECK-NEXT: $sp = frame-destroy ADDXri $fp, 0, 0 +# CHECK-NEXT: frame-destroy CFI_INSTRUCTION # CHECK-NEXT: $sp, $fp, $lr = frame-destroy LDPXpost $sp, 2 +# CHECK-COUNT-3: frame-destroy CFI_INSTRUCTION # CHECK-NEXT: RET_ReallyLR # # ASM-LABEL: save_restore_sve_realign: @@ -755,6 +965,19 @@ # ASM-NEXT: .cfi_escape 0x10, 0x4e, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d14 @ cfa - 16 - 56 * VG # ASM-NEXT: .cfi_escape 0x10, 0x4f, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 16 - 64 * VG # +# ASM: .cfi_restore z8 +# ASM-NEXT: .cfi_restore z9 +# ASM-NEXT: .cfi_restore z10 +# ASM-NEXT: .cfi_restore z11 +# ASM-NEXT: .cfi_restore z12 +# ASM-NEXT: .cfi_restore z13 +# ASM-NEXT: .cfi_restore z14 +# ASM-NEXT: .cfi_restore z15 +# ASM: .cfi_def_cfa wsp, 16 +# ASM: .cfi_def_cfa_offset 0 +# ASM-NEXT: .cfi_restore w30 +# ASM-NEXT: .cfi_restore w29 +# # UNWINDINFO: DW_CFA_def_cfa_offset: +16 # UNWINDINFO: DW_CFA_def_cfa: reg29 +16 # UNWINDINFO-NEXT: DW_CFA_offset: reg30 -8 @@ -767,6 +990,20 @@ # UNWINDINFO-NEXT: DW_CFA_expression: reg77 DW_OP_consts -16, DW_OP_plus, DW_OP_consts -48, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus # UNWINDINFO-NEXT: DW_CFA_expression: reg78 DW_OP_consts -16, DW_OP_plus, DW_OP_consts -56, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus # UNWINDINFO-NEXT: DW_CFA_expression: reg79 DW_OP_consts -16, DW_OP_plus, DW_OP_consts -64, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus +# +# UNWINDINFO: DW_CFA_restore_extended: reg104 +# UNWINDINFO-NEXT: DW_CFA_restore_extended: reg105 +# UNWINDINFO-NEXT: DW_CFA_restore_extended: reg106 +# UNWINDINFO-NEXT: DW_CFA_restore_extended: reg107 +# UNWINDINFO-NEXT: DW_CFA_restore_extended: reg108 +# UNWINDINFO-NEXT: DW_CFA_restore_extended: reg109 +# UNWINDINFO-NEXT: DW_CFA_restore_extended: reg110 +# UNWINDINFO-NEXT: DW_CFA_restore_extended: reg111 +# UNWINDINFO: DW_CFA_def_cfa: reg31 +16 +# UNWINDINFO: DW_CFA_def_cfa_offset: +0 +# UNWINDINFO-NEXT: DW_CFA_restore: reg30 +# UNWINDINFO-NEXT: DW_CFA_restore: reg29 + name: save_restore_sve_realign stack: - { id: 0, stack-id: scalable-vector, size: 16, alignment: 16 } @@ -845,6 +1082,19 @@ # CHECK-NEXT: frame-setup CFI_INSTRUCTION # CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -7 # CHECK-NEXT: frame-setup 
CFI_INSTRUCTION + +# CHECK: $sp = frame-destroy ADDVL_XXI $sp, 7 +# CHECK-NEXT: frame-destroy CFI_INSTRUCTION +# CHECK-NEXT: $p15 = frame-destroy LDR_PXI $sp, 6 +# CHECK-NEXT: $p4 = frame-destroy LDR_PXI $sp, 7 +# CHECK-NEXT: $z23 = frame-destroy LDR_ZXI $sp, 1 +# CHECK-NEXT: $z8 = frame-destroy LDR_ZXI $sp, 2 +# CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 3 +# CHECK-NEXT: frame-destroy CFI_INSTRUCTION +# CHECK-NEXT: frame-destroy CFI_INSTRUCTION +# CHECK-NEXT: early-clobber $sp, $fp = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.10) +# CHECK-NEXT: frame-destroy CFI_INSTRUCTION +# CHECK-NEXT: frame-destroy CFI_INSTRUCTION # # ASM-LABEL: frame_layout: # ASM: .cfi_def_cfa_offset 16 @@ -853,11 +1103,24 @@ # ASM: .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 16 - 8 * VG # ASM: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0xd0, 0x00, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 80 * VG # +# ASM: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 24 * VG +# ASM: .cfi_def_cfa wsp, 16 +# ASM-NEXT: .cfi_restore z8 +# ASM: .cfi_def_cfa_offset 0 +# ASM-NEXT: .cfi_restore w29 + # UNWINDINFO: DW_CFA_def_cfa_offset: +16 # UNWINDINFO-NEXT: DW_CFA_offset: reg29 -16 # UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +16, DW_OP_plus, DW_OP_consts +24, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus # UNWINDINFO: DW_CFA_expression: reg72 DW_OP_consts -16, DW_OP_plus, DW_OP_consts -8, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus # UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +16, DW_OP_plus, DW_OP_consts +80, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus +# +# UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +16, DW_OP_plus, DW_OP_consts +24, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus +# UNWINDINFO: DW_CFA_def_cfa: reg31 +16 +# UNWINDINFO-NEXT: DW_CFA_restore_extended: reg104 +# UNWINDINFO: DW_CFA_def_cfa_offset: +0 +# UNWINDINFO-NEXT: DW_CFA_restore: reg29 + name: frame_layout stack: - { id: 0, type: default, size: 32, alignment: 16, stack-id: scalable-vector } Index: llvm/test/CodeGen/AArch64/framelayout-unaligned-fp.ll =================================================================== --- llvm/test/CodeGen/AArch64/framelayout-unaligned-fp.ll +++ llvm/test/CodeGen/AArch64/framelayout-unaligned-fp.ll @@ -33,9 +33,15 @@ ; CHECK-NEXT: add x29, sp, #8 ; CHECK: sub sp, x29, #8 +; CHECK-NEXT: .cfi_def_cfa wsp, 32 ; CHECK-NEXT: ldr x19, [sp, #24] ; CHECK-NEXT: ldp x29, x30, [sp, #8] ; CHECK-NEXT: ldr d8, [sp], #32 +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w19 +; CHECK-NEXT: .cfi_restore w30 +; CHECK-NEXT: .cfi_restore w29 +; CHECK-NEXT: .cfi_restore b8 ; CHECK-NEXT: ret declare i64 @d() Index: llvm/test/CodeGen/AArch64/hwasan-check-memaccess.ll =================================================================== --- llvm/test/CodeGen/AArch64/hwasan-check-memaccess.ll +++ llvm/test/CodeGen/AArch64/hwasan-check-memaccess.ll @@ -11,6 +11,8 @@ ; CHECK-NEXT: mov x0, x1 ; CHECK-NEXT: bl __hwasan_check_x1_1 ; CHECK-NEXT: ldr x30, [sp], #16 + ; CHECK-NEXT: .cfi_def_cfa_offset 0 + ; CHECK-NEXT: .cfi_restore w30 ; CHECK-NEXT: ret call void @llvm.hwasan.check.memaccess(i8* %x0, i8* %x1, i32 1) ret i8* %x1 @@ -25,6 +27,9 @@ ; CHECK-NEXT: mov x20, x1 ; CHECK-NEXT: bl __hwasan_check_x0_2_short_v2 ; CHECK-NEXT: ldp x30, x20, [sp], #16 + ; CHECK-NEXT: .cfi_def_cfa_offset 0 + ; CHECK-NEXT: .cfi_restore w20 + ; 
CHECK-NEXT: .cfi_restore w30 ; CHECK-NEXT: ret call void @llvm.hwasan.check.memaccess.shortgranules(i8* %x1, i8* %x0, i32 2) ret i8* %x0 Index: llvm/test/CodeGen/AArch64/isinf.ll =================================================================== --- llvm/test/CodeGen/AArch64/isinf.ll +++ llvm/test/CodeGen/AArch64/isinf.ll @@ -74,6 +74,8 @@ ; CHECK-NEXT: cmp w0, #0 ; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: add sp, sp, #32 +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w30 ; CHECK-NEXT: ret %abs = tail call fp128 @llvm.fabs.f128(fp128 %x) %cmpinf = fcmp oeq fp128 %abs, 0xL00000000000000007FFF000000000000 Index: llvm/test/CodeGen/AArch64/large-stack.ll =================================================================== --- llvm/test/CodeGen/AArch64/large-stack.ll +++ llvm/test/CodeGen/AArch64/large-stack.ll @@ -48,5 +48,10 @@ ; CHECK-COUNT-128: add sp, sp, #[[STACK1]], lsl #12 ; CHECK-NEXT: add sp, sp, #[[STACK2]], lsl #12 ; CHECK-NEXT: add sp, sp, #[[STACK3]] +; CHECK-NEXT: .cfi_def_cfa wsp, [[SPILL_OFFSET1]] ; CHECK-NEXT: ldr x[[SPILL_REG3]], [sp, #[[SPILL_OFFSET2]]] ; CHECK-NEXT: ldp x[[SPILL_REG1]], x[[SPILL_REG2]], [sp], #[[SPILL_OFFSET1]] +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w[[SPILL_REG3]] +; CHECK-NEXT: .cfi_restore w[[SPILL_REG2]] +; CHECK-NEXT: .cfi_restore w[[SPILL_REG1]] Index: llvm/test/CodeGen/AArch64/ldst-paired-aliasing.ll =================================================================== --- llvm/test/CodeGen/AArch64/ldst-paired-aliasing.ll +++ llvm/test/CodeGen/AArch64/ldst-paired-aliasing.ll @@ -36,6 +36,8 @@ ; CHECK-NEXT: ldr x30, [sp, #96] // 8-byte Folded Reload ; CHECK-NEXT: mov w0, wzr ; CHECK-NEXT: add sp, sp, #112 +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w30 ; CHECK-NEXT: ret Index: llvm/test/CodeGen/AArch64/local_vars.ll =================================================================== --- llvm/test/CodeGen/AArch64/local_vars.ll +++ llvm/test/CodeGen/AArch64/local_vars.ll @@ -43,9 +43,15 @@ ret void ; CHECK: ldr x30, [sp], #16 +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w30 ; CHECK-NEXT: ret -; CHECK-WITHFP-ARM64: ldp x29, x30, [sp], #16 +; CHECK-WITHFP-ARM64: .cfi_def_cfa wsp, 16 +; CHECK-WITHFP-ARM64-NEXT: ldp x29, x30, [sp], #16 +; CHECK-WITHFP-ARM64-NEXT: .cfi_def_cfa_offset 0 +; CHECK-WITHFP-ARM64-NEXT: .cfi_restore w30 +; CHECK-WITHFP-ARM64-NEXT: .cfi_restore w29 ; CHECK-WITHFP-ARM64-NEXT: ret } Index: llvm/test/CodeGen/AArch64/ls64-inline-asm.ll =================================================================== --- llvm/test/CodeGen/AArch64/ls64-inline-asm.ll +++ llvm/test/CodeGen/AArch64/ls64-inline-asm.ll @@ -55,6 +55,7 @@ ; CHECK-NEXT: st64b x2, [x1] ; CHECK-NEXT: //NO_APP ; CHECK-NEXT: add sp, sp, #64 +; CHECK-NEXT: .cfi_def_cfa_offset 0 ; CHECK-NEXT: ret entry: %0 = load i32, i32* %in, align 4 Index: llvm/test/CodeGen/AArch64/machine-licm-sink-instr.ll =================================================================== --- llvm/test/CodeGen/AArch64/machine-licm-sink-instr.ll +++ llvm/test/CodeGen/AArch64/machine-licm-sink-instr.ll @@ -36,6 +36,11 @@ ; CHECK-NEXT: mov w0, w20 ; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: ldp x30, x21, [sp], #32 // 16-byte Folded Reload +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w19 +; CHECK-NEXT: .cfi_restore w20 +; CHECK-NEXT: .cfi_restore w21 +; CHECK-NEXT: .cfi_restore w30 ; CHECK-NEXT: ret entry: %cmp63 = icmp sgt i32 %n, 0 @@ -92,6 +97,11 @@ ; CHECK-NEXT: ldp x20, x19, 
[sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: mov w0, w21 ; CHECK-NEXT: ldp x30, x21, [sp], #32 // 16-byte Folded Reload +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w19 +; CHECK-NEXT: .cfi_restore w20 +; CHECK-NEXT: .cfi_restore w21 +; CHECK-NEXT: .cfi_restore w30 ; CHECK-NEXT: ret entry: %cmp63 = icmp sgt i32 %n, 0 @@ -149,6 +159,11 @@ ; CHECK-NEXT: mov w0, w20 ; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: ldp x30, x21, [sp], #32 // 16-byte Folded Reload +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w19 +; CHECK-NEXT: .cfi_restore w20 +; CHECK-NEXT: .cfi_restore w21 +; CHECK-NEXT: .cfi_restore w30 ; CHECK-NEXT: ret entry: %cmp63 = icmp sgt i32 %n, 0 Index: llvm/test/CodeGen/AArch64/machine-outliner-remarks.ll =================================================================== --- llvm/test/CodeGen/AArch64/machine-outliner-remarks.ll +++ llvm/test/CodeGen/AArch64/machine-outliner-remarks.ll @@ -102,7 +102,7 @@ ret void } -attributes #0 = { noredzone nounwind ssp uwtable "frame-pointer"="none" "target-cpu"="cyclone" } +attributes #0 = { noredzone nounwind ssp uwtable minsize "frame-pointer"="none" "target-cpu"="cyclone" } !llvm.dbg.cu = !{!0} !llvm.module.flags = !{!3, !4, !5, !6} Index: llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-thunk.ll =================================================================== --- llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-thunk.ll +++ llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-thunk.ll @@ -70,7 +70,7 @@ ret i32 %add } -attributes #0 = { "sign-return-address"="non-leaf" } +attributes #0 = { "sign-return-address"="non-leaf" minsize } ; CHECK-NOT: OUTLINED_FUNCTION_{{.*}} ; CHECK-NOT: .cfi_b_key_frame Index: llvm/test/CodeGen/AArch64/machine-outliner-thunk.ll =================================================================== --- llvm/test/CodeGen/AArch64/machine-outliner-thunk.ll +++ llvm/test/CodeGen/AArch64/machine-outliner-thunk.ll @@ -15,6 +15,8 @@ ; CHECK-NEXT: bl OUTLINED_FUNCTION_1 ; CHECK-NEXT: add w0, w0, #8 ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w30 ; CHECK-NEXT: ret entry: %call = tail call i32 @thunk_called_fn(i32 1, i32 2, i32 3, i32 4) @@ -31,6 +33,8 @@ ; CHECK-NEXT: bl OUTLINED_FUNCTION_1 ; CHECK-NEXT: add w0, w0, #88 ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w30 ; CHECK-NEXT: ret entry: %call = tail call i32 @thunk_called_fn(i32 1, i32 2, i32 3, i32 4) @@ -47,6 +51,8 @@ ; CHECK-NEXT: bl OUTLINED_FUNCTION_0 ; CHECK-NEXT: add w0, w0, #8 ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w30 ; CHECK-NEXT: ret entry: %call = tail call i32 %fptr(i32 1, i32 2, i32 3, i32 4) @@ -63,6 +69,8 @@ ; CHECK-NEXT: bl OUTLINED_FUNCTION_0 ; CHECK-NEXT: add w0, w0, #88 ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w30 ; CHECK-NEXT: ret entry: %call = tail call i32 %fptr(i32 1, i32 2, i32 3, i32 4) Index: llvm/test/CodeGen/AArch64/merge-store-dependency.ll =================================================================== --- llvm/test/CodeGen/AArch64/merge-store-dependency.ll +++ llvm/test/CodeGen/AArch64/merge-store-dependency.ll @@ -45,6 +45,9 @@ ; A53-NEXT: adrp x8, gv1 ; A53-NEXT: str x0, [x8, :lo12:gv1] ; A53-NEXT: ldp x30, x19, [sp], #16 // 
16-byte Folded Reload +; A53-NEXT: .cfi_def_cfa_offset 0 +; A53-NEXT: .cfi_restore w19 +; A53-NEXT: .cfi_restore w30 ; A53-NEXT: ret ; A53-NEXT: .LBB0_4: // %while.body.i.split ; A53-NEXT: // =>This Inner Loop Header: Depth=1 Index: llvm/test/CodeGen/AArch64/neg-imm.ll =================================================================== --- llvm/test/CodeGen/AArch64/neg-imm.ll +++ llvm/test/CodeGen/AArch64/neg-imm.ll @@ -36,6 +36,10 @@ ; CHECK-NEXT: .LBB0_4: // %for.cond.cleanup ; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: ldr x30, [sp], #32 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w19 +; CHECK-NEXT: .cfi_restore w20 +; CHECK-NEXT: .cfi_restore w30 ; CHECK-NEXT: ret entry: %sub = add nsw i32 %px, -1 Index: llvm/test/CodeGen/AArch64/optimize-cond-branch.ll =================================================================== --- llvm/test/CodeGen/AArch64/optimize-cond-branch.ll +++ llvm/test/CodeGen/AArch64/optimize-cond-branch.ll @@ -24,12 +24,16 @@ ; CHECK-NEXT: ldr w8, [x8] ; CHECK-NEXT: and w0, w8, #0x100 ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w30 ; CHECK-NEXT: cbz w0, .LBB0_5 ; CHECK-NEXT: .LBB0_3: // %common.ret.sink.split ; CHECK-NEXT: b extfunc ; CHECK-NEXT: .LBB0_4: // %b2 ; CHECK-NEXT: bl extfunc ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w30 ; CHECK-NEXT: cbnz w0, .LBB0_3 ; CHECK-NEXT: .LBB0_5: // %common.ret ; CHECK-NEXT: ret Index: llvm/test/CodeGen/AArch64/ragreedy-local-interval-cost.ll =================================================================== --- llvm/test/CodeGen/AArch64/ragreedy-local-interval-cost.ll +++ llvm/test/CodeGen/AArch64/ragreedy-local-interval-cost.ll @@ -175,6 +175,15 @@ ; CHECK-NEXT: stp q4, q3, [x8, #432] ; CHECK-NEXT: str q0, [x8, #496] ; CHECK-NEXT: add sp, sp, #96 +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore b8 +; CHECK-NEXT: .cfi_restore b9 +; CHECK-NEXT: .cfi_restore b10 +; CHECK-NEXT: .cfi_restore b11 +; CHECK-NEXT: .cfi_restore b12 +; CHECK-NEXT: .cfi_restore b13 +; CHECK-NEXT: .cfi_restore b14 +; CHECK-NEXT: .cfi_restore b15 ; CHECK-NEXT: ret entry: Index: llvm/test/CodeGen/AArch64/settag.ll =================================================================== --- llvm/test/CodeGen/AArch64/settag.ll +++ llvm/test/CodeGen/AArch64/settag.ll @@ -122,6 +122,7 @@ ; CHECK-NEXT: sub sp, sp, #16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: stg sp, [sp], #16 +; CHECK-NEXT: .cfi_def_cfa_offset 0 ; CHECK-NEXT: ret entry: %a = alloca i8, i32 16, align 16 @@ -137,6 +138,7 @@ ; CHECK-NEXT: st2g sp, [sp, #32] ; CHECK-NEXT: stg sp, [sp, #64] ; CHECK-NEXT: st2g sp, [sp], #80 +; CHECK-NEXT: .cfi_def_cfa_offset 0 ; CHECK-NEXT: ret entry: %a = alloca i8, i32 80, align 16 @@ -159,7 +161,10 @@ ; CHECK-NEXT: cbnz x8, .LBB11_1 ; CHECK-NEXT: // %bb.2: // %entry ; CHECK-NEXT: stg sp, [sp], #16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w29 ; CHECK-NEXT: ret entry: %a = alloca i8, i32 272, align 16 Index: llvm/test/CodeGen/AArch64/shrink-wrapping-vla.ll =================================================================== --- llvm/test/CodeGen/AArch64/shrink-wrapping-vla.ll +++ llvm/test/CodeGen/AArch64/shrink-wrapping-vla.ll @@ -80,6 +80,10 @@ ; CHECK: stp x29,
x30, [sp, #-16]! ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: mov x29, sp +; CHECK-NEXT: .cfi_def_cfa w29, 16 +; CHECK-NEXT: .cfi_offset w30, -8 +; CHECK-NEXT: .cfi_offset w29, -16 + ; VLA allocation ; CHECK: mov [[X2:x[0-9]+]], sp @@ -93,4 +97,9 @@ ; CHECK: mov sp, [[SAVE]] ; Epilogue ; CHECK-NEXT: mov sp, x29 +; CHECK-NEXT: .cfi_def_cfa wsp, 16 ; CHECK-NEXT: ldp x29, x30, [sp], #16 +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w30 +; CHECK-NEXT: .cfi_restore w29 +; CHECK-NEXT: ret Index: llvm/test/CodeGen/AArch64/sibling-call.ll =================================================================== --- llvm/test/CodeGen/AArch64/sibling-call.ll +++ llvm/test/CodeGen/AArch64/sibling-call.ll @@ -34,6 +34,8 @@ ; CHECK-NEXT: bl callee_stack8 ; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload ; CHECK-NEXT: add sp, sp, #32 +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w30 ; CHECK-NEXT: ret ; Caller isn't going to clean up any extra stack we allocate, so it @@ -64,6 +66,8 @@ ; CHECK-NEXT: bl callee_stack16 ; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload ; CHECK-NEXT: add sp, sp, #32 +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w30 ; CHECK-NEXT: ret ; Shouldn't be a tail call: we can't use SP+8 because our caller might Index: llvm/test/CodeGen/AArch64/speculation-hardening-loads.ll =================================================================== --- llvm/test/CodeGen/AArch64/speculation-hardening-loads.ll +++ llvm/test/CodeGen/AArch64/speculation-hardening-loads.ll @@ -147,6 +147,7 @@ ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: ldr w8, [sp, #12] ; CHECK-NEXT: add sp, sp, #16 +; CHECK-NEXT: .cfi_def_cfa_offset 0 ; CHECK-NEXT: mov [[TMPREG:x[0-9]+]], sp ; CHECK-NEXT: and [[TMPREG]], [[TMPREG]], x16 ; CHECK-NEXT: mov sp, [[TMPREG]] Index: llvm/test/CodeGen/AArch64/split-vector-insert.ll =================================================================== --- llvm/test/CodeGen/AArch64/split-vector-insert.ll +++ llvm/test/CodeGen/AArch64/split-vector-insert.ll @@ -52,7 +52,10 @@ ; CHECK-NEXT: str q4, [x9, x8] ; CHECK-NEXT: ld1d { z0.d }, p0/z, [sp, #3, mul vl] ; CHECK-NEXT: addvl sp, sp, #4 +; CHECK-NEXT: .cfi_def_cfa wsp, 16 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w29 ; CHECK-NEXT: ret @@ -104,7 +107,10 @@ ; CHECK-NEXT: str q4, [x9, x8] ; CHECK-NEXT: ld1d { z0.d }, p0/z, [sp, #3, mul vl] ; CHECK-NEXT: addvl sp, sp, #4 +; CHECK-NEXT: .cfi_def_cfa wsp, 16 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w29 ; CHECK-NEXT: ret Index: llvm/test/CodeGen/AArch64/stack-guard-sysreg.ll =================================================================== --- llvm/test/CodeGen/AArch64/stack-guard-sysreg.ll +++ llvm/test/CodeGen/AArch64/stack-guard-sysreg.ll @@ -81,7 +81,11 @@ ; CHECK-NEXT: b.ne .LBB0_2 ; CHECK-NEXT: // %bb.1: // %entry ; CHECK-NEXT: mov sp, x29 +; CHECK-NEXT: .cfi_def_cfa wsp, 16 ; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w30 +; CHECK-NEXT: .cfi_restore w29 ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB0_2: // %entry ; CHECK-NEXT: bl __stack_chk_fail Index: llvm/test/CodeGen/AArch64/statepoint-call-lowering.ll =================================================================== --- llvm/test/CodeGen/AArch64/statepoint-call-lowering.ll +++ 
llvm/test/CodeGen/AArch64/statepoint-call-lowering.ll @@ -88,6 +88,7 @@ ; CHECK-NEXT: .Ltmp7: ; CHECK-NEXT: and w0, w0, #0x1 ; CHECK-NEXT: add sp, sp, #16 +; CHECK-NEXT: .cfi_def_cfa_offset 0 ; CHECK-NEXT: ret ; Check that an unused relocate has no code-generation impact entry: @@ -154,6 +155,10 @@ ; CHECK-NEXT: and w0, w19, #0x1 ; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: ldr x30, [sp], #32 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w19 +; CHECK-NEXT: .cfi_restore w20 +; CHECK-NEXT: .cfi_restore w30 ; CHECK-NEXT: ret entry: %safepoint_token = tail call token (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 0) ["gc-live" (i32 addrspace(1)* %a)] @@ -188,6 +193,7 @@ ; CHECK-NEXT: bl consume_attributes ; CHECK-NEXT: .Ltmp11: ; CHECK-NEXT: add sp, sp, #32 +; CHECK-NEXT: .cfi_def_cfa_offset 0 ; CHECK-NEXT: ret entry: ; Check that arguments with attributes are lowered correctly. Index: llvm/test/CodeGen/AArch64/storepairsuppress_minsize.ll =================================================================== --- llvm/test/CodeGen/AArch64/storepairsuppress_minsize.ll +++ llvm/test/CodeGen/AArch64/storepairsuppress_minsize.ll @@ -23,6 +23,8 @@ ; CHECK-NEXT: str d4, [x8, #32] ; CHECK-NEXT: str d5, [x8, #40] ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w30 ; CHECK-NEXT: ret %1 = call %T_IN_BLOCK @return_in_block() store %T_IN_BLOCK %1, %T_IN_BLOCK* @in_block_store @@ -61,6 +63,8 @@ ; CHECK-NEXT: stp d2, d3, [x8, #16] ; CHECK-NEXT: stp d4, d5, [x8, #32] ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w30 ; CHECK-NEXT: ret %1 = call %T_IN_BLOCK @return_in_block() store %T_IN_BLOCK %1, %T_IN_BLOCK* @in_block_store Index: llvm/test/CodeGen/AArch64/sve-extract-scalable-vector.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve-extract-scalable-vector.ll +++ llvm/test/CodeGen/AArch64/sve-extract-scalable-vector.ll @@ -90,7 +90,10 @@ ; CHECK-NEXT: uzp1 p0.h, p0.h, p3.h ; CHECK-NEXT: uzp1 p0.b, p0.b, p1.b ; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: .cfi_def_cfa wsp, 16 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w29 ; CHECK-NEXT: ret %res = call <vscale x 14 x i1> @llvm.experimental.vector.extract.nxv14i1.nxv28i1(<vscale x 28 x i1> %in, i64 14) ret <vscale x 14 x i1> %res Index: llvm/test/CodeGen/AArch64/sve-fixed-length-fp-vselect.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve-fixed-length-fp-vselect.ll +++ llvm/test/CodeGen/AArch64/sve-fixed-length-fp-vselect.ll @@ -100,7 +100,11 @@ ; CHECK-NEXT: sel z0.h, p1, z1.h, z2.h ; CHECK-NEXT: st1h { z0.h }, p0, [x0] ; CHECK-NEXT: mov sp, x29 +; CHECK-NEXT: .cfi_def_cfa wsp, 16 ; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w30 +; CHECK-NEXT: .cfi_restore w29 ; CHECK-NEXT: ret %mask = load <16 x i1>, <16 x i1>* %c %op1 = load <16 x half>, <16 x half>* %a @@ -197,7 +201,11 @@ ; VBITS_GE_512-NEXT: sel z0.h, p1, z1.h, z2.h ; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x0] ; VBITS_GE_512-NEXT: mov sp, x29 +; VBITS_GE_512-NEXT: .cfi_def_cfa wsp, 16 ; VBITS_GE_512-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload +; VBITS_GE_512-NEXT:
.cfi_def_cfa_offset 0 +; VBITS_GE_512-NEXT: .cfi_restore w30 +; VBITS_GE_512-NEXT: .cfi_restore w29 ; VBITS_GE_512-NEXT: ret %mask = load <32 x i1>, <32 x i1>* %c %op1 = load <32 x half>, <32 x half>* %a @@ -358,7 +366,11 @@ ; VBITS_GE_1024-NEXT: sel z0.h, p1, z1.h, z2.h ; VBITS_GE_1024-NEXT: st1h { z0.h }, p0, [x0] ; VBITS_GE_1024-NEXT: mov sp, x29 +; VBITS_GE_1024-NEXT: .cfi_def_cfa wsp, 16 ; VBITS_GE_1024-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload +; VBITS_GE_1024-NEXT: .cfi_def_cfa_offset 0 +; VBITS_GE_1024-NEXT: .cfi_restore w30 +; VBITS_GE_1024-NEXT: .cfi_restore w29 ; VBITS_GE_1024-NEXT: ret %mask = load <64 x i1>, <64 x i1>* %c %op1 = load <64 x half>, <64 x half>* %a @@ -648,7 +660,11 @@ ; VBITS_GE_2048-NEXT: sel z0.h, p1, z1.h, z2.h ; VBITS_GE_2048-NEXT: st1h { z0.h }, p0, [x0] ; VBITS_GE_2048-NEXT: mov sp, x29 +; VBITS_GE_2048-NEXT: .cfi_def_cfa wsp, 16 ; VBITS_GE_2048-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload +; VBITS_GE_2048-NEXT: .cfi_def_cfa_offset 0 +; VBITS_GE_2048-NEXT: .cfi_restore w30 +; VBITS_GE_2048-NEXT: .cfi_restore w29 ; VBITS_GE_2048-NEXT: ret %mask = load <128 x i1>, <128 x i1>* %c %op1 = load <128 x half>, <128 x half>* %a @@ -718,7 +734,11 @@ ; CHECK-NEXT: sel z0.s, p1, z1.s, z2.s ; CHECK-NEXT: st1w { z0.s }, p0, [x0] ; CHECK-NEXT: mov sp, x29 +; CHECK-NEXT: .cfi_def_cfa wsp, 16 ; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w30 +; CHECK-NEXT: .cfi_restore w29 ; CHECK-NEXT: ret %mask = load <8 x i1>, <8 x i1>* %c %op1 = load <8 x float>, <8 x float>* %a @@ -775,7 +795,11 @@ ; VBITS_GE_512-NEXT: sel z0.s, p1, z1.s, z2.s ; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x0] ; VBITS_GE_512-NEXT: mov sp, x29 +; VBITS_GE_512-NEXT: .cfi_def_cfa wsp, 16 ; VBITS_GE_512-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload +; VBITS_GE_512-NEXT: .cfi_def_cfa_offset 0 +; VBITS_GE_512-NEXT: .cfi_restore w30 +; VBITS_GE_512-NEXT: .cfi_restore w29 ; VBITS_GE_512-NEXT: ret %mask = load <16 x i1>, <16 x i1>* %c %op1 = load <16 x float>, <16 x float>* %a @@ -856,7 +880,11 @@ ; VBITS_GE_1024-NEXT: sel z0.s, p1, z1.s, z2.s ; VBITS_GE_1024-NEXT: st1w { z0.s }, p0, [x0] ; VBITS_GE_1024-NEXT: mov sp, x29 +; VBITS_GE_1024-NEXT: .cfi_def_cfa wsp, 16 ; VBITS_GE_1024-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload +; VBITS_GE_1024-NEXT: .cfi_def_cfa_offset 0 +; VBITS_GE_1024-NEXT: .cfi_restore w30 +; VBITS_GE_1024-NEXT: .cfi_restore w29 ; VBITS_GE_1024-NEXT: ret %mask = load <32 x i1>, <32 x i1>* %c %op1 = load <32 x float>, <32 x float>* %a @@ -985,7 +1013,11 @@ ; VBITS_GE_2048-NEXT: sel z0.s, p1, z1.s, z2.s ; VBITS_GE_2048-NEXT: st1w { z0.s }, p0, [x0] ; VBITS_GE_2048-NEXT: mov sp, x29 +; VBITS_GE_2048-NEXT: .cfi_def_cfa wsp, 16 ; VBITS_GE_2048-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload +; VBITS_GE_2048-NEXT: .cfi_def_cfa_offset 0 +; VBITS_GE_2048-NEXT: .cfi_restore w30 +; VBITS_GE_2048-NEXT: .cfi_restore w29 ; VBITS_GE_2048-NEXT: ret %mask = load <64 x i1>, <64 x i1>* %c %op1 = load <64 x float>, <64 x float>* %a @@ -1053,7 +1085,11 @@ ; CHECK-NEXT: sel z0.d, p1, z1.d, z2.d ; CHECK-NEXT: st1d { z0.d }, p0, [x0] ; CHECK-NEXT: mov sp, x29 +; CHECK-NEXT: .cfi_def_cfa wsp, 16 ; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w30 +; CHECK-NEXT: .cfi_restore w29 ; CHECK-NEXT: ret %mask = load <4 x i1>, <4 x i1>* %c %op1 = load <4 x double>, <4 x double>* %a @@ -1105,7 +1141,11 @@ ; 
VBITS_GE_512-NEXT: sel z0.d, p1, z1.d, z2.d ; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x0] ; VBITS_GE_512-NEXT: mov sp, x29 +; VBITS_GE_512-NEXT: .cfi_def_cfa wsp, 16 ; VBITS_GE_512-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload +; VBITS_GE_512-NEXT: .cfi_def_cfa_offset 0 +; VBITS_GE_512-NEXT: .cfi_restore w30 +; VBITS_GE_512-NEXT: .cfi_restore w29 ; VBITS_GE_512-NEXT: ret %mask = load <8 x i1>, <8 x i1>* %c %op1 = load <8 x double>, <8 x double>* %a @@ -1177,7 +1217,11 @@ ; VBITS_GE_1024-NEXT: sel z0.d, p1, z1.d, z2.d ; VBITS_GE_1024-NEXT: st1d { z0.d }, p0, [x0] ; VBITS_GE_1024-NEXT: mov sp, x29 +; VBITS_GE_1024-NEXT: .cfi_def_cfa wsp, 16 ; VBITS_GE_1024-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload +; VBITS_GE_1024-NEXT: .cfi_def_cfa_offset 0 +; VBITS_GE_1024-NEXT: .cfi_restore w30 +; VBITS_GE_1024-NEXT: .cfi_restore w29 ; VBITS_GE_1024-NEXT: ret %mask = load <16 x i1>, <16 x i1>* %c %op1 = load <16 x double>, <16 x double>* %a @@ -1320,7 +1364,11 @@ ; VBITS_GE_2048-NEXT: sel z0.d, p1, z1.d, z2.d ; VBITS_GE_2048-NEXT: st1d { z0.d }, p0, [x0] ; VBITS_GE_2048-NEXT: mov sp, x29 +; VBITS_GE_2048-NEXT: .cfi_def_cfa wsp, 16 ; VBITS_GE_2048-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload +; VBITS_GE_2048-NEXT: .cfi_def_cfa_offset 0 +; VBITS_GE_2048-NEXT: .cfi_restore w30 +; VBITS_GE_2048-NEXT: .cfi_restore w29 ; VBITS_GE_2048-NEXT: ret %mask = load <32 x i1>, <32 x i1>* %c %op1 = load <32 x double>, <32 x double>* %a Index: llvm/test/CodeGen/AArch64/sve-fixed-length-int-vselect.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve-fixed-length-int-vselect.ll +++ llvm/test/CodeGen/AArch64/sve-fixed-length-int-vselect.ll @@ -131,7 +131,11 @@ ; CHECK-NEXT: sel z0.b, p1, z1.b, z2.b ; CHECK-NEXT: st1b { z0.b }, p0, [x0] ; CHECK-NEXT: mov sp, x29 +; CHECK-NEXT: .cfi_def_cfa wsp, 16 ; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w30 +; CHECK-NEXT: .cfi_restore w29 ; CHECK-NEXT: ret %mask = load <32 x i1>, <32 x i1>* %c %op1 = load <32 x i8>, <32 x i8>* %a @@ -292,7 +296,11 @@ ; VBITS_GE_512-NEXT: sel z0.b, p1, z1.b, z2.b ; VBITS_GE_512-NEXT: st1b { z0.b }, p0, [x0] ; VBITS_GE_512-NEXT: mov sp, x29 +; VBITS_GE_512-NEXT: .cfi_def_cfa wsp, 16 ; VBITS_GE_512-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload +; VBITS_GE_512-NEXT: .cfi_def_cfa_offset 0 +; VBITS_GE_512-NEXT: .cfi_restore w30 +; VBITS_GE_512-NEXT: .cfi_restore w29 ; VBITS_GE_512-NEXT: ret %mask = load <64 x i1>, <64 x i1>* %c %op1 = load <64 x i8>, <64 x i8>* %a @@ -582,7 +590,11 @@ ; VBITS_GE_1024-NEXT: sel z0.b, p1, z1.b, z2.b ; VBITS_GE_1024-NEXT: st1b { z0.b }, p0, [x0] ; VBITS_GE_1024-NEXT: mov sp, x29 +; VBITS_GE_1024-NEXT: .cfi_def_cfa wsp, 16 ; VBITS_GE_1024-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload +; VBITS_GE_1024-NEXT: .cfi_def_cfa_offset 0 +; VBITS_GE_1024-NEXT: .cfi_restore w30 +; VBITS_GE_1024-NEXT: .cfi_restore w29 ; VBITS_GE_1024-NEXT: ret %mask = load <128 x i1>, <128 x i1>* %c %op1 = load <128 x i8>, <128 x i8>* %a @@ -1130,7 +1142,11 @@ ; VBITS_GE_2048-NEXT: sel z0.b, p1, z1.b, z2.b ; VBITS_GE_2048-NEXT: st1b { z0.b }, p0, [x0] ; VBITS_GE_2048-NEXT: mov sp, x29 +; VBITS_GE_2048-NEXT: .cfi_def_cfa wsp, 16 ; VBITS_GE_2048-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload +; VBITS_GE_2048-NEXT: .cfi_def_cfa_offset 0 +; VBITS_GE_2048-NEXT: .cfi_restore w30 +; VBITS_GE_2048-NEXT: .cfi_restore w29 ; VBITS_GE_2048-NEXT: ret %mask = load <256 x i1>, <256 x 
i1>* %c %op1 = load <256 x i8>, <256 x i8>* %a @@ -1220,7 +1236,11 @@ ; CHECK-NEXT: sel z0.h, p1, z1.h, z2.h ; CHECK-NEXT: st1h { z0.h }, p0, [x0] ; CHECK-NEXT: mov sp, x29 +; CHECK-NEXT: .cfi_def_cfa wsp, 16 ; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w30 +; CHECK-NEXT: .cfi_restore w29 ; CHECK-NEXT: ret %mask = load <16 x i1>, <16 x i1>* %c %op1 = load <16 x i16>, <16 x i16>* %a @@ -1317,7 +1337,11 @@ ; VBITS_GE_512-NEXT: sel z0.h, p1, z1.h, z2.h ; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x0] ; VBITS_GE_512-NEXT: mov sp, x29 +; VBITS_GE_512-NEXT: .cfi_def_cfa wsp, 16 ; VBITS_GE_512-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload +; VBITS_GE_512-NEXT: .cfi_def_cfa_offset 0 +; VBITS_GE_512-NEXT: .cfi_restore w30 +; VBITS_GE_512-NEXT: .cfi_restore w29 ; VBITS_GE_512-NEXT: ret %mask = load <32 x i1>, <32 x i1>* %c %op1 = load <32 x i16>, <32 x i16>* %a @@ -1478,7 +1502,11 @@ ; VBITS_GE_1024-NEXT: sel z0.h, p1, z1.h, z2.h ; VBITS_GE_1024-NEXT: st1h { z0.h }, p0, [x0] ; VBITS_GE_1024-NEXT: mov sp, x29 +; VBITS_GE_1024-NEXT: .cfi_def_cfa wsp, 16 ; VBITS_GE_1024-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload +; VBITS_GE_1024-NEXT: .cfi_def_cfa_offset 0 +; VBITS_GE_1024-NEXT: .cfi_restore w30 +; VBITS_GE_1024-NEXT: .cfi_restore w29 ; VBITS_GE_1024-NEXT: ret %mask = load <64 x i1>, <64 x i1>* %c %op1 = load <64 x i16>, <64 x i16>* %a @@ -1768,7 +1796,11 @@ ; VBITS_GE_2048-NEXT: sel z0.h, p1, z1.h, z2.h ; VBITS_GE_2048-NEXT: st1h { z0.h }, p0, [x0] ; VBITS_GE_2048-NEXT: mov sp, x29 +; VBITS_GE_2048-NEXT: .cfi_def_cfa wsp, 16 ; VBITS_GE_2048-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload +; VBITS_GE_2048-NEXT: .cfi_def_cfa_offset 0 +; VBITS_GE_2048-NEXT: .cfi_restore w30 +; VBITS_GE_2048-NEXT: .cfi_restore w29 ; VBITS_GE_2048-NEXT: ret %mask = load <128 x i1>, <128 x i1>* %c %op1 = load <128 x i16>, <128 x i16>* %a @@ -1838,7 +1870,11 @@ ; CHECK-NEXT: sel z0.s, p1, z1.s, z2.s ; CHECK-NEXT: st1w { z0.s }, p0, [x0] ; CHECK-NEXT: mov sp, x29 +; CHECK-NEXT: .cfi_def_cfa wsp, 16 ; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w30 +; CHECK-NEXT: .cfi_restore w29 ; CHECK-NEXT: ret %mask = load <8 x i1>, <8 x i1>* %c %op1 = load <8 x i32>, <8 x i32>* %a @@ -1895,7 +1931,11 @@ ; VBITS_GE_512-NEXT: sel z0.s, p1, z1.s, z2.s ; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x0] ; VBITS_GE_512-NEXT: mov sp, x29 +; VBITS_GE_512-NEXT: .cfi_def_cfa wsp, 16 ; VBITS_GE_512-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload +; VBITS_GE_512-NEXT: .cfi_def_cfa_offset 0 +; VBITS_GE_512-NEXT: .cfi_restore w30 +; VBITS_GE_512-NEXT: .cfi_restore w29 ; VBITS_GE_512-NEXT: ret %mask = load <16 x i1>, <16 x i1>* %c %op1 = load <16 x i32>, <16 x i32>* %a @@ -1976,7 +2016,11 @@ ; VBITS_GE_1024-NEXT: sel z0.s, p1, z1.s, z2.s ; VBITS_GE_1024-NEXT: st1w { z0.s }, p0, [x0] ; VBITS_GE_1024-NEXT: mov sp, x29 +; VBITS_GE_1024-NEXT: .cfi_def_cfa wsp, 16 ; VBITS_GE_1024-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload +; VBITS_GE_1024-NEXT: .cfi_def_cfa_offset 0 +; VBITS_GE_1024-NEXT: .cfi_restore w30 +; VBITS_GE_1024-NEXT: .cfi_restore w29 ; VBITS_GE_1024-NEXT: ret %mask = load <32 x i1>, <32 x i1>* %c %op1 = load <32 x i32>, <32 x i32>* %a @@ -2105,7 +2149,11 @@ ; VBITS_GE_2048-NEXT: sel z0.s, p1, z1.s, z2.s ; VBITS_GE_2048-NEXT: st1w { z0.s }, p0, [x0] ; VBITS_GE_2048-NEXT: mov sp, x29 +; VBITS_GE_2048-NEXT: .cfi_def_cfa wsp, 16 ; VBITS_GE_2048-NEXT: 
ldp x29, x30, [sp], #16 // 16-byte Folded Reload +; VBITS_GE_2048-NEXT: .cfi_def_cfa_offset 0 +; VBITS_GE_2048-NEXT: .cfi_restore w30 +; VBITS_GE_2048-NEXT: .cfi_restore w29 ; VBITS_GE_2048-NEXT: ret %mask = load <64 x i1>, <64 x i1>* %c %op1 = load <64 x i32>, <64 x i32>* %a @@ -2173,7 +2221,11 @@ ; CHECK-NEXT: sel z0.d, p1, z1.d, z2.d ; CHECK-NEXT: st1d { z0.d }, p0, [x0] ; CHECK-NEXT: mov sp, x29 +; CHECK-NEXT: .cfi_def_cfa wsp, 16 ; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w30 +; CHECK-NEXT: .cfi_restore w29 ; CHECK-NEXT: ret %mask = load <4 x i1>, <4 x i1>* %c %op1 = load <4 x i64>, <4 x i64>* %a @@ -2225,7 +2277,11 @@ ; VBITS_GE_512-NEXT: sel z0.d, p1, z1.d, z2.d ; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x0] ; VBITS_GE_512-NEXT: mov sp, x29 +; VBITS_GE_512-NEXT: .cfi_def_cfa wsp, 16 ; VBITS_GE_512-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload +; VBITS_GE_512-NEXT: .cfi_def_cfa_offset 0 +; VBITS_GE_512-NEXT: .cfi_restore w30 +; VBITS_GE_512-NEXT: .cfi_restore w29 ; VBITS_GE_512-NEXT: ret %mask = load <8 x i1>, <8 x i1>* %c %op1 = load <8 x i64>, <8 x i64>* %a @@ -2297,7 +2353,11 @@ ; VBITS_GE_1024-NEXT: sel z0.d, p1, z1.d, z2.d ; VBITS_GE_1024-NEXT: st1d { z0.d }, p0, [x0] ; VBITS_GE_1024-NEXT: mov sp, x29 +; VBITS_GE_1024-NEXT: .cfi_def_cfa wsp, 16 ; VBITS_GE_1024-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload +; VBITS_GE_1024-NEXT: .cfi_def_cfa_offset 0 +; VBITS_GE_1024-NEXT: .cfi_restore w30 +; VBITS_GE_1024-NEXT: .cfi_restore w29 ; VBITS_GE_1024-NEXT: ret %mask = load <16 x i1>, <16 x i1>* %c %op1 = load <16 x i64>, <16 x i64>* %a @@ -2440,7 +2500,11 @@ ; VBITS_GE_2048-NEXT: sel z0.d, p1, z1.d, z2.d ; VBITS_GE_2048-NEXT: st1d { z0.d }, p0, [x0] ; VBITS_GE_2048-NEXT: mov sp, x29 +; VBITS_GE_2048-NEXT: .cfi_def_cfa wsp, 16 ; VBITS_GE_2048-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload +; VBITS_GE_2048-NEXT: .cfi_def_cfa_offset 0 +; VBITS_GE_2048-NEXT: .cfi_restore w30 +; VBITS_GE_2048-NEXT: .cfi_restore w29 ; VBITS_GE_2048-NEXT: ret %mask = load <32 x i1>, <32 x i1>* %c %op1 = load <32 x i64>, <32 x i64>* %a Index: llvm/test/CodeGen/AArch64/sve-fixed-length-vector-shuffle.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve-fixed-length-vector-shuffle.ll +++ llvm/test/CodeGen/AArch64/sve-fixed-length-vector-shuffle.ll @@ -951,7 +951,11 @@ ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x8] ; CHECK-NEXT: st1d { z0.d }, p0, [x0] ; CHECK-NEXT: mov sp, x29 +; CHECK-NEXT: .cfi_def_cfa wsp, 16 ; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w30 +; CHECK-NEXT: .cfi_restore w29 ; CHECK-NEXT: ret %op1 = load <4 x double>, <4 x double>* %a %op2 = load <4 x double>, <4 x double>* %b Index: llvm/test/CodeGen/AArch64/sve-insert-element.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve-insert-element.ll +++ llvm/test/CodeGen/AArch64/sve-insert-element.ll @@ -524,7 +524,10 @@ ; CHECK-NEXT: cmpne p0.b, p1/z, z0.b, #0 ; CHECK-NEXT: cmpne p1.b, p1/z, z1.b, #0 ; CHECK-NEXT: addvl sp, sp, #2 +; CHECK-NEXT: .cfi_def_cfa wsp, 16 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w29 ; CHECK-NEXT: ret %res = insertelement %val, i1 %elt, i32 %idx ret %res Index: llvm/test/CodeGen/AArch64/sve-insert-vector.ll 
=================================================================== --- llvm/test/CodeGen/AArch64/sve-insert-vector.ll +++ llvm/test/CodeGen/AArch64/sve-insert-vector.ll @@ -228,7 +228,10 @@ ; CHECK-NEXT: st1d { z0.d }, p0, [x0, #1, mul vl] ; CHECK-NEXT: st1d { z3.d }, p0, [x0] ; CHECK-NEXT: addvl sp, sp, #4 +; CHECK-NEXT: .cfi_def_cfa wsp, 16 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w29 ; CHECK-NEXT: ret %v0 = call <vscale x 16 x i64> @llvm.experimental.vector.insert.v2i64.nxv16i64(<vscale x 16 x i64> undef, <2 x i64> %sv0, i64 0) %v = call <vscale x 16 x i64> @llvm.experimental.vector.insert.v2i64.nxv16i64(<vscale x 16 x i64> %v0, <2 x i64> %sv1, i64 4) @@ -266,7 +269,10 @@ ; CHECK-NEXT: st1d { z0.d }, p0, [x1, #1, mul vl] ; CHECK-NEXT: st1d { z1.d }, p0, [x1] ; CHECK-NEXT: addvl sp, sp, #2 +; CHECK-NEXT: .cfi_def_cfa wsp, 16 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w29 ; CHECK-NEXT: ret %sv = load <2 x i64>, <2 x i64>* %psv %v = call <vscale x 16 x i64> @llvm.experimental.vector.insert.v2i64.nxv16i64(<vscale x 16 x i64> undef, <2 x i64> %sv, i64 2) Index: llvm/test/CodeGen/AArch64/sve-ld1r.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve-ld1r.ll +++ llvm/test/CodeGen/AArch64/sve-ld1r.ll @@ -26,6 +26,7 @@ ; CHECK-NEXT: strb w8, [sp, #12] ; CHECK-NEXT: ld1rb { z0.b }, p0/z, [sp, #14] ; CHECK-NEXT: add sp, sp, #16 +; CHECK-NEXT: .cfi_def_cfa_offset 0 ; CHECK-NEXT: ret %valp = alloca i8 %valp2 = load volatile i8, i8* @g8 Index: llvm/test/CodeGen/AArch64/sve-pred-arith.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve-pred-arith.ll +++ llvm/test/CodeGen/AArch64/sve-pred-arith.ll @@ -74,7 +74,10 @@ ; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: .cfi_def_cfa wsp, 16 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w29 ; CHECK-NEXT: ret %res = add %a, %b ret %res; @@ -155,7 +158,10 @@ ; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: .cfi_def_cfa wsp, 16 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w29 ; CHECK-NEXT: ret %res = sub %a, %b ret %res; Index: llvm/test/CodeGen/AArch64/sve-split-extract-elt.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve-split-extract-elt.ll +++ llvm/test/CodeGen/AArch64/sve-split-extract-elt.ll @@ -36,7 +36,10 @@ ; CHECK-NEXT: st1b { z0.b }, p0, [sp] ; CHECK-NEXT: ldrb w0, [x10, x8] ; CHECK-NEXT: addvl sp, sp, #2 +; CHECK-NEXT: .cfi_def_cfa wsp, 16 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w29 ; CHECK-NEXT: ret %ext = extractelement %a, i32 %idx ret i8 %ext @@ -62,7 +65,10 @@ ; CHECK-NEXT: st1h { z0.h }, p0, [sp] ; CHECK-NEXT: ldrh w0, [x10, x8, lsl #1] ; CHECK-NEXT: addvl sp, sp, #2 +; CHECK-NEXT: .cfi_def_cfa wsp, 16 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w29 ; CHECK-NEXT: ret %ext = extractelement %a, i32 %idx ret i16 %ext @@ -88,7 +94,10 @@ ; CHECK-NEXT: st1w { z0.s }, p0, [sp] ; CHECK-NEXT: ldr w0,
[x10, x8, lsl #2] ; CHECK-NEXT: addvl sp, sp, #2 +; CHECK-NEXT: .cfi_def_cfa wsp, 16 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w29 ; CHECK-NEXT: ret %ext = extractelement %a, i32 %idx ret i32 %ext @@ -116,7 +125,10 @@ ; CHECK-NEXT: st1d { z0.d }, p0, [sp] ; CHECK-NEXT: ldr x0, [x8, x9, lsl #3] ; CHECK-NEXT: addvl sp, sp, #4 +; CHECK-NEXT: .cfi_def_cfa wsp, 16 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w29 ; CHECK-NEXT: ret %ext = extractelement %a, i32 %idx ret i64 %ext @@ -161,7 +173,10 @@ ; CHECK-NEXT: csel x8, x8, x10, lo ; CHECK-NEXT: ldrh w0, [x9, x8, lsl #1] ; CHECK-NEXT: addvl sp, sp, #2 +; CHECK-NEXT: .cfi_def_cfa wsp, 16 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w29 ; CHECK-NEXT: ret %ext = extractelement %a, i32 128 ret i16 %ext @@ -189,7 +204,10 @@ ; CHECK-NEXT: st1w { z0.s }, p0, [sp] ; CHECK-NEXT: ldr w0, [x9, x8, lsl #2] ; CHECK-NEXT: addvl sp, sp, #4 +; CHECK-NEXT: .cfi_def_cfa wsp, 16 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w29 ; CHECK-NEXT: ret %ext = extractelement %a, i32 100000 ret i32 %ext @@ -214,7 +232,10 @@ ; CHECK-NEXT: st1d { z0.d }, p0, [sp] ; CHECK-NEXT: ldr x0, [x9, x8, lsl #3] ; CHECK-NEXT: addvl sp, sp, #2 +; CHECK-NEXT: .cfi_def_cfa wsp, 16 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w29 ; CHECK-NEXT: ret %ext = extractelement %a, i32 10 ret i64 %ext Index: llvm/test/CodeGen/AArch64/sve-split-insert-elt.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve-split-insert-elt.ll +++ llvm/test/CodeGen/AArch64/sve-split-insert-elt.ll @@ -36,7 +36,10 @@ ; CHECK-NEXT: ld1b { z1.b }, p0/z, [x9, #1, mul vl] ; CHECK-NEXT: ld1b { z0.b }, p0/z, [sp] ; CHECK-NEXT: addvl sp, sp, #2 +; CHECK-NEXT: .cfi_def_cfa wsp, 16 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w29 ; CHECK-NEXT: ret %ins = insertelement %a, i8 %elt, i64 %idx ret %ins @@ -62,7 +65,10 @@ ; CHECK-NEXT: ld1w { z1.s }, p0/z, [x9, #1, mul vl] ; CHECK-NEXT: ld1w { z0.s }, p0/z, [sp] ; CHECK-NEXT: addvl sp, sp, #2 +; CHECK-NEXT: .cfi_def_cfa wsp, 16 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w29 ; CHECK-NEXT: ret %ins = insertelement %a, float %elt, i64 %idx ret %ins @@ -92,7 +98,10 @@ ; CHECK-NEXT: ld1d { z3.d }, p0/z, [x9, #3, mul vl] ; CHECK-NEXT: ld1d { z0.d }, p0/z, [sp] ; CHECK-NEXT: addvl sp, sp, #4 +; CHECK-NEXT: .cfi_def_cfa wsp, 16 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w29 ; CHECK-NEXT: ret %ins = insertelement %a, i64 %elt, i64 %idx ret %ins @@ -156,7 +165,10 @@ ; CHECK-NEXT: ld1h { z3.h }, p0/z, [x9, #3, mul vl] ; CHECK-NEXT: ld1h { z0.h }, p0/z, [sp] ; CHECK-NEXT: addvl sp, sp, #4 +; CHECK-NEXT: .cfi_def_cfa wsp, 16 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w29 ; CHECK-NEXT: ret %ins = insertelement %a, i16 %elt, i64 128 ret %ins @@ -184,7 +196,10 @@ ; CHECK-NEXT: ld1w { z1.s }, p0/z, [x10, #1, mul vl] ; CHECK-NEXT: ld1w { z0.s }, p0/z, [sp] ; 
CHECK-NEXT: addvl sp, sp, #2 +; CHECK-NEXT: .cfi_def_cfa wsp, 16 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w29 ; CHECK-NEXT: ret %ins = insertelement %a, i32 %elt, i64 1000000 ret %ins Index: llvm/test/CodeGen/AArch64/sve-split-int-pred-reduce.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve-split-int-pred-reduce.ll +++ llvm/test/CodeGen/AArch64/sve-split-int-pred-reduce.ll @@ -34,7 +34,10 @@ ; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: .cfi_def_cfa wsp, 16 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w29 ; CHECK-NEXT: ret %res = call i1 @llvm.vector.reduce.and.nxv64i1(<vscale x 64 x i1> %a) ret i1 %res Index: llvm/test/CodeGen/AArch64/sve-trunc.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve-trunc.ll +++ llvm/test/CodeGen/AArch64/sve-trunc.ll @@ -143,7 +143,10 @@ ; CHECK-NEXT: uzp1 p0.h, p0.h, p3.h ; CHECK-NEXT: uzp1 p0.b, p0.b, p1.b ; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: .cfi_def_cfa wsp, 16 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w29 ; CHECK-NEXT: ret entry: %out = trunc %in to Index: llvm/test/CodeGen/AArch64/sve-varargs.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve-varargs.ll +++ llvm/test/CodeGen/AArch64/sve-varargs.ll @@ -15,6 +15,8 @@ ; CHECK-NEXT: add x0, x0, :lo12:.str_1 ; CHECK-NEXT: bl sve_printf ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w30 ; CHECK-NEXT: ret %f = getelementptr [6 x i8], [6 x i8]* @.str_1, i64 0, i64 0 call i32 (i8*, <vscale x 4 x i32>, ...) @sve_printf(i8* %f, <vscale x 4 x i32> %x) Index: llvm/test/CodeGen/AArch64/swifttail-call.ll =================================================================== --- llvm/test/CodeGen/AArch64/swifttail-call.ll +++ llvm/test/CodeGen/AArch64/swifttail-call.ll @@ -23,6 +23,7 @@ ret void ; COMMON: add sp, sp, #16 +; COMMON-NEXT: .cfi_def_cfa_offset -16 ; COMMON-NEXT: b callee_stack0 } @@ -74,6 +75,7 @@ ret void ; COMMON: str {{x[0-9]+}}, [sp, #16]! +; COMMON-NEXT: .cfi_def_cfa_offset -16 ; COMMON-NEXT: b callee_stack8 } Index: llvm/test/CodeGen/AArch64/tail-call.ll =================================================================== --- llvm/test/CodeGen/AArch64/tail-call.ll +++ llvm/test/CodeGen/AArch64/tail-call.ll @@ -23,6 +23,7 @@ ret void ; COMMON: add sp, sp, #16 +; COMMON: .cfi_def_cfa_offset -16 ; COMMON-NEXT: b callee_stack0 } @@ -74,6 +75,7 @@ ret void ; COMMON: str {{x[0-9]+}}, [sp, #16]! +; COMMON: .cfi_def_cfa_offset -16 ; COMMON-NEXT: b callee_stack8 } Index: llvm/test/CodeGen/AArch64/tailcc-tail-call.ll =================================================================== --- llvm/test/CodeGen/AArch64/tailcc-tail-call.ll +++ llvm/test/CodeGen/AArch64/tailcc-tail-call.ll @@ -23,6 +23,7 @@ ret void ; COMMON: add sp, sp, #16 +; COMMON: .cfi_def_cfa_offset -16 ; COMMON-NEXT: b callee_stack0 } @@ -40,6 +41,9 @@ ; from an interrupt if the kernel does not honour a red-zone, and a larger ; call could well overflow the red zone even if it is present.
; COMMON-NOT: sub sp, +; COMMON-NEXT: .cfi_def_cfa_offset 16 +; COMMON-NEXT: .cfi_restore w30 +; COMMON-NEXT: .cfi_restore w29 ; COMMON-NEXT: b callee_stack8 } @@ -79,6 +83,7 @@ ret void ; COMMON: str {{x[0-9]+}}, [sp, #16]! +; COMMON-NEXT: .cfi_def_cfa_offset -16 ; COMMON-NEXT: b callee_stack8 } Index: llvm/test/CodeGen/AArch64/unwind-preserved.ll =================================================================== --- llvm/test/CodeGen/AArch64/unwind-preserved.ll +++ llvm/test/CodeGen/AArch64/unwind-preserved.ll @@ -61,6 +61,7 @@ ; CHECK-NEXT: .LBB0_1: // %.Lcontinue ; CHECK-NEXT: ldr z0, [sp, #1, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: addvl sp, sp, #2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x90, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 144 * VG ; CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload @@ -90,12 +91,25 @@ ; CHECK-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: addvl sp, sp, #18 +; CHECK-NEXT: .cfi_def_cfa wsp, 16 +; CHECK-NEXT: .cfi_restore z8 +; CHECK-NEXT: .cfi_restore z9 +; CHECK-NEXT: .cfi_restore z10 +; CHECK-NEXT: .cfi_restore z11 +; CHECK-NEXT: .cfi_restore z12 +; CHECK-NEXT: .cfi_restore z13 +; CHECK-NEXT: .cfi_restore z14 +; CHECK-NEXT: .cfi_restore z15 ; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w30 +; CHECK-NEXT: .cfi_restore w29 ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB0_2: // %.Lunwind ; CHECK-NEXT: .Ltmp2: ; CHECK-NEXT: ldr z0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: addvl sp, sp, #2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x90, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 144 * VG ; CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload @@ -125,7 +139,19 @@ ; CHECK-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: addvl sp, sp, #18 +; CHECK-NEXT: .cfi_def_cfa wsp, 16 +; CHECK-NEXT: .cfi_restore z8 +; CHECK-NEXT: .cfi_restore z9 +; CHECK-NEXT: .cfi_restore z10 +; CHECK-NEXT: .cfi_restore z11 +; CHECK-NEXT: .cfi_restore z12 +; CHECK-NEXT: .cfi_restore z13 +; CHECK-NEXT: .cfi_restore z14 +; CHECK-NEXT: .cfi_restore z15 ; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w30 +; CHECK-NEXT: .cfi_restore w29 ; CHECK-NEXT: ret ; ; GISEL-LABEL: invoke_callee_may_throw_sve: @@ -185,6 +211,7 @@ ; GISEL-NEXT: .LBB0_1: // %.Lcontinue ; GISEL-NEXT: ldr z0, [sp, #1, mul vl] // 16-byte Folded Reload ; GISEL-NEXT: addvl sp, sp, #2 +; GISEL-NEXT: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x90, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 144 * VG ; GISEL-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload ; GISEL-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload ; GISEL-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload @@ -214,12 +241,25 @@ ; GISEL-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload ; GISEL-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload ; GISEL-NEXT: addvl sp, sp, #18 +; GISEL-NEXT: .cfi_def_cfa wsp, 16 +; GISEL-NEXT: 
.cfi_restore z8 +; GISEL-NEXT: .cfi_restore z9 +; GISEL-NEXT: .cfi_restore z10 +; GISEL-NEXT: .cfi_restore z11 +; GISEL-NEXT: .cfi_restore z12 +; GISEL-NEXT: .cfi_restore z13 +; GISEL-NEXT: .cfi_restore z14 +; GISEL-NEXT: .cfi_restore z15 ; GISEL-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload +; GISEL-NEXT: .cfi_def_cfa_offset 0 +; GISEL-NEXT: .cfi_restore w30 +; GISEL-NEXT: .cfi_restore w29 ; GISEL-NEXT: ret ; GISEL-NEXT: .LBB0_2: // %.Lunwind ; GISEL-NEXT: .Ltmp2: ; GISEL-NEXT: ldr z0, [sp] // 16-byte Folded Reload ; GISEL-NEXT: addvl sp, sp, #2 +; GISEL-NEXT: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x90, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 144 * VG ; GISEL-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload ; GISEL-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload ; GISEL-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload @@ -249,7 +289,19 @@ ; GISEL-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload ; GISEL-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload ; GISEL-NEXT: addvl sp, sp, #18 +; GISEL-NEXT: .cfi_def_cfa wsp, 16 +; GISEL-NEXT: .cfi_restore z8 +; GISEL-NEXT: .cfi_restore z9 +; GISEL-NEXT: .cfi_restore z10 +; GISEL-NEXT: .cfi_restore z11 +; GISEL-NEXT: .cfi_restore z12 +; GISEL-NEXT: .cfi_restore z13 +; GISEL-NEXT: .cfi_restore z14 +; GISEL-NEXT: .cfi_restore z15 ; GISEL-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload +; GISEL-NEXT: .cfi_def_cfa_offset 0 +; GISEL-NEXT: .cfi_restore w30 +; GISEL-NEXT: .cfi_restore w29 ; GISEL-NEXT: ret %result = invoke <vscale x 4 x i32> @may_throw_sve(<vscale x 4 x i32> %v) to label %.Lcontinue unwind label %.Lunwind .Lcontinue: @@ -315,6 +367,25 @@ ; CHECK-NEXT: ldp q21, q20, [sp, #64] // 32-byte Folded Reload ; CHECK-NEXT: ldp q23, q22, [sp, #32] // 32-byte Folded Reload ; CHECK-NEXT: add sp, sp, #304 +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w30 +; CHECK-NEXT: .cfi_restore w29 +; CHECK-NEXT: .cfi_restore b8 +; CHECK-NEXT: .cfi_restore b9 +; CHECK-NEXT: .cfi_restore b10 +; CHECK-NEXT: .cfi_restore b11 +; CHECK-NEXT: .cfi_restore b12 +; CHECK-NEXT: .cfi_restore b13 +; CHECK-NEXT: .cfi_restore b14 +; CHECK-NEXT: .cfi_restore b15 +; CHECK-NEXT: .cfi_restore b16 +; CHECK-NEXT: .cfi_restore b17 +; CHECK-NEXT: .cfi_restore b18 +; CHECK-NEXT: .cfi_restore b19 +; CHECK-NEXT: .cfi_restore b20 +; CHECK-NEXT: .cfi_restore b21 +; CHECK-NEXT: .cfi_restore b22 +; CHECK-NEXT: .cfi_restore b23 ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB1_2: // %.Lunwind ; CHECK-NEXT: .Ltmp5: @@ -329,6 +400,25 @@ ; CHECK-NEXT: ldp q21, q20, [sp, #64] // 32-byte Folded Reload ; CHECK-NEXT: ldp q23, q22, [sp, #32] // 32-byte Folded Reload ; CHECK-NEXT: add sp, sp, #304 +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w30 +; CHECK-NEXT: .cfi_restore w29 +; CHECK-NEXT: .cfi_restore b8 +; CHECK-NEXT: .cfi_restore b9 +; CHECK-NEXT: .cfi_restore b10 +; CHECK-NEXT: .cfi_restore b11 +; CHECK-NEXT: .cfi_restore b12 +; CHECK-NEXT: .cfi_restore b13 +; CHECK-NEXT: .cfi_restore b14 +; CHECK-NEXT: .cfi_restore b15 +; CHECK-NEXT: .cfi_restore b16 +; CHECK-NEXT: .cfi_restore b17 +; CHECK-NEXT: .cfi_restore b18 +; CHECK-NEXT: .cfi_restore b19 +; CHECK-NEXT: .cfi_restore b20 +; CHECK-NEXT: .cfi_restore b21 +; CHECK-NEXT: .cfi_restore b22 +; CHECK-NEXT: .cfi_restore b23 ; CHECK-NEXT: ret ; ; GISEL-LABEL: invoke_callee_may_throw_neon: @@ -382,6 +472,25 @@ ; GISEL-NEXT: ldp q21, q20, [sp, #64] // 32-byte Folded Reload ; GISEL-NEXT: ldp q23, q22, [sp, #32] // 32-byte Folded Reload ; GISEL-NEXT: add sp, sp, #304 +; GISEL-NEXT:
.cfi_def_cfa_offset 0 +; GISEL-NEXT: .cfi_restore w30 +; GISEL-NEXT: .cfi_restore w29 +; GISEL-NEXT: .cfi_restore b8 +; GISEL-NEXT: .cfi_restore b9 +; GISEL-NEXT: .cfi_restore b10 +; GISEL-NEXT: .cfi_restore b11 +; GISEL-NEXT: .cfi_restore b12 +; GISEL-NEXT: .cfi_restore b13 +; GISEL-NEXT: .cfi_restore b14 +; GISEL-NEXT: .cfi_restore b15 +; GISEL-NEXT: .cfi_restore b16 +; GISEL-NEXT: .cfi_restore b17 +; GISEL-NEXT: .cfi_restore b18 +; GISEL-NEXT: .cfi_restore b19 +; GISEL-NEXT: .cfi_restore b20 +; GISEL-NEXT: .cfi_restore b21 +; GISEL-NEXT: .cfi_restore b22 +; GISEL-NEXT: .cfi_restore b23 ; GISEL-NEXT: ret ; GISEL-NEXT: .LBB1_2: // %.Lunwind ; GISEL-NEXT: .Ltmp5: @@ -396,6 +505,25 @@ ; GISEL-NEXT: ldp q21, q20, [sp, #64] // 32-byte Folded Reload ; GISEL-NEXT: ldp q23, q22, [sp, #32] // 32-byte Folded Reload ; GISEL-NEXT: add sp, sp, #304 +; GISEL-NEXT: .cfi_def_cfa_offset 0 +; GISEL-NEXT: .cfi_restore w30 +; GISEL-NEXT: .cfi_restore w29 +; GISEL-NEXT: .cfi_restore b8 +; GISEL-NEXT: .cfi_restore b9 +; GISEL-NEXT: .cfi_restore b10 +; GISEL-NEXT: .cfi_restore b11 +; GISEL-NEXT: .cfi_restore b12 +; GISEL-NEXT: .cfi_restore b13 +; GISEL-NEXT: .cfi_restore b14 +; GISEL-NEXT: .cfi_restore b15 +; GISEL-NEXT: .cfi_restore b16 +; GISEL-NEXT: .cfi_restore b17 +; GISEL-NEXT: .cfi_restore b18 +; GISEL-NEXT: .cfi_restore b19 +; GISEL-NEXT: .cfi_restore b20 +; GISEL-NEXT: .cfi_restore b21 +; GISEL-NEXT: .cfi_restore b22 +; GISEL-NEXT: .cfi_restore b23 ; GISEL-NEXT: ret %result = invoke aarch64_vector_pcs <4 x i32> @may_throw_neon(<4 x i32> %v) to label %.Lcontinue unwind label %.Lunwind .Lcontinue: Index: llvm/test/Transforms/CodeGenPrepare/AArch64/large-offset-gep.ll =================================================================== --- llvm/test/Transforms/CodeGenPrepare/AArch64/large-offset-gep.ll +++ llvm/test/Transforms/CodeGenPrepare/AArch64/large-offset-gep.ll @@ -171,6 +171,11 @@ ; CHECK-NEXT: .LBB3_4: // %while_end ; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: ldp x30, x21, [sp], #32 // 16-byte Folded Reload +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w19 +; CHECK-NEXT: .cfi_restore w20 +; CHECK-NEXT: .cfi_restore w21 +; CHECK-NEXT: .cfi_restore w30 ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB3_5: // %cleanup ; CHECK-NEXT: .Ltmp2: Index: llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/aarch64_generated_funcs.ll.generated.expected =================================================================== --- llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/aarch64_generated_funcs.ll.generated.expected +++ llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/aarch64_generated_funcs.ll.generated.expected @@ -91,8 +91,12 @@ ; CHECK-NEXT: bl OUTLINED_FUNCTION_0 ; CHECK-NEXT: .LBB0_5: ; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: .cfi_def_cfa wsp, 48 ; CHECK-NEXT: ldp x29, x30, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: add sp, sp, #48 +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w30 +; CHECK-NEXT: .cfi_restore w29 ; CHECK-NEXT: ret ; ; CHECK-LABEL: main: @@ -118,8 +122,12 @@ ; CHECK-NEXT: //NO_APP ; CHECK-NEXT: stp w10, w8, [x29, #-12] ; CHECK-NEXT: stp w9, w11, [sp, #12] +; CHECK-NEXT: .cfi_def_cfa wsp, 48 ; CHECK-NEXT: ldp x29, x30, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: add sp, sp, #48 +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w30 +; CHECK-NEXT: .cfi_restore w29 ; CHECK-NEXT: ret ; ; CHECK-LABEL: OUTLINED_FUNCTION_0: Index: 
llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/aarch64_generated_funcs.ll.nogenerated.expected =================================================================== --- llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/aarch64_generated_funcs.ll.nogenerated.expected +++ llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/aarch64_generated_funcs.ll.nogenerated.expected @@ -32,8 +32,12 @@ ; CHECK-NEXT: bl OUTLINED_FUNCTION_0 ; CHECK-NEXT: .LBB0_5: ; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: .cfi_def_cfa wsp, 48 ; CHECK-NEXT: ldp x29, x30, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: add sp, sp, #48 +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w30 +; CHECK-NEXT: .cfi_restore w29 ; CHECK-NEXT: ret %1 = alloca i32, align 4 %2 = alloca i32, align 4 @@ -95,8 +99,12 @@ ; CHECK-NEXT: //NO_APP ; CHECK-NEXT: stp w10, w8, [x29, #-12] ; CHECK-NEXT: stp w9, w11, [sp, #12] +; CHECK-NEXT: .cfi_def_cfa wsp, 48 ; CHECK-NEXT: ldp x29, x30, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: add sp, sp, #48 +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w30 +; CHECK-NEXT: .cfi_restore w29 ; CHECK-NEXT: ret %1 = alloca i32, align 4 %2 = alloca i32, align 4