diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -3493,7 +3493,9 @@ BothFlags<[NoXarchOption, CC1Option], " the AAPCS standard requirement stating that" " volatile bit-field width is dictated by the field container type. (ARM only).">>, Group; - +def mframe_chain : Joined<["-"], "mframe-chain=">, + Group, Values<"none,aapcs,aapcs+leaf">, + HelpText<"Select the frame chain model used to emit frame records (Arm only).">; def mgeneral_regs_only : Flag<["-"], "mgeneral-regs-only">, Group, HelpText<"Generate code which only uses the general purpose registers (AArch64/x86 only)">; def mfix_cmse_cve_2021_35465 : Flag<["-"], "mfix-cmse-cve-2021-35465">, diff --git a/clang/lib/Driver/ToolChains/Arch/ARM.cpp b/clang/lib/Driver/ToolChains/Arch/ARM.cpp --- a/clang/lib/Driver/ToolChains/Arch/ARM.cpp +++ b/clang/lib/Driver/ToolChains/Arch/ARM.cpp @@ -718,6 +718,15 @@ } } + // Propagate frame-chain model selection + if (Arg *A = Args.getLastArg(options::OPT_mframe_chain)) { + StringRef FrameChainOption = A->getValue(); + if (FrameChainOption.startswith("aapcs")) + Features.push_back("+aapcs-frame-chain"); + if (FrameChainOption == "aapcs+leaf") + Features.push_back("+aapcs-frame-chain-leaf"); + } + // CMSE: Check for target 8M (for -mcmse to be applicable) is performed later. 
if (Args.getLastArg(options::OPT_mcmse)) Features.push_back("+8msecext"); diff --git a/llvm/lib/Target/ARM/ARM.td b/llvm/lib/Target/ARM/ARM.td --- a/llvm/lib/Target/ARM/ARM.td +++ b/llvm/lib/Target/ARM/ARM.td @@ -546,6 +546,16 @@ "FixCortexA57AES1742098", "true", "Work around Cortex-A57 Erratum 1742098 / Cortex-A72 Erratum 1655431 (AES)">; +def FeatureAAPCSFrameChain : SubtargetFeature<"aapcs-frame-chain", + "CreateAAPCSFrameChain", "true", + "Create an AAPCS compliant frame chain">; + +def FeatureAAPCSFrameChainLeaf : SubtargetFeature<"aapcs-frame-chain-leaf", + "CreateAAPCSFrameChainLeaf", "true", + "Create an AAPCS compliant frame chain " + "for leaf functions", + [FeatureAAPCSFrameChain]>; + //===----------------------------------------------------------------------===// // ARM architecture class // diff --git a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp --- a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -63,12 +63,8 @@ ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { const ARMSubtarget &STI = MF->getSubtarget(); bool UseSplitPush = STI.splitFramePushPop(*MF); - const MCPhysReg *RegList = - STI.isTargetDarwin() - ? CSR_iOS_SaveList - : (UseSplitPush ? CSR_AAPCS_SplitPush_SaveList : CSR_AAPCS_SaveList); - const Function &F = MF->getFunction(); + if (F.getCallingConv() == CallingConv::GHC) { // GHC set of callee saved regs is empty as all those regs are // used for passing STG regs around @@ -80,13 +76,13 @@ } else if (F.getCallingConv() == CallingConv::SwiftTail) { return STI.isTargetDarwin() ? CSR_iOS_SwiftTail_SaveList - : (UseSplitPush ? CSR_AAPCS_SplitPush_SwiftTail_SaveList + : (UseSplitPush ? 
CSR_ATPCS_SplitPush_SwiftTail_SaveList : CSR_AAPCS_SwiftTail_SaveList); } else if (F.hasFnAttribute("interrupt")) { if (STI.isMClass()) { // M-class CPUs have hardware which saves the registers needed to allow a // function conforming to the AAPCS to function as a handler. - return UseSplitPush ? CSR_AAPCS_SplitPush_SaveList : CSR_AAPCS_SaveList; + return UseSplitPush ? CSR_ATPCS_SplitPush_SaveList : CSR_AAPCS_SaveList; } else if (F.getFnAttribute("interrupt").getValueAsString() == "FIQ") { // Fast interrupt mode gives the handler a private copy of R8-R14, so less // need to be saved to restore user-mode state. @@ -103,7 +99,7 @@ if (STI.isTargetDarwin()) return CSR_iOS_SwiftError_SaveList; - return UseSplitPush ? CSR_AAPCS_SplitPush_SwiftError_SaveList : + return UseSplitPush ? CSR_ATPCS_SplitPush_SwiftError_SaveList : CSR_AAPCS_SwiftError_SaveList; } @@ -111,7 +107,15 @@ return MF->getInfo()->isSplitCSR() ? CSR_iOS_CXX_TLS_PE_SaveList : CSR_iOS_CXX_TLS_SaveList; - return RegList; + + if (STI.isTargetDarwin()) + return CSR_iOS_SaveList; + + if (UseSplitPush) + return STI.createAAPCSFrameChain() ? CSR_AAPCS_SplitPush_SaveList + : CSR_ATPCS_SplitPush_SaveList; + + return CSR_AAPCS_SaveList; } const MCPhysReg *ARMBaseRegisterInfo::getCalleeSavedRegsViaCopy( @@ -240,7 +244,7 @@ BitVector Reserved(getNumRegs()); markSuperRegs(Reserved, ARM::PC); - if (TFI->hasFP(MF)) + if (TFI->isFPReserved(MF)) markSuperRegs(Reserved, STI.getFramePointerReg()); if (hasBasePointer(MF)) markSuperRegs(Reserved, BasePtr); diff --git a/llvm/lib/Target/ARM/ARMCallingConv.td b/llvm/lib/Target/ARM/ARMCallingConv.td --- a/llvm/lib/Target/ARM/ARMCallingConv.td +++ b/llvm/lib/Target/ARM/ARMCallingConv.td @@ -284,8 +284,8 @@ // The order of callee-saved registers needs to match the order we actually push // them in FrameLowering, because this order is what's used by // PrologEpilogInserter to allocate frame index slots. So when R7 is the frame -// pointer, we use this AAPCS alternative. 
-def CSR_AAPCS_SplitPush : CalleeSavedRegs<(add LR, R7, R6, R5, R4, +// pointer, we use this ATPCS alternative. +def CSR_ATPCS_SplitPush : CalleeSavedRegs<(add LR, R7, R6, R5, R4, R11, R10, R9, R8, (sequence "D%u", 15, 8))>; @@ -294,13 +294,22 @@ LR, R11)>; // R8 is used to pass swifterror, remove it from CSR. -def CSR_AAPCS_SplitPush_SwiftError : CalleeSavedRegs<(sub CSR_AAPCS_SplitPush, +def CSR_ATPCS_SplitPush_SwiftError : CalleeSavedRegs<(sub CSR_ATPCS_SplitPush, R8)>; // R10 is used to pass swifterror, remove it from CSR. -def CSR_AAPCS_SplitPush_SwiftTail : CalleeSavedRegs<(sub CSR_AAPCS_SplitPush, +def CSR_ATPCS_SplitPush_SwiftTail : CalleeSavedRegs<(sub CSR_ATPCS_SplitPush, R10)>; +// When enforcing an AAPCS compliant frame chain, R11 is used as the frame +// pointer even for Thumb targets, where split pushes are necessary. +// This AAPCS alternative makes sure the frame index slots match the push +// order in that case. +def CSR_AAPCS_SplitPush : CalleeSavedRegs<(add LR, R11, + R7, R6, R5, R4, + R10, R9, R8, + (sequence "D%u", 15, 8))>; + // Constructors and destructors return 'this' in the ARM C++ ABI; since 'this' // and the pointer return value are both passed in R0 in these cases, this can // be partially modelled by treating R0 as a callee-saved register diff --git a/llvm/lib/Target/ARM/ARMFrameLowering.h b/llvm/lib/Target/ARM/ARMFrameLowering.h --- a/llvm/lib/Target/ARM/ARMFrameLowering.h +++ b/llvm/lib/Target/ARM/ARMFrameLowering.h @@ -46,6 +46,7 @@ bool enableCalleeSaveSkip(const MachineFunction &MF) const override; bool hasFP(const MachineFunction &MF) const override; + bool isFPReserved(const MachineFunction &MF) const; bool hasReservedCallFrame(const MachineFunction &MF) const override; bool canSimplifyCallFramePseudos(const MachineFunction &MF) const override; StackOffset getFrameIndexReference(const MachineFunction &MF, int FI, diff --git a/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/llvm/lib/Target/ARM/ARMFrameLowering.cpp --- 
a/llvm/lib/Target/ARM/ARMFrameLowering.cpp +++ b/llvm/lib/Target/ARM/ARMFrameLowering.cpp @@ -47,7 +47,8 @@ // | | // |-----------------------------------| // | | -// | prev_fp, prev_lr | +// | prev_lr | +// | prev_fp | // | (a.k.a. "frame record") | // | | // |- - - - - - - - - - - - - - - - - -| <- fp (r7 or r11) @@ -211,6 +212,12 @@ MFI.isFrameAddressTaken()); } +/// isFPReserved - Return true if the frame pointer register should be +/// considered a reserved register in the scope of the specified function. +bool ARMFrameLowering::isFPReserved(const MachineFunction &MF) const { + return hasFP(MF) || MF.getSubtarget().createAAPCSFrameChain(); +} + /// hasReservedCallFrame - Under normal circumstances, when a frame pointer is /// not required, we reserve argument space for call sites in the function /// immediately on entry to the current function. This eliminates the need for @@ -1033,6 +1040,9 @@ // into spill area 1, including the FP in R11. In either case, it // is in area one and the adjustment needs to take place just after // that push. + // FIXME: The above is not necessarily true when PACBTI is enabled. + // AAPCS requires use of R11, and PACBTI gets in the way of regular pushes, + // so FP ends up in area two. MachineBasicBlock::iterator AfterPush; if (HasFP) { AfterPush = std::next(GPRCS1Push); @@ -2196,6 +2206,34 @@ return true; } +static bool requiresAAPCSFrameRecord(const MachineFunction &MF) { + const auto &Subtarget = MF.getSubtarget(); + return Subtarget.createAAPCSFrameChainLeaf() || + (Subtarget.createAAPCSFrameChain() && MF.getFrameInfo().hasCalls()); +} + +// Thumb1 may require a spill when storing to a frame index through FP, for +// cases where FP is a high register (R11). This scans the function for cases +// where this may happen. 
+static bool canSpillOnFrameIndexAccess(const MachineFunction &MF, + const TargetFrameLowering &TFI) { + const ARMFunctionInfo *AFI = MF.getInfo(); + if (!AFI->isThumb1OnlyFunction()) + return false; + + for (const auto &MBB : MF) + for (const auto &MI : MBB) + if (MI.getOpcode() == ARM::tSTRspi || MI.getOpcode() == ARM::tSTRi) + for (const auto &Op : MI.operands()) + if (Op.isFI()) { + Register Reg; + TFI.getFrameIndexReference(MF, Op.getIndex(), Reg); + if (ARM::hGPRRegClass.contains(Reg) && Reg != ARM::SP) + return true; + } + return false; +} + void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS) const { @@ -2204,7 +2242,7 @@ // to take advantage the eliminateFrameIndex machinery. This also ensures it // is spilled in the order specified by getCalleeSavedRegs() to make it easier // to combine multiple loads / stores. - bool CanEliminateFrame = true; + bool CanEliminateFrame = !(requiresAAPCSFrameRecord(MF) && hasFP(MF)); bool CS1Spilled = false; bool LRSpilled = false; unsigned NumGPRSpills = 0; @@ -2399,6 +2437,11 @@ // Functions with VLAs or extremely large call frames are rare, and // if a function is allocating more than 1KB of stack, an extra 4-byte // slot probably isn't relevant. + // + // A special case is the scenario where r11 is used as FP, where accesses + // to a frame index will require its value to be moved into a low reg. + // This is handled later on, once we are able to determine if we have any + // fp-relative accesses. if (RegInfo->hasBasePointer(MF)) EstimatedRSStackSizeLimit = (1U << 5) * 4; else @@ -2445,7 +2488,9 @@ SavedRegs.set(FramePtr); // If the frame pointer is required by the ABI, also spill LR so that we // emit a complete frame record. 
- if (MF.getTarget().Options.DisableFramePointerElim(MF) && !LRSpilled) { + if ((requiresAAPCSFrameRecord(MF) || + MF.getTarget().Options.DisableFramePointerElim(MF)) && + !LRSpilled) { SavedRegs.set(ARM::LR); LRSpilled = true; NumGPRSpills++; @@ -2527,7 +2572,7 @@ } // r7 can be used if it is not being used as the frame pointer. - if (!HasFP) { + if (!HasFP || FramePtr != ARM::R7) { if (SavedRegs.test(ARM::R7)) { --RegDeficit; LLVM_DEBUG(dbgs() << "%r7 is saved low register, RegDeficit = " @@ -2648,8 +2693,10 @@ // to materialize a stack offset. If so, either spill one additional // callee-saved register or reserve a special spill slot to facilitate // register scavenging. Thumb1 needs a spill slot for stack pointer - // adjustments also, even when the frame itself is small. - if (BigFrameOffsets && !ExtraCSSpill) { + // adjustments and for frame index accesses when FP is high register, + // even when the frame itself is small. + if (!ExtraCSSpill && + (BigFrameOffsets || canSpillOnFrameIndexAccess(MF, *this))) { // If any non-reserved CS register isn't spilled, just spill one or two // extra. That should take care of it! unsigned NumExtras = TargetAlign.value() / 4; diff --git a/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h b/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h --- a/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h +++ b/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h @@ -86,6 +86,7 @@ /// GPRCS1Size, GPRCS2Size, DPRCSSize - Sizes of callee saved register spills /// areas. 
unsigned FPCXTSaveSize = 0; + unsigned FRSaveSize = 0; unsigned GPRCS1Size = 0; unsigned GPRCS2Size = 0; unsigned DPRCSAlignGapSize = 0; @@ -203,12 +204,14 @@ void setDPRCalleeSavedAreaOffset(unsigned o) { DPRCSOffset = o; } unsigned getFPCXTSaveAreaSize() const { return FPCXTSaveSize; } + unsigned getFrameRecordSavedAreaSize() const { return FRSaveSize; } unsigned getGPRCalleeSavedArea1Size() const { return GPRCS1Size; } unsigned getGPRCalleeSavedArea2Size() const { return GPRCS2Size; } unsigned getDPRCalleeSavedGapSize() const { return DPRCSAlignGapSize; } unsigned getDPRCalleeSavedAreaSize() const { return DPRCSSize; } void setFPCXTSaveAreaSize(unsigned s) { FPCXTSaveSize = s; } + void setFrameRecordSavedAreaSize(unsigned s) { FRSaveSize = s; } void setGPRCalleeSavedArea1Size(unsigned s) { GPRCS1Size = s; } void setGPRCalleeSavedArea2Size(unsigned s) { GPRCS2Size = s; } void setDPRCalleeSavedGapSize(unsigned s) { DPRCSAlignGapSize = s; } diff --git a/llvm/lib/Target/ARM/ARMSubtarget.h b/llvm/lib/Target/ARM/ARMSubtarget.h --- a/llvm/lib/Target/ARM/ARMSubtarget.h +++ b/llvm/lib/Target/ARM/ARMSubtarget.h @@ -430,7 +430,8 @@ } MCPhysReg getFramePointerReg() const { - if (isTargetDarwin() || (!isTargetWindows() && isThumb())) + if (isTargetDarwin() || + (!isTargetWindows() && isThumb() && !createAAPCSFrameChain())) return ARM::R7; return ARM::R11; } diff --git a/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp b/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp --- a/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp +++ b/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp @@ -176,7 +176,7 @@ // Determine the sizes of each callee-save spill areas and record which frame // belongs to which callee-save spill areas. 
- unsigned GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0; + unsigned FRSize = 0, GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0; int FramePtrSpillFI = 0; if (ArgRegsSaveSize) { @@ -205,26 +205,38 @@ return; } + bool HasFrameRecordArea = hasFP(MF) && ARM::hGPRRegClass.contains(FramePtr); + for (const CalleeSavedInfo &I : CSI) { Register Reg = I.getReg(); int FI = I.getFrameIdx(); + if (Reg == FramePtr) + FramePtrSpillFI = FI; switch (Reg) { + case ARM::R11: + if (HasFrameRecordArea) { + FRSize += 4; + break; + } + LLVM_FALLTHROUGH; case ARM::R8: case ARM::R9: case ARM::R10: - case ARM::R11: if (STI.splitFramePushPop(MF)) { GPRCS2Size += 4; break; } LLVM_FALLTHROUGH; + case ARM::LR: + if (HasFrameRecordArea) { + FRSize += 4; + break; + } + LLVM_FALLTHROUGH; case ARM::R4: case ARM::R5: case ARM::R6: case ARM::R7: - case ARM::LR: - if (Reg == FramePtr) - FramePtrSpillFI = FI; GPRCS1Size += 4; break; default: @@ -232,18 +244,53 @@ } } + MachineBasicBlock::iterator FRPush, GPRCS1Push, GPRCS2Push; + if (HasFrameRecordArea) { + // Skip Frame Record setup: + // push {lr} + // mov lr, r11 + // push {lr} + std::advance(MBBI, 2); + FRPush = MBBI++; + } + if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPUSH) { + GPRCS1Push = MBBI; ++MBBI; } + // Find last push instruction for GPRCS2 - spilling of high registers + // (r8-r11) could consist of multiple tPUSH and tMOVr instructions. + while (true) { + MachineBasicBlock::iterator OldMBBI = MBBI; + // Skip a run of tMOVr instructions + while (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tMOVr && + MBBI->getFlag(MachineInstr::FrameSetup)) + MBBI++; + if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPUSH && + MBBI->getFlag(MachineInstr::FrameSetup)) { + GPRCS2Push = MBBI; + MBBI++; + } else { + // We have reached an instruction which is not a push, so the previous + // run of tMOVr instructions (which may have been empty) was not part of + // the prologue. Reset MBBI back to the last PUSH of the prologue. 
+ MBBI = OldMBBI; + break; + } + } + // Determine starting offsets of spill areas. - unsigned DPRCSOffset = NumBytes - ArgRegsSaveSize - (GPRCS1Size + GPRCS2Size + DPRCSSize); + unsigned DPRCSOffset = NumBytes - ArgRegsSaveSize - + (FRSize + GPRCS1Size + GPRCS2Size + DPRCSSize); unsigned GPRCS2Offset = DPRCSOffset + DPRCSSize; unsigned GPRCS1Offset = GPRCS2Offset + GPRCS2Size; bool HasFP = hasFP(MF); if (HasFP) AFI->setFramePtrSpillOffset(MFI.getObjectOffset(FramePtrSpillFI) + NumBytes); + if (HasFrameRecordArea) + AFI->setFrameRecordSavedAreaSize(FRSize); AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset); AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset); AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset); @@ -252,71 +299,45 @@ int FramePtrOffsetInBlock = 0; unsigned adjustedGPRCS1Size = GPRCS1Size; if (GPRCS1Size > 0 && GPRCS2Size == 0 && - tryFoldSPUpdateIntoPushPop(STI, MF, &*std::prev(MBBI), NumBytes)) { + tryFoldSPUpdateIntoPushPop(STI, MF, &*(GPRCS1Push), NumBytes)) { FramePtrOffsetInBlock = NumBytes; adjustedGPRCS1Size += NumBytes; NumBytes = 0; } - - if (adjustedGPRCS1Size) { - CFAOffset += adjustedGPRCS1Size; - unsigned CFIIndex = - MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, CFAOffset)); - BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex) - .setMIFlags(MachineInstr::FrameSetup); - } - for (const CalleeSavedInfo &I : CSI) { - Register Reg = I.getReg(); - int FI = I.getFrameIdx(); - switch (Reg) { - case ARM::R8: - case ARM::R9: - case ARM::R10: - case ARM::R11: - case ARM::R12: - if (STI.splitFramePushPop(MF)) - break; - LLVM_FALLTHROUGH; - case ARM::R0: - case ARM::R1: - case ARM::R2: - case ARM::R3: - case ARM::R4: - case ARM::R5: - case ARM::R6: - case ARM::R7: - case ARM::LR: - unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset( - nullptr, MRI->getDwarfRegNum(Reg, true), MFI.getObjectOffset(FI))); - BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex) - 
.setMIFlags(MachineInstr::FrameSetup); - break; - } - } + CFAOffset += adjustedGPRCS1Size; // Adjust FP so it point to the stack slot that contains the previous FP. if (HasFP) { - FramePtrOffsetInBlock += - MFI.getObjectOffset(FramePtrSpillFI) + GPRCS1Size + ArgRegsSaveSize; - BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDrSPi), FramePtr) - .addReg(ARM::SP) - .addImm(FramePtrOffsetInBlock / 4) - .setMIFlags(MachineInstr::FrameSetup) - .add(predOps(ARMCC::AL)); + MachineBasicBlock::iterator AfterPush = + HasFrameRecordArea ? std::next(FRPush) : std::next(GPRCS1Push); + if (HasFrameRecordArea) { + // We have just finished pushing the previous FP into the stack, + // so simply capture the SP value as the new Frame Pointer. + BuildMI(MBB, AfterPush, dl, TII.get(ARM::tMOVr), FramePtr) + .addReg(ARM::SP) + .setMIFlags(MachineInstr::FrameSetup) + .add(predOps(ARMCC::AL)); + } else { + FramePtrOffsetInBlock += + MFI.getObjectOffset(FramePtrSpillFI) + GPRCS1Size + ArgRegsSaveSize; + BuildMI(MBB, AfterPush, dl, TII.get(ARM::tADDrSPi), FramePtr) + .addReg(ARM::SP) + .addImm(FramePtrOffsetInBlock / 4) + .setMIFlags(MachineInstr::FrameSetup) + .add(predOps(ARMCC::AL)); + } + if(FramePtrOffsetInBlock) { - CFAOffset -= FramePtrOffsetInBlock; unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfa( - nullptr, MRI->getDwarfRegNum(FramePtr, true), CFAOffset)); - BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) + nullptr, MRI->getDwarfRegNum(FramePtr, true), (CFAOffset - FramePtrOffsetInBlock))); + BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex) .setMIFlags(MachineInstr::FrameSetup); } else { unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createDefCfaRegister( nullptr, MRI->getDwarfRegNum(FramePtr, true))); - BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) + BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex) .setMIFlags(MachineInstr::FrameSetup); } @@ -326,45 
+347,69 @@ AFI->setShouldRestoreSPFromFP(true); } - // Skip past the spilling of r8-r11, which could consist of multiple tPUSH - // and tMOVr instructions. We don't need to add any call frame information - // in-between these instructions, because they do not modify the high - // registers. - while (true) { - MachineBasicBlock::iterator OldMBBI = MBBI; - // Skip a run of tMOVr instructions - while (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tMOVr) - MBBI++; - if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPUSH) { - MBBI++; - } else { - // We have reached an instruction which is not a push, so the previous - // run of tMOVr instructions (which may have been empty) was not part of - // the prologue. Reset MBBI back to the last PUSH of the prologue. - MBBI = OldMBBI; - break; + // Emit call frame information for the callee-saved low registers. + if (GPRCS1Size > 0) { + MachineBasicBlock::iterator Pos = std::next(GPRCS1Push); + if (adjustedGPRCS1Size) { + unsigned CFIIndex = + MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, CFAOffset)); + BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex) + .setMIFlags(MachineInstr::FrameSetup); + } + for (const CalleeSavedInfo &I : CSI) { + Register Reg = I.getReg(); + int FI = I.getFrameIdx(); + switch (Reg) { + case ARM::R8: + case ARM::R9: + case ARM::R10: + case ARM::R11: + case ARM::R12: + if (STI.splitFramePushPop(MF)) + break; + LLVM_FALLTHROUGH; + case ARM::R0: + case ARM::R1: + case ARM::R2: + case ARM::R3: + case ARM::R4: + case ARM::R5: + case ARM::R6: + case ARM::R7: + case ARM::LR: + unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset( + nullptr, MRI->getDwarfRegNum(Reg, true), MFI.getObjectOffset(FI))); + BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex) + .setMIFlags(MachineInstr::FrameSetup); + break; + } } } // Emit call frame information for the callee-saved high registers. 
- for (auto &I : CSI) { - Register Reg = I.getReg(); - int FI = I.getFrameIdx(); - switch (Reg) { - case ARM::R8: - case ARM::R9: - case ARM::R10: - case ARM::R11: - case ARM::R12: { - unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset( - nullptr, MRI->getDwarfRegNum(Reg, true), MFI.getObjectOffset(FI))); - BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex) - .setMIFlags(MachineInstr::FrameSetup); - break; - } - default: - break; + if (GPRCS2Size > 0) { + MachineBasicBlock::iterator Pos = std::next(GPRCS2Push); + for (auto &I : CSI) { + Register Reg = I.getReg(); + int FI = I.getFrameIdx(); + switch (Reg) { + case ARM::R8: + case ARM::R9: + case ARM::R10: + case ARM::R11: + case ARM::R12: { + unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset( + nullptr, MRI->getDwarfRegNum(Reg, true), MFI.getObjectOffset(FI))); + BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex) + .setMIFlags(MachineInstr::FrameSetup); + break; + } + default: + break; + } } } @@ -486,7 +531,8 @@ } // Move SP to start of FP callee save spill area. - NumBytes -= (AFI->getGPRCalleeSavedArea1Size() + + NumBytes -= (AFI->getFrameRecordSavedAreaSize() + + AFI->getGPRCalleeSavedArea1Size() + AFI->getGPRCalleeSavedArea2Size() + AFI->getDPRCalleeSavedAreaSize() + ArgRegsSaveSize); @@ -789,65 +835,53 @@ return true; } -using ARMRegSet = std::bitset; - -// Return the first iteraror after CurrentReg which is present in EnabledRegs, -// or OrderEnd if no further registers are in that set. This does not advance -// the iterator fiorst, so returns CurrentReg if it is in EnabledRegs. 
-static const unsigned *findNextOrderedReg(const unsigned *CurrentReg, - const ARMRegSet &EnabledRegs, - const unsigned *OrderEnd) { - while (CurrentReg != OrderEnd && !EnabledRegs[*CurrentReg]) - ++CurrentReg; - return CurrentReg; -} - -bool Thumb1FrameLowering::spillCalleeSavedRegisters( - MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, - ArrayRef CSI, const TargetRegisterInfo *TRI) const { - if (CSI.empty()) - return false; - - DebugLoc DL; - const TargetInstrInfo &TII = *STI.getInstrInfo(); - MachineFunction &MF = *MBB.getParent(); - const ARMBaseRegisterInfo *RegInfo = static_cast( - MF.getSubtarget().getRegisterInfo()); - - ARMRegSet LoRegsToSave; // r0-r7, lr - ARMRegSet HiRegsToSave; // r8-r11 - ARMRegSet CopyRegs; // Registers which can be used after pushing - // LoRegs for saving HiRegs. - - for (const CalleeSavedInfo &I : llvm::reverse(CSI)) { - Register Reg = I.getReg(); - +static const SmallVector OrderedLowRegs = {ARM::R4, ARM::R5, ARM::R6, + ARM::R7, ARM::LR}; +static const SmallVector OrderedHighRegs = {ARM::R8, ARM::R9, + ARM::R10, ARM::R11}; +static const SmallVector OrderedCopyRegs = { + ARM::R0, ARM::R1, ARM::R2, ARM::R3, ARM::R4, + ARM::R5, ARM::R6, ARM::R7, ARM::LR}; + +static void splitLowAndHighRegs(const std::set &Regs, + std::set &LowRegs, + std::set &HighRegs) { + for (Register Reg : Regs) { if (ARM::tGPRRegClass.contains(Reg) || Reg == ARM::LR) { - LoRegsToSave[Reg] = true; + LowRegs.insert(Reg); } else if (ARM::hGPRRegClass.contains(Reg) && Reg != ARM::LR) { - HiRegsToSave[Reg] = true; + HighRegs.insert(Reg); } else { llvm_unreachable("callee-saved register of unexpected class"); } - - if ((ARM::tGPRRegClass.contains(Reg) || Reg == ARM::LR) && - !MF.getRegInfo().isLiveIn(Reg) && - !(hasFP(MF) && Reg == RegInfo->getFrameRegister(MF))) - CopyRegs[Reg] = true; } +} - // Unused argument registers can be used for the high register saving. 
- for (unsigned ArgReg : {ARM::R0, ARM::R1, ARM::R2, ARM::R3}) - if (!MF.getRegInfo().isLiveIn(ArgReg)) - CopyRegs[ArgReg] = true; +template +It getNextOrderedReg(It OrderedStartIt, It OrderedEndIt, + const std::set &RegSet) { + return std::find_if(OrderedStartIt, OrderedEndIt, + [&](Register Reg) { return RegSet.count(Reg); }); +} - // Push the low registers and lr +static void pushRegsToStack(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + const TargetInstrInfo &TII, + const std::set &RegsToSave, + const std::set &CopyRegs) { + MachineFunction &MF = *MBB.getParent(); const MachineRegisterInfo &MRI = MF.getRegInfo(); - if (!LoRegsToSave.none()) { + DebugLoc DL; + + std::set LowRegs, HighRegs; + splitLowAndHighRegs(RegsToSave, LowRegs, HighRegs); + + // Push low regs first + if (!LowRegs.empty()) { MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(ARM::tPUSH)).add(predOps(ARMCC::AL)); - for (unsigned Reg : {ARM::R4, ARM::R5, ARM::R6, ARM::R7, ARM::LR}) { - if (LoRegsToSave[Reg]) { + for (unsigned Reg : OrderedLowRegs) { + if (LowRegs.count(Reg)) { bool isKill = !MRI.isLiveIn(Reg); if (isKill && !MRI.isReserved(Reg)) MBB.addLiveIn(Reg); @@ -858,31 +892,26 @@ MIB.setMIFlags(MachineInstr::FrameSetup); } - // Push the high registers. There are no store instructions that can access - // these registers directly, so we have to move them to low registers, and - // push them. This might take multiple pushes, as it is possible for there to + // Now push the high registers + // There are no store instructions that can access high registers directly, + // so we have to move them to low registers, and push them. + // This might take multiple pushes, as it is possible for there to // be fewer low registers available than high registers which need saving. - // These are in reverse order so that in the case where we need to use + // Find the first register to save. 
+ // Registers must be processed in reverse order so that in case we need to use // multiple PUSH instructions, the order of the registers on the stack still // matches the unwind info. They need to be swicthed back to ascending order // before adding to the PUSH instruction. - static const unsigned AllCopyRegs[] = {ARM::LR, ARM::R7, ARM::R6, - ARM::R5, ARM::R4, ARM::R3, - ARM::R2, ARM::R1, ARM::R0}; - static const unsigned AllHighRegs[] = {ARM::R11, ARM::R10, ARM::R9, ARM::R8}; - - const unsigned *AllCopyRegsEnd = std::end(AllCopyRegs); - const unsigned *AllHighRegsEnd = std::end(AllHighRegs); - - // Find the first register to save. - const unsigned *HiRegToSave = findNextOrderedReg( - std::begin(AllHighRegs), HiRegsToSave, AllHighRegsEnd); + auto HiRegToSave = getNextOrderedReg(OrderedHighRegs.rbegin(), + OrderedHighRegs.rend(), + HighRegs); - while (HiRegToSave != AllHighRegsEnd) { + while (HiRegToSave != OrderedHighRegs.rend()) { // Find the first low register to use. - const unsigned *CopyReg = - findNextOrderedReg(std::begin(AllCopyRegs), CopyRegs, AllCopyRegsEnd); + auto CopyRegIt = getNextOrderedReg(OrderedCopyRegs.rbegin(), + OrderedCopyRegs.rend(), + CopyRegs); // Create the PUSH, but don't insert it yet (the MOVs need to come first). MachineInstrBuilder PushMIB = BuildMI(MF, DL, TII.get(ARM::tPUSH)) @@ -890,25 +919,29 @@ .setMIFlags(MachineInstr::FrameSetup); SmallVector RegsToPush; - while (HiRegToSave != AllHighRegsEnd && CopyReg != AllCopyRegsEnd) { - if (HiRegsToSave[*HiRegToSave]) { + while (HiRegToSave != OrderedHighRegs.rend() && + CopyRegIt != OrderedCopyRegs.rend()) { + if (HighRegs.count(*HiRegToSave)) { bool isKill = !MRI.isLiveIn(*HiRegToSave); if (isKill && !MRI.isReserved(*HiRegToSave)) MBB.addLiveIn(*HiRegToSave); // Emit a MOV from the high reg to the low reg. 
BuildMI(MBB, MI, DL, TII.get(ARM::tMOVr)) - .addReg(*CopyReg, RegState::Define) + .addReg(*CopyRegIt, RegState::Define) .addReg(*HiRegToSave, getKillRegState(isKill)) .add(predOps(ARMCC::AL)) .setMIFlags(MachineInstr::FrameSetup); // Record the register that must be added to the PUSH. - RegsToPush.push_back(*CopyReg); - - CopyReg = findNextOrderedReg(++CopyReg, CopyRegs, AllCopyRegsEnd); - HiRegToSave = - findNextOrderedReg(++HiRegToSave, HiRegsToSave, AllHighRegsEnd); + RegsToPush.push_back(*CopyRegIt); + + CopyRegIt = getNextOrderedReg(std::next(CopyRegIt), + OrderedCopyRegs.rend(), + CopyRegs); + HiRegToSave = getNextOrderedReg(std::next(HiRegToSave), + OrderedHighRegs.rend(), + HighRegs); } } @@ -919,85 +952,63 @@ // Insert the PUSH instruction after the MOVs. MBB.insert(MI, PushMIB); } - - return true; } -bool Thumb1FrameLowering::restoreCalleeSavedRegisters( - MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, - MutableArrayRef CSI, const TargetRegisterInfo *TRI) const { - if (CSI.empty()) - return false; +static void popRegsFromStack(MachineBasicBlock &MBB, + MachineBasicBlock::iterator &MI, + const TargetInstrInfo &TII, + const std::set &RegsToRestore, + const std::set &AvailableCopyRegs, + bool IsVarArg, bool HasV5Ops) { + if (RegsToRestore.empty()) + return; MachineFunction &MF = *MBB.getParent(); ARMFunctionInfo *AFI = MF.getInfo(); - const TargetInstrInfo &TII = *STI.getInstrInfo(); - const ARMBaseRegisterInfo *RegInfo = static_cast( - MF.getSubtarget().getRegisterInfo()); - - bool isVarArg = AFI->getArgRegsSaveSize() > 0; DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc(); - ARMRegSet LoRegsToRestore; - ARMRegSet HiRegsToRestore; - // Low registers (r0-r7) which can be used to restore the high registers. 
- ARMRegSet CopyRegs; - - for (CalleeSavedInfo I : CSI) { - Register Reg = I.getReg(); - - if (ARM::tGPRRegClass.contains(Reg) || Reg == ARM::LR) { - LoRegsToRestore[Reg] = true; - } else if (ARM::hGPRRegClass.contains(Reg) && Reg != ARM::LR) { - HiRegsToRestore[Reg] = true; - } else { - llvm_unreachable("callee-saved register of unexpected class"); - } - - // If this is a low register not used as the frame pointer, we may want to - // use it for restoring the high registers. - if ((ARM::tGPRRegClass.contains(Reg)) && - !(hasFP(MF) && Reg == RegInfo->getFrameRegister(MF))) - CopyRegs[Reg] = true; - } - - // If this is a return block, we may be able to use some unused return value - // registers for restoring the high regs. - auto Terminator = MBB.getFirstTerminator(); - if (Terminator != MBB.end() && Terminator->getOpcode() == ARM::tBX_RET) { - CopyRegs[ARM::R0] = true; - CopyRegs[ARM::R1] = true; - CopyRegs[ARM::R2] = true; - CopyRegs[ARM::R3] = true; - for (auto Op : Terminator->implicit_operands()) { - if (Op.isReg()) - CopyRegs[Op.getReg()] = false; - } - } - - static const unsigned AllCopyRegs[] = {ARM::R0, ARM::R1, ARM::R2, ARM::R3, - ARM::R4, ARM::R5, ARM::R6, ARM::R7}; - static const unsigned AllHighRegs[] = {ARM::R8, ARM::R9, ARM::R10, ARM::R11}; + std::set LowRegs, HighRegs; + splitLowAndHighRegs(RegsToRestore, LowRegs, HighRegs); - const unsigned *AllCopyRegsEnd = std::end(AllCopyRegs); - const unsigned *AllHighRegsEnd = std::end(AllHighRegs); + // Pop the high registers first + // There are no load instructions that can access high registers directly, + // so we have to pop into low registers and then move to the high registers. + // This might take multiple pops, as it is possible for there to + // be fewer low registers available than high registers which need restoring. // Find the first register to restore. 
- auto HiRegToRestore = findNextOrderedReg(std::begin(AllHighRegs), - HiRegsToRestore, AllHighRegsEnd); + auto HiRegToRestore = getNextOrderedReg(OrderedHighRegs.begin(), + OrderedHighRegs.end(), + HighRegs); + + std::set CopyRegs = AvailableCopyRegs; + Register LowScratchReg; + if (!HighRegs.empty() && CopyRegs.empty()) { + // No copy regs are available to pop high regs. Let's make use of a return + // register and the scratch register (IP/R12) to copy things around. + LowScratchReg = ARM::R0; + BuildMI(MBB, MI, DL, TII.get(ARM::tMOVr)) + .addReg(ARM::R12, RegState::Define) + .addReg(LowScratchReg, RegState::Kill) + .add(predOps(ARMCC::AL)) + .setMIFlag(MachineInstr::FrameDestroy); + CopyRegs.insert(LowScratchReg); + } - while (HiRegToRestore != AllHighRegsEnd) { - assert(!CopyRegs.none()); + while (HiRegToRestore != OrderedHighRegs.end()) { + assert(!CopyRegs.empty()); // Find the first low register to use. - auto CopyReg = - findNextOrderedReg(std::begin(AllCopyRegs), CopyRegs, AllCopyRegsEnd); + auto CopyReg = getNextOrderedReg(OrderedCopyRegs.begin(), + OrderedCopyRegs.end(), + CopyRegs); // Create the POP instruction. MachineInstrBuilder PopMIB = BuildMI(MBB, MI, DL, TII.get(ARM::tPOP)) .add(predOps(ARMCC::AL)) .setMIFlag(MachineInstr::FrameDestroy); - while (HiRegToRestore != AllHighRegsEnd && CopyReg != AllCopyRegsEnd) { + while (HiRegToRestore != OrderedHighRegs.end() && + CopyReg != OrderedCopyRegs.end()) { // Add the low register to the POP. 
PopMIB.addReg(*CopyReg, RegState::Define); @@ -1008,63 +1019,186 @@ .add(predOps(ARMCC::AL)) .setMIFlag(MachineInstr::FrameDestroy); - CopyReg = findNextOrderedReg(++CopyReg, CopyRegs, AllCopyRegsEnd); - HiRegToRestore = - findNextOrderedReg(++HiRegToRestore, HiRegsToRestore, AllHighRegsEnd); + CopyReg = getNextOrderedReg(std::next(CopyReg), + OrderedCopyRegs.end(), + CopyRegs); + HiRegToRestore = getNextOrderedReg(std::next(HiRegToRestore), + OrderedHighRegs.end(), + HighRegs); } } - MachineInstrBuilder MIB = BuildMI(MF, DL, TII.get(ARM::tPOP)) - .add(predOps(ARMCC::AL)) - .setMIFlag(MachineInstr::FrameDestroy); - - bool NeedsPop = false; - for (CalleeSavedInfo &Info : llvm::reverse(CSI)) { - Register Reg = Info.getReg(); - - // High registers (excluding lr) have already been dealt with - if (!(ARM::tGPRRegClass.contains(Reg) || Reg == ARM::LR)) - continue; - - if (Reg == ARM::LR) { - Info.setRestored(false); - if (!MBB.succ_empty() || - MI->getOpcode() == ARM::TCRETURNdi || - MI->getOpcode() == ARM::TCRETURNri) - // LR may only be popped into PC, as part of return sequence. - // If this isn't the return sequence, we'll need emitPopSpecialFixUp - // to restore LR the hard way. - // FIXME: if we don't pass any stack arguments it would be actually - // advantageous *and* correct to do the conversion to an ordinary call - // instruction here. - continue; - // Special epilogue for vararg functions. See emitEpilogue - if (isVarArg) - continue; - // ARMv4T requires BX, see emitEpilogue - if (!STI.hasV5TOps()) - continue; + // Restore low register used as scratch if necessary + if (LowScratchReg.isValid()) { + BuildMI(MBB, MI, DL, TII.get(ARM::tMOVr)) + .addReg(LowScratchReg, RegState::Define) + .addReg(ARM::R12, RegState::Kill) + .add(predOps(ARMCC::AL)) + .setMIFlag(MachineInstr::FrameDestroy); + } - // CMSE entry functions must return via BXNS, see emitEpilogue. 
- if (AFI->isCmseNSEntryFunction()) + // Now pop the low registers + if (!LowRegs.empty()) { + MachineInstrBuilder MIB = BuildMI(MF, DL, TII.get(ARM::tPOP)) + .add(predOps(ARMCC::AL)) + .setMIFlag(MachineInstr::FrameDestroy); + + bool NeedsPop = false; + for (Register Reg : OrderedLowRegs) { + if (!LowRegs.count(Reg)) continue; - // Pop LR into PC. - Reg = ARM::PC; - (*MIB).setDesc(TII.get(ARM::tPOP_RET)); - if (MI != MBB.end()) - MIB.copyImplicitOps(*MI); - MI = MBB.erase(MI); + if (Reg == ARM::LR) { + if (!MBB.succ_empty() || + MI->getOpcode() == ARM::TCRETURNdi || + MI->getOpcode() == ARM::TCRETURNri) + // LR may only be popped into PC, as part of return sequence. + // If this isn't the return sequence, we'll need emitPopSpecialFixUp + // to restore LR the hard way. + // FIXME: if we don't pass any stack arguments it would be actually + // advantageous *and* correct to do the conversion to an ordinary call + // instruction here. + continue; + // Special epilogue for vararg functions. See emitEpilogue + if (IsVarArg) + continue; + // ARMv4T requires BX, see emitEpilogue + if (!HasV5Ops) + continue; + + // CMSE entry functions must return via BXNS, see emitEpilogue. + if (AFI->isCmseNSEntryFunction()) + continue; + + // Pop LR into PC. + Reg = ARM::PC; + (*MIB).setDesc(TII.get(ARM::tPOP_RET)); + if (MI != MBB.end()) + MIB.copyImplicitOps(*MI); + MI = MBB.erase(MI); + } + MIB.addReg(Reg, getDefRegState(true)); + NeedsPop = true; } - MIB.addReg(Reg, getDefRegState(true)); - NeedsPop = true; + + // It's illegal to emit pop instruction without operands. 
+ if (NeedsPop) + MBB.insert(MI, &*MIB); + else + MF.deleteMachineInstr(MIB); } +} + +bool Thumb1FrameLowering::spillCalleeSavedRegisters( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, + ArrayRef CSI, const TargetRegisterInfo *TRI) const { + if (CSI.empty()) + return false; + + const TargetInstrInfo &TII = *STI.getInstrInfo(); + MachineFunction &MF = *MBB.getParent(); + const ARMBaseRegisterInfo *RegInfo = static_cast( + MF.getSubtarget().getRegisterInfo()); + Register FPReg = RegInfo->getFrameRegister(MF); + + // In case FP is a high reg, we need a separate push sequence to generate + // a correct Frame Record + bool NeedsFrameRecordPush = hasFP(MF) && ARM::hGPRRegClass.contains(FPReg); + + std::set FrameRecord; + std::set SpilledGPRs; + for (const CalleeSavedInfo &I : CSI) { + Register Reg = I.getReg(); + if (NeedsFrameRecordPush && (Reg == FPReg || Reg == ARM::LR)) + FrameRecord.insert(Reg); + else + SpilledGPRs.insert(Reg); + } + + pushRegsToStack(MBB, MI, TII, FrameRecord, {ARM::LR}); + + // Determine intermediate registers which can be used for pushing high regs: + // - Spilled low regs + // - Unused argument registers + std::set CopyRegs; + for (Register Reg : SpilledGPRs) + if ((ARM::tGPRRegClass.contains(Reg) || Reg == ARM::LR) && + !MF.getRegInfo().isLiveIn(Reg) && !(hasFP(MF) && Reg == FPReg)) + CopyRegs.insert(Reg); + for (unsigned ArgReg : {ARM::R0, ARM::R1, ARM::R2, ARM::R3}) + if (!MF.getRegInfo().isLiveIn(ArgReg)) + CopyRegs.insert(ArgReg); + + pushRegsToStack(MBB, MI, TII, SpilledGPRs, CopyRegs); + + return true; +} + +bool Thumb1FrameLowering::restoreCalleeSavedRegisters( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, + MutableArrayRef CSI, const TargetRegisterInfo *TRI) const { + if (CSI.empty()) + return false; + + MachineFunction &MF = *MBB.getParent(); + ARMFunctionInfo *AFI = MF.getInfo(); + const TargetInstrInfo &TII = *STI.getInstrInfo(); + const ARMBaseRegisterInfo *RegInfo = static_cast( + 
MF.getSubtarget().getRegisterInfo()); + bool IsVarArg = AFI->getArgRegsSaveSize() > 0; + Register FPReg = RegInfo->getFrameRegister(MF); + + // In case FP is a high reg, we need a separate pop sequence to generate + // a correct Frame Record + bool NeedsFrameRecordPop = hasFP(MF) && ARM::hGPRRegClass.contains(FPReg); + + std::set FrameRecord; + std::set SpilledGPRs; + for (CalleeSavedInfo &I : CSI) { + Register Reg = I.getReg(); + if (NeedsFrameRecordPop && (Reg == FPReg || Reg == ARM::LR)) + FrameRecord.insert(Reg); + else + SpilledGPRs.insert(Reg); + + if (Reg == ARM::LR) + I.setRestored(false); + } + + // Determine intermediate registers which can be used for popping high regs: + // - Spilled low regs + // - Unused return registers + std::set CopyRegs; + std::set UnusedReturnRegs; + for (Register Reg : SpilledGPRs) + if ((ARM::tGPRRegClass.contains(Reg)) && !(hasFP(MF) && Reg == FPReg)) + CopyRegs.insert(Reg); + auto Terminator = MBB.getFirstTerminator(); + if (Terminator != MBB.end() && Terminator->getOpcode() == ARM::tBX_RET) { + UnusedReturnRegs.insert(ARM::R0); + UnusedReturnRegs.insert(ARM::R1); + UnusedReturnRegs.insert(ARM::R2); + UnusedReturnRegs.insert(ARM::R3); + for (auto Op : Terminator->implicit_operands()) { + if (Op.isReg()) + UnusedReturnRegs.erase(Op.getReg()); + } + } + CopyRegs.insert(UnusedReturnRegs.begin(), UnusedReturnRegs.end()); + + // First pop regular spilled regs. + popRegsFromStack(MBB, MI, TII, SpilledGPRs, CopyRegs, IsVarArg, + STI.hasV5TOps()); + + // LR may only be popped into pc, as part of a return sequence. + // Check that no other pop instructions are inserted after that. + assert((!SpilledGPRs.count(ARM::LR) || FrameRecord.empty()) && + "Can't insert pop after return sequence"); - // It's illegal to emit pop instruction without operands. - if (NeedsPop) - MBB.insert(MI, &*MIB); - else - MF.deleteMachineInstr(MIB); + // Now pop Frame Record regs. + // Only unused return registers can be used as copy regs at this point. 
+ popRegsFromStack(MBB, MI, TII, FrameRecord, UnusedReturnRegs, IsVarArg, + STI.hasV5TOps()); return true; } diff --git a/llvm/lib/Target/ARM/ThumbRegisterInfo.cpp b/llvm/lib/Target/ARM/ThumbRegisterInfo.cpp --- a/llvm/lib/Target/ARM/ThumbRegisterInfo.cpp +++ b/llvm/lib/Target/ARM/ThumbRegisterInfo.cpp @@ -361,6 +361,7 @@ const ARMBaseInstrInfo &TII) const { MachineInstr &MI = *II; MachineBasicBlock &MBB = *MI.getParent(); + MachineFunction &MF = *MBB.getParent(); assert(MBB.getParent()->getSubtarget().isThumb1Only() && "This isn't needed for thumb2!"); DebugLoc dl = MI.getDebugLoc(); @@ -396,7 +397,18 @@ if ((unsigned)Offset <= Mask * Scale) { // Replace the FrameIndex with the frame register (e.g., sp). - MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false); + Register DestReg = FrameReg; + + // In case FrameReg is a high register, move it to a low reg to ensure it + // can be used as an operand. + if (ARM::hGPRRegClass.contains(FrameReg) && FrameReg != ARM::SP) { + DestReg = MF.getRegInfo().createVirtualRegister(&ARM::tGPRRegClass); + BuildMI(MBB, II, dl, TII.get(ARM::tMOVr), DestReg) + .addReg(FrameReg) + .add(predOps(ARMCC::AL)); + } + + MI.getOperand(FrameRegIdx).ChangeToRegister(DestReg, false); ImmOp.ChangeToImmediate(ImmedOffset); // If we're using a register where sp was stored, convert the instruction @@ -517,7 +529,16 @@ Offset, false, TII, *this); else { emitLoadConstPool(MBB, II, dl, TmpReg, 0, Offset); - UseRR = true; + if (!ARM::hGPRRegClass.contains(FrameReg)) { + UseRR = true; + } else { + // If FrameReg is a high register, add the reg values in a separate + // instruction as the load won't be able to access it. + BuildMI(MBB, II, dl, TII.get(ARM::tADDhirr), TmpReg) + .addReg(TmpReg) + .addReg(FrameReg) + .add(predOps(ARMCC::AL)); + } } } else { emitThumbRegPlusImmediate(MBB, II, dl, TmpReg, FrameReg, Offset, TII, @@ -526,11 +547,14 @@ MI.setDesc(TII.get(UseRR ? 
ARM::tLDRr : ARM::tLDRi)); MI.getOperand(FIOperandNum).ChangeToRegister(TmpReg, false, false, true); - if (UseRR) + if (UseRR) { + assert(!ARM::hGPRRegClass.contains(FrameReg) && + "Thumb1 loads can't use high register"); // Use [reg, reg] addrmode. Replace the immediate operand w/ the frame // register. The offset is already handled in the vreg value. MI.getOperand(FIOperandNum+1).ChangeToRegister(FrameReg, false, false, false); + } } else if (MI.mayStore()) { VReg = MF.getRegInfo().createVirtualRegister(&ARM::tGPRRegClass); bool UseRR = false; @@ -541,18 +565,30 @@ Offset, false, TII, *this); else { emitLoadConstPool(MBB, II, dl, VReg, 0, Offset); - UseRR = true; + if (!ARM::hGPRRegClass.contains(FrameReg)) { + UseRR = true; + } else { + // If FrameReg is a high register, add the reg values in a separate + // instruction as the load won't be able to access it. + BuildMI(MBB, II, dl, TII.get(ARM::tADDhirr), VReg) + .addReg(VReg) + .addReg(FrameReg) + .add(predOps(ARMCC::AL)); + } } } else emitThumbRegPlusImmediate(MBB, II, dl, VReg, FrameReg, Offset, TII, *this); MI.setDesc(TII.get(UseRR ? ARM::tSTRr : ARM::tSTRi)); MI.getOperand(FIOperandNum).ChangeToRegister(VReg, false, false, true); - if (UseRR) + if (UseRR) { + assert(!ARM::hGPRRegClass.contains(FrameReg) && + "Thumb1 stores can't use high register"); // Use [reg, reg] addrmode. Replace the immediate operand w/ the frame // register. The offset is already handled in the vreg value. 
MI.getOperand(FIOperandNum+1).ChangeToRegister(FrameReg, false, false, false); + } } else { llvm_unreachable("Unexpected opcode!"); } diff --git a/llvm/test/CodeGen/ARM/frame-chain-reserved-fp.ll b/llvm/test/CodeGen/ARM/frame-chain-reserved-fp.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/ARM/frame-chain-reserved-fp.ll @@ -0,0 +1,25 @@ +; RUN: not llc -mtriple arm-arm-none-eabi -filetype asm -o - %s -frame-pointer=all 2>&1 | FileCheck %s --check-prefix=RESERVED-R11 +; RUN: not llc -mtriple arm-arm-none-eabi -filetype asm -o - %s -frame-pointer=all -mattr=+aapcs-frame-chain 2>&1 | FileCheck %s --check-prefix=RESERVED-R11 +; RUN: not llc -mtriple arm-arm-none-eabi -filetype asm -o - %s -frame-pointer=all -mattr=+aapcs-frame-chain-leaf 2>&1 | FileCheck %s --check-prefix=RESERVED-R11 +; RUN: llc -mtriple arm-arm-none-eabi -filetype asm -o - %s -frame-pointer=non-leaf 2>&1 | FileCheck %s --check-prefix=RESERVED-NONE +; RUN: not llc -mtriple arm-arm-none-eabi -filetype asm -o - %s -frame-pointer=non-leaf -mattr=+aapcs-frame-chain 2>&1 | FileCheck %s --check-prefix=RESERVED-R11 +; RUN: not llc -mtriple arm-arm-none-eabi -filetype asm -o - %s -frame-pointer=non-leaf -mattr=+aapcs-frame-chain-leaf 2>&1 | FileCheck %s --check-prefix=RESERVED-R11 +; RUN: llc -mtriple arm-arm-none-eabi -filetype asm -o - %s -frame-pointer=none 2>&1 | FileCheck %s --check-prefix=RESERVED-NONE +; RUN: not llc -mtriple arm-arm-none-eabi -filetype asm -o - %s -frame-pointer=none -mattr=+aapcs-frame-chain 2>&1 | FileCheck %s --check-prefix=RESERVED-R11 +; RUN: not llc -mtriple arm-arm-none-eabi -filetype asm -o - %s -frame-pointer=none -mattr=+aapcs-frame-chain-leaf 2>&1 | FileCheck %s --check-prefix=RESERVED-R11 + +declare void @leaf(i32 %input) + +define void @reserved_r7(i32 %input) { +; RESERVED-NONE-NOT: error: write to reserved register 'R7' +; RESERVED-R11-NOT: error: write to reserved register 'R7' + %1 = call i32 asm sideeffect "mov $0, $1", "={r7},r"(i32 %input) + ret void 
+} + +define void @reserved_r11(i32 %input) { +; RESERVED-NONE-NOT: error: write to reserved register 'R11' +; RESERVED-R11: error: write to reserved register 'R11' + %1 = call i32 asm sideeffect "mov $0, $1", "={r11},r"(i32 %input) + ret void +} diff --git a/llvm/test/CodeGen/ARM/frame-chain.ll b/llvm/test/CodeGen/ARM/frame-chain.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/ARM/frame-chain.ll @@ -0,0 +1,223 @@ +; RUN: llc -mtriple arm-arm-none-eabi -filetype asm -o - %s -frame-pointer=all | FileCheck %s --check-prefixes=FP,LEAF-FP +; RUN: llc -mtriple arm-arm-none-eabi -filetype asm -o - %s -frame-pointer=all -mattr=+aapcs-frame-chain | FileCheck %s --check-prefixes=FP-AAPCS,LEAF-FP +; RUN: llc -mtriple arm-arm-none-eabi -filetype asm -o - %s -frame-pointer=all -mattr=+aapcs-frame-chain-leaf | FileCheck %s --check-prefixes=FP-AAPCS,LEAF-FP-AAPCS +; RUN: llc -mtriple arm-arm-none-eabi -filetype asm -o - %s -frame-pointer=non-leaf | FileCheck %s --check-prefixes=FP,LEAF-NOFP +; RUN: llc -mtriple arm-arm-none-eabi -filetype asm -o - %s -frame-pointer=non-leaf -mattr=+aapcs-frame-chain | FileCheck %s --check-prefixes=FP-AAPCS,LEAF-NOFP +; RUN: llc -mtriple arm-arm-none-eabi -filetype asm -o - %s -frame-pointer=non-leaf -mattr=+aapcs-frame-chain-leaf | FileCheck %s --check-prefixes=FP-AAPCS,LEAF-NOFP-AAPCS +; RUN: llc -mtriple arm-arm-none-eabi -filetype asm -o - %s -frame-pointer=none | FileCheck %s --check-prefixes=NOFP,LEAF-NOFP +; RUN: llc -mtriple arm-arm-none-eabi -filetype asm -o - %s -frame-pointer=none -mattr=+aapcs-frame-chain | FileCheck %s --check-prefixes=NOFP-AAPCS,LEAF-NOFP +; RUN: llc -mtriple arm-arm-none-eabi -filetype asm -o - %s -frame-pointer=none -mattr=+aapcs-frame-chain-leaf | FileCheck %s --check-prefixes=NOFP-AAPCS,LEAF-NOFP-AAPCS + +define dso_local noundef i32 @leaf(i32 noundef %0) { +; LEAF-FP-LABEL: leaf: +; LEAF-FP: @ %bb.0: +; LEAF-FP-NEXT: .pad #4 +; LEAF-FP-NEXT: sub sp, sp, #4 +; LEAF-FP-NEXT: str r0, [sp] +; 
LEAF-FP-NEXT: add r0, r0, #4 +; LEAF-FP-NEXT: add sp, sp, #4 +; LEAF-FP-NEXT: mov pc, lr +; +; LEAF-FP-AAPCS-LABEL: leaf: +; LEAF-FP-AAPCS: @ %bb.0: +; LEAF-FP-AAPCS-NEXT: .save {r11, lr} +; LEAF-FP-AAPCS-NEXT: push {r11, lr} +; LEAF-FP-AAPCS-NEXT: .setfp r11, sp +; LEAF-FP-AAPCS-NEXT: mov r11, sp +; LEAF-FP-AAPCS-NEXT: push {r0} +; LEAF-FP-AAPCS-NEXT: add r0, r0, #4 +; LEAF-FP-AAPCS-NEXT: mov sp, r11 +; LEAF-FP-AAPCS-NEXT: pop {r11, lr} +; LEAF-FP-AAPCS-NEXT: mov pc, lr +; +; LEAF-NOFP-LABEL: leaf: +; LEAF-NOFP: @ %bb.0: +; LEAF-NOFP-NEXT: .pad #4 +; LEAF-NOFP-NEXT: sub sp, sp, #4 +; LEAF-NOFP-NEXT: str r0, [sp] +; LEAF-NOFP-NEXT: add r0, r0, #4 +; LEAF-NOFP-NEXT: add sp, sp, #4 +; LEAF-NOFP-NEXT: mov pc, lr +; +; LEAF-NOFP-AAPCS-LABEL: leaf: +; LEAF-NOFP-AAPCS: @ %bb.0: +; LEAF-NOFP-AAPCS-NEXT: .pad #4 +; LEAF-NOFP-AAPCS-NEXT: sub sp, sp, #4 +; LEAF-NOFP-AAPCS-NEXT: str r0, [sp] +; LEAF-NOFP-AAPCS-NEXT: add r0, r0, #4 +; LEAF-NOFP-AAPCS-NEXT: add sp, sp, #4 +; LEAF-NOFP-AAPCS-NEXT: mov pc, lr + %2 = alloca i32, align 4 + store i32 %0, i32* %2, align 4 + %3 = load i32, i32* %2, align 4 + %4 = add nsw i32 %3, 4 + ret i32 %4 +} + +define dso_local noundef i32 @non_leaf(i32 noundef %0) { +; FP-LABEL: non_leaf: +; FP: @ %bb.0: +; FP-NEXT: .save {r11, lr} +; FP-NEXT: push {r11, lr} +; FP-NEXT: .setfp r11, sp +; FP-NEXT: mov r11, sp +; FP-NEXT: .pad #8 +; FP-NEXT: sub sp, sp, #8 +; FP-NEXT: str r0, [sp, #4] +; FP-NEXT: bl leaf +; FP-NEXT: add r0, r0, #1 +; FP-NEXT: mov sp, r11 +; FP-NEXT: pop {r11, lr} +; FP-NEXT: mov pc, lr +; +; FP-AAPCS-LABEL: non_leaf: +; FP-AAPCS: @ %bb.0: +; FP-AAPCS-NEXT: .save {r11, lr} +; FP-AAPCS-NEXT: push {r11, lr} +; FP-AAPCS-NEXT: .setfp r11, sp +; FP-AAPCS-NEXT: mov r11, sp +; FP-AAPCS-NEXT: .pad #8 +; FP-AAPCS-NEXT: sub sp, sp, #8 +; FP-AAPCS-NEXT: str r0, [sp, #4] +; FP-AAPCS-NEXT: bl leaf +; FP-AAPCS-NEXT: add r0, r0, #1 +; FP-AAPCS-NEXT: mov sp, r11 +; FP-AAPCS-NEXT: pop {r11, lr} +; FP-AAPCS-NEXT: mov pc, lr +; +; NOFP-LABEL: 
non_leaf: +; NOFP: @ %bb.0: +; NOFP-NEXT: .save {r11, lr} +; NOFP-NEXT: push {r11, lr} +; NOFP-NEXT: .pad #8 +; NOFP-NEXT: sub sp, sp, #8 +; NOFP-NEXT: str r0, [sp, #4] +; NOFP-NEXT: bl leaf +; NOFP-NEXT: add r0, r0, #1 +; NOFP-NEXT: add sp, sp, #8 +; NOFP-NEXT: pop {r11, lr} +; NOFP-NEXT: mov pc, lr +; +; NOFP-AAPCS-LABEL: non_leaf: +; NOFP-AAPCS: @ %bb.0: +; NOFP-AAPCS-NEXT: .save {r11, lr} +; NOFP-AAPCS-NEXT: push {r11, lr} +; NOFP-AAPCS-NEXT: .pad #8 +; NOFP-AAPCS-NEXT: sub sp, sp, #8 +; NOFP-AAPCS-NEXT: str r0, [sp, #4] +; NOFP-AAPCS-NEXT: bl leaf +; NOFP-AAPCS-NEXT: add r0, r0, #1 +; NOFP-AAPCS-NEXT: add sp, sp, #8 +; NOFP-AAPCS-NEXT: pop {r11, lr} +; NOFP-AAPCS-NEXT: mov pc, lr + %2 = alloca i32, align 4 + store i32 %0, i32* %2, align 4 + %3 = load i32, i32* %2, align 4 + %4 = call noundef i32 @leaf(i32 noundef %3) + %5 = add nsw i32 %4, 1 + ret i32 %5 +} + +declare i8* @llvm.stacksave() +define dso_local void @required_fp(i32 %0, i32 %1) { +; LEAF-FP-LABEL: required_fp: +; LEAF-FP: @ %bb.0: +; LEAF-FP-NEXT: .save {r4, r5, r11, lr} +; LEAF-FP-NEXT: push {r4, r5, r11, lr} +; LEAF-FP-NEXT: .setfp r11, sp, #8 +; LEAF-FP-NEXT: add r11, sp, #8 +; LEAF-FP-NEXT: .pad #24 +; LEAF-FP-NEXT: sub sp, sp, #24 +; LEAF-FP-NEXT: str r1, [r11, #-16] +; LEAF-FP-NEXT: mov r1, #7 +; LEAF-FP-NEXT: add r1, r1, r0, lsl #2 +; LEAF-FP-NEXT: str r0, [r11, #-12] +; LEAF-FP-NEXT: bic r1, r1, #7 +; LEAF-FP-NEXT: str sp, [r11, #-24] +; LEAF-FP-NEXT: sub sp, sp, r1 +; LEAF-FP-NEXT: mov r1, #0 +; LEAF-FP-NEXT: str r0, [r11, #-32] +; LEAF-FP-NEXT: str r1, [r11, #-28] +; LEAF-FP-NEXT: sub sp, r11, #8 +; LEAF-FP-NEXT: pop {r4, r5, r11, lr} +; LEAF-FP-NEXT: mov pc, lr +; +; LEAF-FP-AAPCS-LABEL: required_fp: +; LEAF-FP-AAPCS: @ %bb.0: +; LEAF-FP-AAPCS-NEXT: .save {r4, r5, r11, lr} +; LEAF-FP-AAPCS-NEXT: push {r4, r5, r11, lr} +; LEAF-FP-AAPCS-NEXT: .setfp r11, sp, #8 +; LEAF-FP-AAPCS-NEXT: add r11, sp, #8 +; LEAF-FP-AAPCS-NEXT: .pad #24 +; LEAF-FP-AAPCS-NEXT: sub sp, sp, #24 +; 
LEAF-FP-AAPCS-NEXT: str r1, [r11, #-16] +; LEAF-FP-AAPCS-NEXT: mov r1, #7 +; LEAF-FP-AAPCS-NEXT: add r1, r1, r0, lsl #2 +; LEAF-FP-AAPCS-NEXT: str r0, [r11, #-12] +; LEAF-FP-AAPCS-NEXT: bic r1, r1, #7 +; LEAF-FP-AAPCS-NEXT: str sp, [r11, #-24] +; LEAF-FP-AAPCS-NEXT: sub sp, sp, r1 +; LEAF-FP-AAPCS-NEXT: mov r1, #0 +; LEAF-FP-AAPCS-NEXT: str r0, [r11, #-32] +; LEAF-FP-AAPCS-NEXT: str r1, [r11, #-28] +; LEAF-FP-AAPCS-NEXT: sub sp, r11, #8 +; LEAF-FP-AAPCS-NEXT: pop {r4, r5, r11, lr} +; LEAF-FP-AAPCS-NEXT: mov pc, lr +; +; LEAF-NOFP-LABEL: required_fp: +; LEAF-NOFP: @ %bb.0: +; LEAF-NOFP-NEXT: .save {r4, r5, r11} +; LEAF-NOFP-NEXT: push {r4, r5, r11} +; LEAF-NOFP-NEXT: .setfp r11, sp, #8 +; LEAF-NOFP-NEXT: add r11, sp, #8 +; LEAF-NOFP-NEXT: .pad #20 +; LEAF-NOFP-NEXT: sub sp, sp, #20 +; LEAF-NOFP-NEXT: str r1, [r11, #-16] +; LEAF-NOFP-NEXT: mov r1, #7 +; LEAF-NOFP-NEXT: add r1, r1, r0, lsl #2 +; LEAF-NOFP-NEXT: str r0, [r11, #-12] +; LEAF-NOFP-NEXT: bic r1, r1, #7 +; LEAF-NOFP-NEXT: str sp, [r11, #-20] +; LEAF-NOFP-NEXT: sub sp, sp, r1 +; LEAF-NOFP-NEXT: mov r1, #0 +; LEAF-NOFP-NEXT: str r0, [r11, #-28] +; LEAF-NOFP-NEXT: str r1, [r11, #-24] +; LEAF-NOFP-NEXT: sub sp, r11, #8 +; LEAF-NOFP-NEXT: pop {r4, r5, r11} +; LEAF-NOFP-NEXT: mov pc, lr +; +; LEAF-NOFP-AAPCS-LABEL: required_fp: +; LEAF-NOFP-AAPCS: @ %bb.0: +; LEAF-NOFP-AAPCS-NEXT: .save {r4, r5, r11, lr} +; LEAF-NOFP-AAPCS-NEXT: push {r4, r5, r11, lr} +; LEAF-NOFP-AAPCS-NEXT: .setfp r11, sp, #8 +; LEAF-NOFP-AAPCS-NEXT: add r11, sp, #8 +; LEAF-NOFP-AAPCS-NEXT: .pad #24 +; LEAF-NOFP-AAPCS-NEXT: sub sp, sp, #24 +; LEAF-NOFP-AAPCS-NEXT: str r1, [r11, #-16] +; LEAF-NOFP-AAPCS-NEXT: mov r1, #7 +; LEAF-NOFP-AAPCS-NEXT: add r1, r1, r0, lsl #2 +; LEAF-NOFP-AAPCS-NEXT: str r0, [r11, #-12] +; LEAF-NOFP-AAPCS-NEXT: bic r1, r1, #7 +; LEAF-NOFP-AAPCS-NEXT: str sp, [r11, #-24] +; LEAF-NOFP-AAPCS-NEXT: sub sp, sp, r1 +; LEAF-NOFP-AAPCS-NEXT: mov r1, #0 +; LEAF-NOFP-AAPCS-NEXT: str r0, [r11, #-32] +; LEAF-NOFP-AAPCS-NEXT: str r1, 
[r11, #-28] +; LEAF-NOFP-AAPCS-NEXT: sub sp, r11, #8 +; LEAF-NOFP-AAPCS-NEXT: pop {r4, r5, r11, lr} +; LEAF-NOFP-AAPCS-NEXT: mov pc, lr + %3 = alloca i32, align 4 + %4 = alloca i32, align 4 + %5 = alloca i8*, align 8 + %6 = alloca i64, align 8 + store i32 %0, i32* %3, align 4 + store i32 %1, i32* %4, align 4 + %7 = load i32, i32* %3, align 4 + %8 = zext i32 %7 to i64 + %9 = call i8* @llvm.stacksave() + store i8* %9, i8** %5, align 8 + %10 = alloca i32, i64 %8, align 4 + store i64 %8, i64* %6, align 8 + ret void +} diff --git a/llvm/test/CodeGen/Thumb/frame-access.ll b/llvm/test/CodeGen/Thumb/frame-access.ll --- a/llvm/test/CodeGen/Thumb/frame-access.ll +++ b/llvm/test/CodeGen/Thumb/frame-access.ll @@ -1,4 +1,7 @@ -; RUN: llc -mtriple=thumbv6m-eabi -frame-pointer=none %s -o - | FileCheck %s +; RUN: llc -mtriple=thumbv6m-eabi -frame-pointer=none %s -o - --verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,CHECK-NOFP,CHECK-ATPCS +; RUN: llc -mtriple=thumbv6m-eabi -frame-pointer=all %s -o - --verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,CHECK-FP-ATPCS,CHECK-ATPCS +; RUN: llc -mtriple=thumbv6m-eabi -frame-pointer=none -mattr=+aapcs-frame-chain-leaf %s -o - --verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,CHECK-NOFP,CHECK-AAPCS +; RUN: llc -mtriple=thumbv6m-eabi -frame-pointer=all -mattr=+aapcs-frame-chain-leaf %s -o - --verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,CHECK-FP-AAPCS,CHECK-AAPCS ; struct S { int x[128]; } s; ; int f(int *, int, int, int, struct S); @@ -12,6 +15,7 @@ @s = common dso_local global %struct.S zeroinitializer, align 4 declare void @llvm.va_start(i8*) +declare dso_local i32 @i(i32) local_unnamed_addr declare dso_local i32 @g(i32*, i32, i32, i32, i32, i32) local_unnamed_addr declare dso_local i32 @f(i32*, i32, i32, i32, %struct.S* byval(%struct.S) align 4) local_unnamed_addr declare dso_local i32 @h(i32*, i32*, i32*) local_unnamed_addr @@ -21,7 +25,7 @@ ; Test access to arguments, passed on stack 
(including varargs) ; -; Usual case, access via SP +; Usual case, access via SP if FP is not available ; int test_args_sp(int a, int b, int c, int d, int e) { ; int v[4]; ; return g(v, a, b, c, d, e); @@ -36,7 +40,10 @@ } ; CHECK-LABEL: test_args_sp ; Load `e` -; CHECK: ldr r0, [sp, #32] +; CHECK-NOFP: ldr r0, [sp, #32] +; CHECK-FP-ATPCS: ldr r0, [r7, #8] +; CHECK-FP-AAPCS: mov r0, r11 +; CHECK-FP-AAPCS: ldr r0, [r0, #8] ; CHECK-NEXT: str r3, [sp] ; Pass `e` on stack ; CHECK-NEXT: str r0, [sp, #4] @@ -63,9 +70,18 @@ ; Three incoming varargs in registers ; CHECK: sub sp, #12 ; CHECK: sub sp, #28 -; Incoming arguments area is accessed via SP -; CHECK: add r0, sp, #36 -; CHECK: stm r0!, {r1, r2, r3} +; Incoming arguments area is accessed via SP if FP is not available +; CHECK-NOFP: add r0, sp, #36 +; CHECK-NOFP: stm r0!, {r1, r2, r3} +; CHECK-FP-ATPCS: mov r0, r7 +; CHECK-FP-ATPCS: adds r0, #8 +; CHECK-FP-ATPCS: stm r0!, {r1, r2, r3} +; CHECK-FP-AAPCS: mov r0, r11 +; CHECK-FP-AAPCS: str r1, [r0, #8] +; CHECK-FP-AAPCS: mov r0, r11 +; CHECK-FP-AAPCS: str r2, [r0, #12] +; CHECK-FP-AAPCS: mov r0, r11 +; CHECK-FP-AAPCS: str r3, [r0, #16] ; Re-aligned stack, access via FP ; int test_args_realign(int a, int b, int c, int d, int e) { @@ -83,14 +99,17 @@ } ; CHECK-LABEL: test_args_realign ; Setup frame pointer -; CHECK: add r7, sp, #8 +; CHECK-ATPCS: add r7, sp, #8 +; CHECK-AAPCS: mov r11, sp ; Align stack ; CHECK: mov r4, sp ; CHECK-NEXT: lsrs r4, r4, #4 ; CHECK-NEXT: lsls r4, r4, #4 ; CHECK-NEXT: mov sp, r4 ; Load `e` via FP -; CHECK: ldr r0, [r7, #8] +; CHECK-ATPCS: ldr r0, [r7, #8] +; CHECK-AAPCS: mov r0, r11 +; CHECK-AAPCS: ldr r0, [r0, #8] ; CHECK-NEXT: str r3, [sp] ; Pass `e` as argument ; CHECK-NEXT: str r0, [sp, #4] @@ -117,16 +136,23 @@ ; Three incoming register varargs ; CHECK: sub sp, #12 ; Setup frame pointer -; CHECK: add r7, sp, #8 +; CHECK-ATPCS: add r7, sp, #8 +; CHECK-AAPCS: mov r11, sp ; Align stack ; CHECK: mov r4, sp ; CHECK-NEXT: lsrs r4, r4, #4 ; 
CHECK-NEXT: lsls r4, r4, #4 ; CHECK-NEXT: mov sp, r4 ; Incoming register varargs stored via FP -; CHECK: mov r0, r7 -; CHECK-NEXT: adds r0, #8 -; CHECK-NEXT: stm r0!, {r1, r2, r3} +; CHECK-ATPCS: mov r0, r7 +; CHECK-ATPCS-NEXT: adds r0, #8 +; CHECK-ATPCS-NEXT: stm r0!, {r1, r2, r3} +; CHECK-AAPCS: mov r0, r11 +; CHECK-AAPCS: str r1, [r0, #8] +; CHECK-AAPCS: mov r0, r11 +; CHECK-AAPCS: str r2, [r0, #12] +; CHECK-AAPCS: mov r0, r11 +; CHECK-AAPCS: str r3, [r0, #16] ; VLAs present, access via FP ; int test_args_vla(int a, int b, int c, int d, int e) { ; int v[a]; @@ -140,11 +166,14 @@ } ; CHECK-LABEL: test_args_vla ; Setup frame pointer -; CHECK: add r7, sp, #12 +; CHECK-ATPCS: add r7, sp, #12 +; CHECK-AAPCS: mov r11, sp ; Allocate outgoing stack arguments space -; CHECK: sub sp, #4 +; CHECK: sub sp, #8 ; Load `e` via FP -; CHECK: ldr r5, [r7, #8] +; CHECK-ATPCS: ldr r5, [r7, #8] +; CHECK-AAPCS: mov r5, r11 +; CHECK-AAPCS: ldr r5, [r5, #8] ; Pass `d` and `e` as arguments ; CHECK-NEXT: str r3, [sp] ; CHECK-NEXT: str r5, [sp, #4] @@ -169,11 +198,18 @@ ; Three incoming register varargs ; CHECK: sub sp, #12 ; Setup frame pointer -; CHECK: add r7, sp, #8 +; CHECK-ATPCS: add r7, sp, #8 +; CHECK-AAPCS: mov r11, sp ; Register varargs stored via FP -; CHECK-DAG: str r3, [r7, #16] -; CHECK-DAG: str r2, [r7, #12] -; CHECK-DAG: str r1, [r7, #8] +; CHECK-ATPCS-DAG: str r3, [r7, #16] +; CHECK-ATPCS-DAG: str r2, [r7, #12] +; CHECK-ATPCS-DAG: str r1, [r7, #8] +; CHECK-AAPCS-DAG: mov r5, r11 +; CHECK-AAPCS-DAG: str r1, [r5, #8] +; CHECK-AAPCS-DAG: mov r1, r11 +; CHECK-AAPCS-DAG: str r3, [r1, #16] +; CHECK-AAPCS-DAG: mov r1, r11 +; CHECK-AAPCS-DAG: str r2, [r1, #12] ; Moving SP, access via SP ; int test_args_moving_sp(int a, int b, int c, int d, int e) { @@ -195,17 +231,32 @@ ret i32 %add7 } ; CHECK-LABEL: test_args_moving_sp -; 20 bytes callee-saved area -; CHECK: push {r4, r5, r6, r7, lr} -; 20 bytes locals -; CHECK: sub sp, #20 +; 20 bytes callee-saved area without FP +; CHECK-NOFP: 
push {r4, r5, r6, r7, lr} +; 20 bytes callee-saved area for ATPCS +; CHECK-FP-ATPCS: push {r4, r5, r6, r7, lr} +; 24 bytes callee-saved area for AAPCS as codegen prefers an even number of GPRs spilled +; CHECK-FP-AAPCS: push {lr} +; CHECK-FP-AAPCS: mov lr, r11 +; CHECK-FP-AAPCS: push {lr} +; CHECK-FP-AAPCS: push {r4, r5, r6, r7} +; 20 bytes locals without FP +; CHECK-NOFP: sub sp, #20 +; 28 bytes locals with FP for ATPCS +; CHECK-FP-ATPCS: sub sp, #28 +; 24 bytes locals with FP for AAPCS +; CHECK-FP-AAPCS: sub sp, #24 ; Setup base pointer ; CHECK: mov r6, sp ; Allocate outgoing arguments space ; CHECK: sub sp, #508 ; CHECK: sub sp, #4 -; Load `e` via BP, 40 = 20 + 20 -; CHECK: ldr r3, [r6, #40] +; Load `e` via BP if FP is not present (40 = 20 + 20) +; CHECK-NOFP: ldr r3, [r6, #40] +; Load `e` via FP otherwise +; CHECK-FP-ATPCS: ldr r3, [r7, #8] +; CHECK-FP-AAPCS: mov r0, r11 +; CHECK-FP-AAPCS: ldr r3, [r0, #8] ; CHECK: bl f ; Stack restored before next call ; CHECK-NEXT: add sp, #508 @@ -236,14 +287,53 @@ ; CHECK-LABEL: test_varargs_moving_sp ; Three incoming register varargs ; CHECK: sub sp, #12 -; 16 bytes callee-saves -; CHECK: push {r4, r5, r6, lr} -; 20 bytes locals -; CHECK: sub sp, #20 -; Incoming varargs stored via BP, 36 = 20 + 16 -; CHECK: mov r0, r6 -; CHECK-NEXT: adds r0, #36 -; CHECK-NEXT: stm r0!, {r1, r2, r3} +; 16 bytes callee-saves without FP +; CHECK-NOFP: push {r4, r5, r6, lr} +; 24 bytes callee-saves with FP +; CHECK-FP-ATPCS: push {r4, r5, r6, r7, lr} +; CHECK-FP-AAPCS: push {lr} +; CHECK-FP-AAPCS: mov lr, r11 +; CHECK-FP-AAPCS: push {lr} +; CHECK-FP-AAPCS: push {r4, r5, r6, r7} +; Locals area +; CHECK-NOFP: sub sp, #20 +; CHECK-FP-ATPCS: sub sp, #24 +; CHECK-FP-AAPCS: sub sp, #20 +; Incoming varargs stored via BP if FP is not present (36 = 20 + 16) +; CHECK-NOFP: mov r0, r6 +; CHECK-NOFP-NEXT: adds r0, #36 +; CHECK-NOFP-NEXT: stm r0!, {r1, r2, r3} +; Incoming varargs stored via FP otherwise +; CHECK-FP-ATPCS: mov r0, r7 +; CHECK-FP-ATPCS-NEXT: 
adds r0, #8 +; CHECK-FP-ATPCS-NEXT: stm r0!, {r1, r2, r3} +; CHECK-FP-AAPCS: mov r0, r11 +; CHECK-FP-AAPCS-NEXT: str r1, [r0, #8] +; CHECK-FP-AAPCS-NEXT: mov r0, r11 +; CHECK-FP-AAPCS-NEXT: str r2, [r0, #12] +; CHECK-FP-AAPCS-NEXT: mov r0, r11 +; CHECK-FP-AAPCS-NEXT: str r3, [r0, #16] + +; struct S { int x[128]; } s; +; int test(S a, int b) { +; return i(b); +; } +define dso_local i32 @test_args_large_offset(%struct.S* byval(%struct.S) align 4 %0, i32 %1) local_unnamed_addr { + %3 = alloca i32, align 4 + store i32 %1, i32* %3, align 4 + %4 = load i32, i32* %3, align 4 + %5 = call i32 @i(i32 %4) + ret i32 %5 +} +; CHECK-LABEL: test_args_large_offset +; Without FP: Access to large offset is made using SP +; CHECK-NOFP: ldr r0, [sp, #520] +; With FP: Access to large offset is made through a const pool using FP +; CHECK-FP: ldr r0, .LCPI0_0 +; CHECK-FP-ATPCS: ldr r0, [r0, r7] +; CHECK-FP-AAPCS: add r0, r11 +; CHECK-FP-AAPCS: ldr r0, [r0] +; CHECK: bl i ; ; Access to locals @@ -313,7 +403,8 @@ } ; CHECK-LABEL: test_local_realign ; Setup frame pointer -; CHECK: add r7, sp, #8 +; CHECK-ATPCS: add r7, sp, #8 +; CHECK-AAPCS: mov r11, sp ; Re-align stack ; CHECK: mov r4, sp ; CHECK-NEXT: lsrs r4, r4, #4 @@ -355,15 +446,24 @@ } ; CHECK-LABEL: test_local_vla ; Setup frame pointer -; CHECK: add r7, sp, #12 +; CHECK-ATPCS: add r7, sp, #12 +; CHECK-AAPCS: mov r11, sp +; Locals area +; CHECK-ATPCS: sub sp, #12 +; CHECK-AAPCS: sub sp, #16 ; Setup base pointer ; CHECK: mov r6, sp -; CHECK: mov r5, r6 +; CHECK-ATPCS: mov r5, r6 +; CHECK-AAPCS: adds r5, r6, #4 ; Arguments to `h` compute relative to BP ; CHECK: adds r0, r6, #7 -; CHECK-NEXT: adds r0, #1 -; CHECK-NEXT: adds r1, r6, #4 -; CHECK-NEXT: mov r2, r6 +; CHECK-ATPCS-NEXT: adds r0, #1 +; CHECK-ATPCS-NEXT: adds r1, r6, #4 +; CHECK-ATPCS-NEXT: mov r2, r6 +; CHECK-AAPCS-NEXT: adds r0, #5 +; CHECK-AAPCS-NEXT: adds r1, r6, #7 +; CHECK-AAPCS-NEXT: adds r1, #1 +; CHECK-AAPCS-NEXT: adds r2, r6, #4 ; CHECK-NEXT: bl h ; Load `x`, `y`, `z` 
via BP (r5 should still have the value of r6 from the move ; above) @@ -396,7 +496,9 @@ } ; CHECK-LABEL: test_local_moving_sp ; Locals area -; CHECK: sub sp, #36 +; CHECK-NOFP: sub sp, #36 +; CHECK-FP-ATPCS: sub sp, #44 +; CHECK-FP-AAPCS: sub sp, #40 ; Setup BP ; CHECK: mov r6, sp ; Outoging arguments @@ -404,12 +506,24 @@ ; CHECK-NEXT: sub sp, #508 ; CHECK-NEXT: sub sp, #8 ; Argument addresses computed relative to BP -; CHECK: adds r4, r6, #7 -; CHECK-NEXT: adds r4, #13 -; CHECK: adds r1, r6, #7 -; CHECK-NEXT: adds r1, #9 -; CHECK: adds r5, r6, #7 -; CHECK-NEXT: adds r5, #5 +; CHECK-NOFP: adds r4, r6, #7 +; CHECK-NOFP-NEXT: adds r4, #13 +; CHECK-NOFP: adds r1, r6, #7 +; CHECK-NOFP-NEXT: adds r1, #9 +; CHECK-NOFP: adds r5, r6, #7 +; CHECK-NOFP-NEXT: adds r5, #5 +; CHECK-FP-ATPCS: adds r0, r6, #7 +; CHECK-FP-ATPCS-NEXT: adds r0, #21 +; CHECK-FP-ATPCS: adds r1, r6, #7 +; CHECK-FP-ATPCS-NEXT: adds r1, #17 +; CHECK-FP-ATPCS: adds r5, r6, #7 +; CHECK-FP-ATPCS-NEXT: adds r5, #13 +; CHECK-FP-AAPCS: adds r4, r6, #7 +; CHECK-FP-AAPCS-NEXT: adds r4, #17 +; CHECK-FP-AAPCS: adds r1, r6, #7 +; CHECK-FP-AAPCS-NEXT: adds r1, #13 +; CHECK-FP-AAPCS: adds r5, r6, #7 +; CHECK-FP-AAPCS-NEXT: adds r5, #9 ; CHECK: bl u ; Stack restored before next call ; CHECK: add sp, #508 diff --git a/llvm/test/CodeGen/Thumb/frame-chain-reserved-fp.ll b/llvm/test/CodeGen/Thumb/frame-chain-reserved-fp.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/Thumb/frame-chain-reserved-fp.ll @@ -0,0 +1,27 @@ +; RUN: not llc -mtriple thumbv6m-arm-none-eabi -filetype asm -o - %s -frame-pointer=all 2>&1 | FileCheck %s --check-prefix=RESERVED-R7 +; RUN: not llc -mtriple thumbv6m-arm-none-eabi -filetype asm -o - %s -frame-pointer=all -mattr=+aapcs-frame-chain 2>&1 | FileCheck %s --check-prefix=RESERVED-R11 +; RUN: not llc -mtriple thumbv6m-arm-none-eabi -filetype asm -o - %s -frame-pointer=all -mattr=+aapcs-frame-chain-leaf 2>&1 | FileCheck %s --check-prefix=RESERVED-R11 +; RUN: llc -mtriple 
thumbv6m-arm-none-eabi -filetype asm -o - %s -frame-pointer=non-leaf 2>&1 | FileCheck %s --check-prefix=RESERVED-NONE +; RUN: not llc -mtriple thumbv6m-arm-none-eabi -filetype asm -o - %s -frame-pointer=non-leaf -mattr=+aapcs-frame-chain 2>&1 | FileCheck %s --check-prefix=RESERVED-R11 +; RUN: not llc -mtriple thumbv6m-arm-none-eabi -filetype asm -o - %s -frame-pointer=non-leaf -mattr=+aapcs-frame-chain-leaf 2>&1 | FileCheck %s --check-prefix=RESERVED-R11 +; RUN: llc -mtriple thumbv6m-arm-none-eabi -filetype asm -o - %s -frame-pointer=none 2>&1 | FileCheck %s --check-prefix=RESERVED-NONE +; RUN: not llc -mtriple thumbv6m-arm-none-eabi -filetype asm -o - %s -frame-pointer=none -mattr=+aapcs-frame-chain 2>&1 | FileCheck %s --check-prefix=RESERVED-R11 +; RUN: not llc -mtriple thumbv6m-arm-none-eabi -filetype asm -o - %s -frame-pointer=none -mattr=+aapcs-frame-chain-leaf 2>&1 | FileCheck %s --check-prefix=RESERVED-R11 + +declare void @leaf(i32 %input) + +define void @reserved_r7(i32 %input) { +; RESERVED-NONE-NOT: error: write to reserved register 'R7' +; RESERVED-R7: error: write to reserved register 'R7' +; RESERVED-R11-NOT: error: write to reserved register 'R7' + %1 = call i32 asm sideeffect "mov $0, $1", "={r7},r"(i32 %input) + ret void +} + +define void @reserved_r11(i32 %input) { +; RESERVED-NONE-NOT: error: write to reserved register 'R11' +; RESERVED-R7-NOT: error: write to reserved register 'R11' +; RESERVED-R11: error: write to reserved register 'R11' + %1 = call i32 asm sideeffect "mov $0, $1", "={r11},r"(i32 %input) + ret void +} diff --git a/llvm/test/CodeGen/Thumb/frame-chain.ll b/llvm/test/CodeGen/Thumb/frame-chain.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/Thumb/frame-chain.ll @@ -0,0 +1,274 @@ +; RUN: llc -mtriple thumbv6m-arm-none-eabi -filetype asm -o - %s -frame-pointer=all --verify-machineinstrs | FileCheck %s --check-prefixes=FP,LEAF-FP +; RUN: llc -mtriple thumbv6m-arm-none-eabi -filetype asm -o - %s -frame-pointer=all 
-mattr=+aapcs-frame-chain --verify-machineinstrs | FileCheck %s --check-prefixes=FP-AAPCS,LEAF-FP +; RUN: llc -mtriple thumbv6m-arm-none-eabi -filetype asm -o - %s -frame-pointer=all -mattr=+aapcs-frame-chain-leaf --verify-machineinstrs | FileCheck %s --check-prefixes=FP-AAPCS,LEAF-FP-AAPCS +; RUN: llc -mtriple thumbv6m-arm-none-eabi -filetype asm -o - %s -frame-pointer=non-leaf --verify-machineinstrs | FileCheck %s --check-prefixes=FP,LEAF-NOFP +; RUN: llc -mtriple thumbv6m-arm-none-eabi -filetype asm -o - %s -frame-pointer=non-leaf -mattr=+aapcs-frame-chain --verify-machineinstrs | FileCheck %s --check-prefixes=FP-AAPCS,LEAF-NOFP +; RUN: llc -mtriple thumbv6m-arm-none-eabi -filetype asm -o - %s -frame-pointer=non-leaf -mattr=+aapcs-frame-chain-leaf --verify-machineinstrs | FileCheck %s --check-prefixes=FP-AAPCS,LEAF-NOFP-AAPCS +; RUN: llc -mtriple thumbv6m-arm-none-eabi -filetype asm -o - %s -frame-pointer=none --verify-machineinstrs | FileCheck %s --check-prefixes=NOFP,LEAF-NOFP +; RUN: llc -mtriple thumbv6m-arm-none-eabi -filetype asm -o - %s -frame-pointer=none -mattr=+aapcs-frame-chain --verify-machineinstrs | FileCheck %s --check-prefixes=NOFP-AAPCS,LEAF-NOFP +; RUN: llc -mtriple thumbv6m-arm-none-eabi -filetype asm -o - %s -frame-pointer=none -mattr=+aapcs-frame-chain-leaf --verify-machineinstrs | FileCheck %s --check-prefixes=NOFP-AAPCS,LEAF-NOFP-AAPCS + +define dso_local noundef i32 @leaf(i32 noundef %0) { +; LEAF-FP-LABEL: leaf: +; LEAF-FP: @ %bb.0: +; LEAF-FP-NEXT: .pad #4 +; LEAF-FP-NEXT: sub sp, #4 +; LEAF-FP-NEXT: str r0, [sp] +; LEAF-FP-NEXT: adds r0, r0, #4 +; LEAF-FP-NEXT: add sp, #4 +; LEAF-FP-NEXT: bx lr +; +; LEAF-FP-AAPCS-LABEL: leaf: +; LEAF-FP-AAPCS: @ %bb.0: +; LEAF-FP-AAPCS-NEXT: .save {lr} +; LEAF-FP-AAPCS-NEXT: push {lr} +; LEAF-FP-AAPCS-NEXT: mov lr, r11 +; LEAF-FP-AAPCS-NEXT: .save {r11} +; LEAF-FP-AAPCS-NEXT: push {lr} +; LEAF-FP-AAPCS-NEXT: .setfp r11, sp +; LEAF-FP-AAPCS-NEXT: mov r11, sp +; LEAF-FP-AAPCS-NEXT: .pad #4 +; 
LEAF-FP-AAPCS-NEXT: sub sp, #4 +; LEAF-FP-AAPCS-NEXT: str r0, [sp] +; LEAF-FP-AAPCS-NEXT: adds r0, r0, #4 +; LEAF-FP-AAPCS-NEXT: add sp, #4 +; LEAF-FP-AAPCS-NEXT: pop {r1} +; LEAF-FP-AAPCS-NEXT: mov r11, r1 +; LEAF-FP-AAPCS-NEXT: pop {pc} +; +; LEAF-NOFP-LABEL: leaf: +; LEAF-NOFP: @ %bb.0: +; LEAF-NOFP-NEXT: .pad #4 +; LEAF-NOFP-NEXT: sub sp, #4 +; LEAF-NOFP-NEXT: str r0, [sp] +; LEAF-NOFP-NEXT: adds r0, r0, #4 +; LEAF-NOFP-NEXT: add sp, #4 +; LEAF-NOFP-NEXT: bx lr +; +; LEAF-NOFP-AAPCS-LABEL: leaf: +; LEAF-NOFP-AAPCS: @ %bb.0: +; LEAF-NOFP-AAPCS-NEXT: .pad #4 +; LEAF-NOFP-AAPCS-NEXT: sub sp, #4 +; LEAF-NOFP-AAPCS-NEXT: str r0, [sp] +; LEAF-NOFP-AAPCS-NEXT: adds r0, r0, #4 +; LEAF-NOFP-AAPCS-NEXT: add sp, #4 +; LEAF-NOFP-AAPCS-NEXT: bx lr + %2 = alloca i32, align 4 + store i32 %0, i32* %2, align 4 + %3 = load i32, i32* %2, align 4 + %4 = add nsw i32 %3, 4 + ret i32 %4 +} + +define dso_local noundef i32 @non_leaf(i32 noundef %0) { +; FP-LABEL: non_leaf: +; FP: @ %bb.0: +; FP-NEXT: .save {r7, lr} +; FP-NEXT: push {r7, lr} +; FP-NEXT: .setfp r7, sp +; FP-NEXT: add r7, sp, #0 +; FP-NEXT: .pad #8 +; FP-NEXT: sub sp, #8 +; FP-NEXT: str r0, [sp, #4] +; FP-NEXT: bl leaf +; FP-NEXT: adds r0, r0, #1 +; FP-NEXT: add sp, #8 +; FP-NEXT: pop {r7, pc} +; +; FP-AAPCS-LABEL: non_leaf: +; FP-AAPCS: @ %bb.0: +; FP-AAPCS-NEXT: .save {lr} +; FP-AAPCS-NEXT: push {lr} +; FP-AAPCS-NEXT: mov lr, r11 +; FP-AAPCS-NEXT: .save {r11} +; FP-AAPCS-NEXT: push {lr} +; FP-AAPCS-NEXT: .setfp r11, sp +; FP-AAPCS-NEXT: mov r11, sp +; FP-AAPCS-NEXT: .pad #8 +; FP-AAPCS-NEXT: sub sp, #8 +; FP-AAPCS-NEXT: str r0, [sp, #4] +; FP-AAPCS-NEXT: bl leaf +; FP-AAPCS-NEXT: adds r0, r0, #1 +; FP-AAPCS-NEXT: add sp, #8 +; FP-AAPCS-NEXT: pop {r1} +; FP-AAPCS-NEXT: mov r11, r1 +; FP-AAPCS-NEXT: pop {pc} +; +; NOFP-LABEL: non_leaf: +; NOFP: @ %bb.0: +; NOFP-NEXT: .save {r7, lr} +; NOFP-NEXT: push {r7, lr} +; NOFP-NEXT: .pad #8 +; NOFP-NEXT: sub sp, #8 +; NOFP-NEXT: str r0, [sp, #4] +; NOFP-NEXT: bl leaf +; NOFP-NEXT: 
adds r0, r0, #1 +; NOFP-NEXT: add sp, #8 +; NOFP-NEXT: pop {r7, pc} +; +; NOFP-AAPCS-LABEL: non_leaf: +; NOFP-AAPCS: @ %bb.0: +; NOFP-AAPCS-NEXT: .save {r7, lr} +; NOFP-AAPCS-NEXT: push {r7, lr} +; NOFP-AAPCS-NEXT: .pad #8 +; NOFP-AAPCS-NEXT: sub sp, #8 +; NOFP-AAPCS-NEXT: str r0, [sp, #4] +; NOFP-AAPCS-NEXT: bl leaf +; NOFP-AAPCS-NEXT: adds r0, r0, #1 +; NOFP-AAPCS-NEXT: add sp, #8 +; NOFP-AAPCS-NEXT: pop {r7, pc} + %2 = alloca i32, align 4 + store i32 %0, i32* %2, align 4 + %3 = load i32, i32* %2, align 4 + %4 = call noundef i32 @leaf(i32 noundef %3) + %5 = add nsw i32 %4, 1 + ret i32 %5 +} + +declare i8* @llvm.stacksave() +define dso_local void @required_fp(i32 %0, i32 %1) { +; FP-LABEL: required_fp: +; FP: @ %bb.0: +; FP-NEXT: .save {r4, r6, r7, lr} +; FP-NEXT: push {r4, r6, r7, lr} +; FP-NEXT: .setfp r7, sp, #8 +; FP-NEXT: add r7, sp, #8 +; FP-NEXT: .pad #24 +; FP-NEXT: sub sp, #24 +; FP-NEXT: mov r6, sp +; FP-NEXT: mov r2, r6 +; FP-NEXT: str r1, [r2, #16] +; FP-NEXT: str r0, [r2, #20] +; FP-NEXT: mov r1, sp +; FP-NEXT: str r1, [r2, #8] +; FP-NEXT: lsls r1, r0, #2 +; FP-NEXT: adds r1, r1, #7 +; FP-NEXT: movs r3, #7 +; FP-NEXT: bics r1, r3 +; FP-NEXT: mov r3, sp +; FP-NEXT: subs r1, r3, r1 +; FP-NEXT: mov sp, r1 +; FP-NEXT: movs r1, #0 +; FP-NEXT: str r1, [r6, #4] +; FP-NEXT: str r0, [r2] +; FP-NEXT: subs r4, r7, #7 +; FP-NEXT: subs r4, #1 +; FP-NEXT: mov sp, r4 +; FP-NEXT: pop {r4, r6, r7, pc} +; +; FP-AAPCS-LABEL: required_fp: +; FP-AAPCS: @ %bb.0: +; FP-AAPCS-NEXT: .save {lr} +; FP-AAPCS-NEXT: push {lr} +; FP-AAPCS-NEXT: mov lr, r11 +; FP-AAPCS-NEXT: .save {r11} +; FP-AAPCS-NEXT: push {lr} +; FP-AAPCS-NEXT: .setfp r11, sp +; FP-AAPCS-NEXT: mov r11, sp +; FP-AAPCS-NEXT: .save {r4, r6} +; FP-AAPCS-NEXT: push {r4, r6} +; FP-AAPCS-NEXT: .pad #24 +; FP-AAPCS-NEXT: sub sp, #24 +; FP-AAPCS-NEXT: mov r6, sp +; FP-AAPCS-NEXT: mov r2, r6 +; FP-AAPCS-NEXT: str r1, [r2, #16] +; FP-AAPCS-NEXT: str r0, [r2, #20] +; FP-AAPCS-NEXT: mov r1, sp +; FP-AAPCS-NEXT: str r1, [r2, 
#8] +; FP-AAPCS-NEXT: lsls r1, r0, #2 +; FP-AAPCS-NEXT: adds r1, r1, #7 +; FP-AAPCS-NEXT: movs r3, #7 +; FP-AAPCS-NEXT: bics r1, r3 +; FP-AAPCS-NEXT: mov r3, sp +; FP-AAPCS-NEXT: subs r1, r3, r1 +; FP-AAPCS-NEXT: mov sp, r1 +; FP-AAPCS-NEXT: movs r1, #0 +; FP-AAPCS-NEXT: str r1, [r6, #4] +; FP-AAPCS-NEXT: str r0, [r2] +; FP-AAPCS-NEXT: mov r4, r11 +; FP-AAPCS-NEXT: subs r4, #8 +; FP-AAPCS-NEXT: mov sp, r4 +; FP-AAPCS-NEXT: pop {r4, r6} +; FP-AAPCS-NEXT: pop {r0} +; FP-AAPCS-NEXT: mov r11, r0 +; FP-AAPCS-NEXT: pop {pc} +; +; NOFP-LABEL: required_fp: +; NOFP: @ %bb.0: +; NOFP-NEXT: .save {r4, r6, r7, lr} +; NOFP-NEXT: push {r4, r6, r7, lr} +; NOFP-NEXT: .setfp r7, sp, #8 +; NOFP-NEXT: add r7, sp, #8 +; NOFP-NEXT: .pad #24 +; NOFP-NEXT: sub sp, #24 +; NOFP-NEXT: mov r6, sp +; NOFP-NEXT: mov r2, r6 +; NOFP-NEXT: str r1, [r2, #16] +; NOFP-NEXT: str r0, [r2, #20] +; NOFP-NEXT: mov r1, sp +; NOFP-NEXT: str r1, [r2, #8] +; NOFP-NEXT: lsls r1, r0, #2 +; NOFP-NEXT: adds r1, r1, #7 +; NOFP-NEXT: movs r3, #7 +; NOFP-NEXT: bics r1, r3 +; NOFP-NEXT: mov r3, sp +; NOFP-NEXT: subs r1, r3, r1 +; NOFP-NEXT: mov sp, r1 +; NOFP-NEXT: movs r1, #0 +; NOFP-NEXT: str r1, [r6, #4] +; NOFP-NEXT: str r0, [r2] +; NOFP-NEXT: subs r4, r7, #7 +; NOFP-NEXT: subs r4, #1 +; NOFP-NEXT: mov sp, r4 +; NOFP-NEXT: pop {r4, r6, r7, pc} +; +; NOFP-AAPCS-LABEL: required_fp: +; NOFP-AAPCS: @ %bb.0: +; NOFP-AAPCS-NEXT: .save {lr} +; NOFP-AAPCS-NEXT: push {lr} +; NOFP-AAPCS-NEXT: mov lr, r11 +; NOFP-AAPCS-NEXT: .save {r11} +; NOFP-AAPCS-NEXT: push {lr} +; NOFP-AAPCS-NEXT: .setfp r11, sp +; NOFP-AAPCS-NEXT: mov r11, sp +; NOFP-AAPCS-NEXT: .save {r4, r6} +; NOFP-AAPCS-NEXT: push {r4, r6} +; NOFP-AAPCS-NEXT: .pad #24 +; NOFP-AAPCS-NEXT: sub sp, #24 +; NOFP-AAPCS-NEXT: mov r6, sp +; NOFP-AAPCS-NEXT: mov r2, r6 +; NOFP-AAPCS-NEXT: str r1, [r2, #16] +; NOFP-AAPCS-NEXT: str r0, [r2, #20] +; NOFP-AAPCS-NEXT: mov r1, sp +; NOFP-AAPCS-NEXT: str r1, [r2, #8] +; NOFP-AAPCS-NEXT: lsls r1, r0, #2 +; NOFP-AAPCS-NEXT: adds 
r1, r1, #7 +; NOFP-AAPCS-NEXT: movs r3, #7 +; NOFP-AAPCS-NEXT: bics r1, r3 +; NOFP-AAPCS-NEXT: mov r3, sp +; NOFP-AAPCS-NEXT: subs r1, r3, r1 +; NOFP-AAPCS-NEXT: mov sp, r1 +; NOFP-AAPCS-NEXT: movs r1, #0 +; NOFP-AAPCS-NEXT: str r1, [r6, #4] +; NOFP-AAPCS-NEXT: str r0, [r2] +; NOFP-AAPCS-NEXT: mov r4, r11 +; NOFP-AAPCS-NEXT: subs r4, #8 +; NOFP-AAPCS-NEXT: mov sp, r4 +; NOFP-AAPCS-NEXT: pop {r4, r6} +; NOFP-AAPCS-NEXT: pop {r0} +; NOFP-AAPCS-NEXT: mov r11, r0 +; NOFP-AAPCS-NEXT: pop {pc} + %3 = alloca i32, align 4 + %4 = alloca i32, align 4 + %5 = alloca i8*, align 8 + %6 = alloca i64, align 8 + store i32 %0, i32* %3, align 4 + store i32 %1, i32* %4, align 4 + %7 = load i32, i32* %3, align 4 + %8 = zext i32 %7 to i64 + %9 = call i8* @llvm.stacksave() + store i8* %9, i8** %5, align 8 + %10 = alloca i32, i64 %8, align 4 + store i64 %8, i64* %6, align 8 + ret void +}