diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -3432,7 +3432,9 @@ BothFlags<[NoXarchOption, CC1Option], " the AAPCS standard requirement stating that" " volatile bit-field width is dictated by the field container type. (ARM only).">>, Group; - +def mframe_chain : Joined<["-"], "mframe-chain=">, + Group, Values<"none,aapcs,aapcs+leaf">, + HelpText<"Select the frame chain model used to emit frame records (Arm only).">; def mgeneral_regs_only : Flag<["-"], "mgeneral-regs-only">, Group, HelpText<"Generate code which only uses the general purpose registers (AArch64/x86 only)">; def mfix_cmse_cve_2021_35465 : Flag<["-"], "mfix-cmse-cve-2021-35465">, diff --git a/clang/lib/Driver/ToolChains/Arch/ARM.cpp b/clang/lib/Driver/ToolChains/Arch/ARM.cpp --- a/clang/lib/Driver/ToolChains/Arch/ARM.cpp +++ b/clang/lib/Driver/ToolChains/Arch/ARM.cpp @@ -717,6 +717,15 @@ } } + // Propagate frame-chain model selection + if (Arg *A = Args.getLastArg(options::OPT_mframe_chain)) { + StringRef FrameChainOption = A->getValue(); + if (FrameChainOption.startswith("aapcs")) + Features.push_back("+aapcs-frame-chain"); + if (FrameChainOption == "aapcs+leaf") + Features.push_back("+aapcs-frame-chain-leaf"); + } + // CMSE: Check for target 8M (for -mcmse to be applicable) is performed later. 
if (Args.getLastArg(options::OPT_mcmse)) Features.push_back("+8msecext"); diff --git a/llvm/lib/Target/ARM/ARM.td b/llvm/lib/Target/ARM/ARM.td --- a/llvm/lib/Target/ARM/ARM.td +++ b/llvm/lib/Target/ARM/ARM.td @@ -542,6 +542,16 @@ "Don't place a BTI instruction " "after a return-twice">; +def FeatureAAPCSFrameChain : SubtargetFeature<"aapcs-frame-chain", + "CreateAAPCSFrameChain", "true", + "Create an AAPCS compliant frame chain">; + +def FeatureAAPCSFrameChainLeaf : SubtargetFeature<"aapcs-frame-chain-leaf", + "CreateAAPCSFrameChainLeaf", "true", + "Create an AAPCS compliant frame chain " + "for leaf functions", + [FeatureAAPCSFrameChain]>; + //===----------------------------------------------------------------------===// // ARM architecture class // diff --git a/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/llvm/lib/Target/ARM/ARMFrameLowering.cpp --- a/llvm/lib/Target/ARM/ARMFrameLowering.cpp +++ b/llvm/lib/Target/ARM/ARMFrameLowering.cpp @@ -47,7 +47,8 @@ // | | // |-----------------------------------| // | | -// | prev_fp, prev_lr | +// | prev_lr | +// | prev_fp | // | (a.k.a. "frame record") | // | | // |- - - - - - - - - - - - - - - - - -| <- fp (r7 or r11) @@ -204,6 +205,8 @@ // ABI-required frame pointer. if (MF.getTarget().Options.DisableFramePointerElim(MF)) return true; + if (MF.getSubtarget().createAAPCSFrameChain()) + return true; // Frame pointer required for use within this function. return (RegInfo->hasStackRealignment(MF) || MFI.hasVarSizedObjects() || @@ -720,6 +723,9 @@ // into spill area 1, including the FP in R11. In either case, it // is in area one and the adjustment needs to take place just after // that push. + // FIXME: The above is not necessarily true when PACBTI is enabled. + // AAPCS requires use of R11, and PACBTI gets in the way of regular pushes, + // so FP ends up on area two. 
if (HasFP) { MachineBasicBlock::iterator AfterPush = std::next(GPRCS1Push); unsigned PushSize = sizeOfSPAdjustment(*GPRCS1Push); @@ -1818,6 +1824,12 @@ return true; } +static bool requiresAAPCSFrameRecord(const MachineFunction &MF) { + const auto &Subtarget = MF.getSubtarget(); + return Subtarget.createAAPCSFrameChainLeaf() || + (Subtarget.createAAPCSFrameChain() && MF.getFrameInfo().hasCalls()); +} + void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS) const { @@ -1826,7 +1838,7 @@ // to take advantage the eliminateFrameIndex machinery. This also ensures it // is spilled in the order specified by getCalleeSavedRegs() to make it easier // to combine multiple loads / stores. - bool CanEliminateFrame = true; + bool CanEliminateFrame = !requiresAAPCSFrameRecord(MF); bool CS1Spilled = false; bool LRSpilled = false; unsigned NumGPRSpills = 0; @@ -2067,7 +2079,9 @@ SavedRegs.set(FramePtr); // If the frame pointer is required by the ABI, also spill LR so that we // emit a complete frame record. 
- if (MF.getTarget().Options.DisableFramePointerElim(MF) && !LRSpilled) { + if ((requiresAAPCSFrameRecord(MF) || + MF.getTarget().Options.DisableFramePointerElim(MF)) && + !LRSpilled) { SavedRegs.set(ARM::LR); LRSpilled = true; NumGPRSpills++; diff --git a/llvm/lib/Target/ARM/ARMSubtarget.h b/llvm/lib/Target/ARM/ARMSubtarget.h --- a/llvm/lib/Target/ARM/ARMSubtarget.h +++ b/llvm/lib/Target/ARM/ARMSubtarget.h @@ -430,7 +430,8 @@ } MCPhysReg getFramePointerReg() const { - if (isTargetDarwin() || (!isTargetWindows() && isThumb())) + if (isTargetDarwin() || + (!isTargetWindows() && isThumb() && !createAAPCSFrameChain())) return ARM::R7; return ARM::R11; } diff --git a/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp b/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp --- a/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp +++ b/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp @@ -208,6 +208,8 @@ for (const CalleeSavedInfo &I : CSI) { Register Reg = I.getReg(); int FI = I.getFrameIdx(); + if (Reg == FramePtr) + FramePtrSpillFI = FI; switch (Reg) { case ARM::R8: case ARM::R9: @@ -223,8 +225,6 @@ case ARM::R6: case ARM::R7: case ARM::LR: - if (Reg == FramePtr) - FramePtrSpillFI = FI; GPRCS1Size += 4; break; default: @@ -232,10 +232,31 @@ } } + MachineBasicBlock::iterator GPRCS1Push, GPRCS2Push; if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPUSH) { + GPRCS1Push = MBBI; ++MBBI; } + // Find last push instruction for GPRCS2 - spilling of high registers + // (r8-r11) could consist of multiple tPUSH and tMOVr instructions. + while (true) { + MachineBasicBlock::iterator OldMBBI = MBBI; + // Skip a run of tMOVr instructions + while (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tMOVr) + MBBI++; + if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPUSH) { + GPRCS2Push = MBBI; + MBBI++; + } else { + // We have reached an instruction which is not a push, so the previous + // run of tMOVr instructions (which may have been empty) was not part of + // the prologue. 
Reset MBBI back to the last PUSH of the prologue. + MBBI = OldMBBI; + break; + } + } + // Determine starting offsets of spill areas. unsigned DPRCSOffset = NumBytes - ArgRegsSaveSize - (GPRCS1Size + GPRCS2Size + DPRCSSize); unsigned GPRCS2Offset = DPRCSOffset + DPRCSSize; @@ -252,71 +273,39 @@ int FramePtrOffsetInBlock = 0; unsigned adjustedGPRCS1Size = GPRCS1Size; if (GPRCS1Size > 0 && GPRCS2Size == 0 && - tryFoldSPUpdateIntoPushPop(STI, MF, &*std::prev(MBBI), NumBytes)) { + tryFoldSPUpdateIntoPushPop(STI, MF, &*(GPRCS1Push), NumBytes)) { FramePtrOffsetInBlock = NumBytes; adjustedGPRCS1Size += NumBytes; NumBytes = 0; } - - if (adjustedGPRCS1Size) { - CFAOffset += adjustedGPRCS1Size; - unsigned CFIIndex = - MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, CFAOffset)); - BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex) - .setMIFlags(MachineInstr::FrameSetup); - } - for (const CalleeSavedInfo &I : CSI) { - Register Reg = I.getReg(); - int FI = I.getFrameIdx(); - switch (Reg) { - case ARM::R8: - case ARM::R9: - case ARM::R10: - case ARM::R11: - case ARM::R12: - if (STI.splitFramePushPop(MF)) - break; - LLVM_FALLTHROUGH; - case ARM::R0: - case ARM::R1: - case ARM::R2: - case ARM::R3: - case ARM::R4: - case ARM::R5: - case ARM::R6: - case ARM::R7: - case ARM::LR: - unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset( - nullptr, MRI->getDwarfRegNum(Reg, true), MFI.getObjectOffset(FI))); - BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex) - .setMIFlags(MachineInstr::FrameSetup); - break; - } - } + CFAOffset += adjustedGPRCS1Size; // Adjust FP so it point to the stack slot that contains the previous FP. if (HasFP) { + bool FPIsHighReg = ARM::hGPRRegClass.contains(FramePtr); + MachineBasicBlock::iterator AfterPush = + FPIsHighReg ? 
std::next(GPRCS2Push) : std::next(GPRCS1Push); FramePtrOffsetInBlock += MFI.getObjectOffset(FramePtrSpillFI) + GPRCS1Size + ArgRegsSaveSize; - BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDrSPi), FramePtr) + if (FPIsHighReg) + FramePtrOffsetInBlock += GPRCS2Size; + BuildMI(MBB, AfterPush, dl, TII.get(ARM::tADDrSPi), FramePtr) .addReg(ARM::SP) .addImm(FramePtrOffsetInBlock / 4) .setMIFlags(MachineInstr::FrameSetup) .add(predOps(ARMCC::AL)); + if(FramePtrOffsetInBlock) { - CFAOffset -= FramePtrOffsetInBlock; unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfa( - nullptr, MRI->getDwarfRegNum(FramePtr, true), CFAOffset)); - BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) + nullptr, MRI->getDwarfRegNum(FramePtr, true), (CFAOffset - FramePtrOffsetInBlock))); + BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex) .setMIFlags(MachineInstr::FrameSetup); } else { unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createDefCfaRegister( nullptr, MRI->getDwarfRegNum(FramePtr, true))); - BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) + BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex) .setMIFlags(MachineInstr::FrameSetup); } @@ -326,45 +315,69 @@ AFI->setShouldRestoreSPFromFP(true); } - // Skip past the spilling of r8-r11, which could consist of multiple tPUSH - // and tMOVr instructions. We don't need to add any call frame information - // in-between these instructions, because they do not modify the high - // registers. - while (true) { - MachineBasicBlock::iterator OldMBBI = MBBI; - // Skip a run of tMOVr instructions - while (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tMOVr) - MBBI++; - if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPUSH) { - MBBI++; - } else { - // We have reached an instruction which is not a push, so the previous - // run of tMOVr instructions (which may have been empty) was not part of - // the prologue. 
Reset MBBI back to the last PUSH of the prologue. - MBBI = OldMBBI; - break; + // Emit call frame information for the callee-saved low registers. + if (GPRCS1Size > 0) { + MachineBasicBlock::iterator Pos = std::next(GPRCS1Push); + if (adjustedGPRCS1Size) { + unsigned CFIIndex = + MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, CFAOffset)); + BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex) + .setMIFlags(MachineInstr::FrameSetup); + } + for (const CalleeSavedInfo &I : CSI) { + Register Reg = I.getReg(); + int FI = I.getFrameIdx(); + switch (Reg) { + case ARM::R8: + case ARM::R9: + case ARM::R10: + case ARM::R11: + case ARM::R12: + if (STI.splitFramePushPop(MF)) + break; + LLVM_FALLTHROUGH; + case ARM::R0: + case ARM::R1: + case ARM::R2: + case ARM::R3: + case ARM::R4: + case ARM::R5: + case ARM::R6: + case ARM::R7: + case ARM::LR: + unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset( + nullptr, MRI->getDwarfRegNum(Reg, true), MFI.getObjectOffset(FI))); + BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex) + .setMIFlags(MachineInstr::FrameSetup); + break; + } } } // Emit call frame information for the callee-saved high registers. 
- for (auto &I : CSI) { - Register Reg = I.getReg(); - int FI = I.getFrameIdx(); - switch (Reg) { - case ARM::R8: - case ARM::R9: - case ARM::R10: - case ARM::R11: - case ARM::R12: { - unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset( - nullptr, MRI->getDwarfRegNum(Reg, true), MFI.getObjectOffset(FI))); - BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex) - .setMIFlags(MachineInstr::FrameSetup); - break; - } - default: - break; + if (GPRCS2Size > 0) { + MachineBasicBlock::iterator Pos = std::next(GPRCS2Push); + for (auto &I : CSI) { + Register Reg = I.getReg(); + int FI = I.getFrameIdx(); + switch (Reg) { + case ARM::R8: + case ARM::R9: + case ARM::R10: + case ARM::R11: + case ARM::R12: { + unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset( + nullptr, MRI->getDwarfRegNum(Reg, true), MFI.getObjectOffset(FI))); + BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex) + .setMIFlags(MachineInstr::FrameSetup); + break; + } + default: + break; + } } } @@ -794,65 +807,53 @@ return true; } -using ARMRegSet = std::bitset; - -// Return the first iteraror after CurrentReg which is present in EnabledRegs, -// or OrderEnd if no further registers are in that set. This does not advance -// the iterator fiorst, so returns CurrentReg if it is in EnabledRegs. 
-static const unsigned *findNextOrderedReg(const unsigned *CurrentReg, - const ARMRegSet &EnabledRegs, - const unsigned *OrderEnd) { - while (CurrentReg != OrderEnd && !EnabledRegs[*CurrentReg]) - ++CurrentReg; - return CurrentReg; -} - -bool Thumb1FrameLowering::spillCalleeSavedRegisters( - MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, - ArrayRef CSI, const TargetRegisterInfo *TRI) const { - if (CSI.empty()) - return false; - - DebugLoc DL; - const TargetInstrInfo &TII = *STI.getInstrInfo(); - MachineFunction &MF = *MBB.getParent(); - const ARMBaseRegisterInfo *RegInfo = static_cast( - MF.getSubtarget().getRegisterInfo()); - - ARMRegSet LoRegsToSave; // r0-r7, lr - ARMRegSet HiRegsToSave; // r8-r11 - ARMRegSet CopyRegs; // Registers which can be used after pushing - // LoRegs for saving HiRegs. - - for (const CalleeSavedInfo &I : llvm::reverse(CSI)) { - Register Reg = I.getReg(); - +static const SmallVector OrderedLowRegs = {ARM::R4, ARM::R5, ARM::R6, + ARM::R7, ARM::LR}; +static const SmallVector OrderedHighRegs = {ARM::R8, ARM::R9, + ARM::R10, ARM::R11}; +static const SmallVector OrderedCopyRegs = { + ARM::R0, ARM::R1, ARM::R2, ARM::R3, ARM::R4, + ARM::R5, ARM::R6, ARM::R7, ARM::LR}; + +static void splitLowAndHighRegs(const std::set &Regs, + std::set &LowRegs, + std::set &HighRegs) { + for (Register Reg : Regs) { if (ARM::tGPRRegClass.contains(Reg) || Reg == ARM::LR) { - LoRegsToSave[Reg] = true; + LowRegs.insert(Reg); } else if (ARM::hGPRRegClass.contains(Reg) && Reg != ARM::LR) { - HiRegsToSave[Reg] = true; + HighRegs.insert(Reg); } else { llvm_unreachable("callee-saved register of unexpected class"); } - - if ((ARM::tGPRRegClass.contains(Reg) || Reg == ARM::LR) && - !MF.getRegInfo().isLiveIn(Reg) && - !(hasFP(MF) && Reg == RegInfo->getFrameRegister(MF))) - CopyRegs[Reg] = true; } +} - // Unused argument registers can be used for the high register saving. 
- for (unsigned ArgReg : {ARM::R0, ARM::R1, ARM::R2, ARM::R3}) - if (!MF.getRegInfo().isLiveIn(ArgReg)) - CopyRegs[ArgReg] = true; +template +It getNextOrderedReg(It OrderedStartIt, It OrderedEndIt, + const std::set &RegSet) { + return std::find_if(OrderedStartIt, OrderedEndIt, + [&](Register Reg) { return RegSet.count(Reg); }); +} - // Push the low registers and lr +static void pushRegsToStack(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + const TargetInstrInfo &TII, + const std::set &RegsToSave, + const std::set &CopyRegs) { + MachineFunction &MF = *MBB.getParent(); const MachineRegisterInfo &MRI = MF.getRegInfo(); - if (!LoRegsToSave.none()) { + DebugLoc DL; + + std::set LowRegs, HighRegs; + splitLowAndHighRegs(RegsToSave, LowRegs, HighRegs); + + // Push low regs first + if (!LowRegs.empty()) { MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(ARM::tPUSH)).add(predOps(ARMCC::AL)); - for (unsigned Reg : {ARM::R4, ARM::R5, ARM::R6, ARM::R7, ARM::LR}) { - if (LoRegsToSave[Reg]) { + for (unsigned Reg : OrderedLowRegs) { + if (LowRegs.count(Reg)) { bool isKill = !MRI.isLiveIn(Reg); if (isKill && !MRI.isReserved(Reg)) MBB.addLiveIn(Reg); @@ -863,31 +864,26 @@ MIB.setMIFlags(MachineInstr::FrameSetup); } - // Push the high registers. There are no store instructions that can access - // these registers directly, so we have to move them to low registers, and - // push them. This might take multiple pushes, as it is possible for there to + // Now push the high registers + // There are no store instructions that can access high registers directly, + // so we have to move them to low registers, and push them. + // This might take multiple pushes, as it is possible for there to // be fewer low registers available than high registers which need saving. - // These are in reverse order so that in the case where we need to use + // Find the first register to save. 
+ // Registers must be processed in reverse order so that in case we need to use // multiple PUSH instructions, the order of the registers on the stack still // matches the unwind info. They need to be swicthed back to ascending order // before adding to the PUSH instruction. - static const unsigned AllCopyRegs[] = {ARM::LR, ARM::R7, ARM::R6, - ARM::R5, ARM::R4, ARM::R3, - ARM::R2, ARM::R1, ARM::R0}; - static const unsigned AllHighRegs[] = {ARM::R11, ARM::R10, ARM::R9, ARM::R8}; - - const unsigned *AllCopyRegsEnd = std::end(AllCopyRegs); - const unsigned *AllHighRegsEnd = std::end(AllHighRegs); - - // Find the first register to save. - const unsigned *HiRegToSave = findNextOrderedReg( - std::begin(AllHighRegs), HiRegsToSave, AllHighRegsEnd); + auto HiRegToSave = getNextOrderedReg(OrderedHighRegs.rbegin(), + OrderedHighRegs.rend(), + HighRegs); - while (HiRegToSave != AllHighRegsEnd) { + while (HiRegToSave != OrderedHighRegs.rend()) { // Find the first low register to use. - const unsigned *CopyReg = - findNextOrderedReg(std::begin(AllCopyRegs), CopyRegs, AllCopyRegsEnd); + auto CopyRegIt = getNextOrderedReg(OrderedCopyRegs.rbegin(), + OrderedCopyRegs.rend(), + CopyRegs); // Create the PUSH, but don't insert it yet (the MOVs need to come first). MachineInstrBuilder PushMIB = BuildMI(MF, DL, TII.get(ARM::tPUSH)) @@ -895,25 +891,29 @@ .setMIFlags(MachineInstr::FrameSetup); SmallVector RegsToPush; - while (HiRegToSave != AllHighRegsEnd && CopyReg != AllCopyRegsEnd) { - if (HiRegsToSave[*HiRegToSave]) { + while (HiRegToSave != OrderedHighRegs.rend() && + CopyRegIt != OrderedCopyRegs.rend()) { + if (HighRegs.count(*HiRegToSave)) { bool isKill = !MRI.isLiveIn(*HiRegToSave); if (isKill && !MRI.isReserved(*HiRegToSave)) MBB.addLiveIn(*HiRegToSave); // Emit a MOV from the high reg to the low reg. 
BuildMI(MBB, MI, DL, TII.get(ARM::tMOVr)) - .addReg(*CopyReg, RegState::Define) + .addReg(*CopyRegIt, RegState::Define) .addReg(*HiRegToSave, getKillRegState(isKill)) .add(predOps(ARMCC::AL)) .setMIFlags(MachineInstr::FrameSetup); // Record the register that must be added to the PUSH. - RegsToPush.push_back(*CopyReg); - - CopyReg = findNextOrderedReg(++CopyReg, CopyRegs, AllCopyRegsEnd); - HiRegToSave = - findNextOrderedReg(++HiRegToSave, HiRegsToSave, AllHighRegsEnd); + RegsToPush.push_back(*CopyRegIt); + + CopyRegIt = getNextOrderedReg(std::next(CopyRegIt), + OrderedCopyRegs.rend(), + CopyRegs); + HiRegToSave = getNextOrderedReg(std::next(HiRegToSave), + OrderedHighRegs.rend(), + HighRegs); } } @@ -924,84 +924,45 @@ // Insert the PUSH instruction after the MOVs. MBB.insert(MI, PushMIB); } - - return true; } -bool Thumb1FrameLowering::restoreCalleeSavedRegisters( - MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, - MutableArrayRef CSI, const TargetRegisterInfo *TRI) const { - if (CSI.empty()) - return false; - +static void popRegsFromStack(MachineBasicBlock &MBB, + MachineBasicBlock::iterator &MI, + const TargetInstrInfo &TII, + const std::set &RegsToRestore, + const std::set &CopyRegs, + bool IsVarArg, bool HasV5Ops) { MachineFunction &MF = *MBB.getParent(); ARMFunctionInfo *AFI = MF.getInfo(); - const TargetInstrInfo &TII = *STI.getInstrInfo(); - const ARMBaseRegisterInfo *RegInfo = static_cast( - MF.getSubtarget().getRegisterInfo()); - - bool isVarArg = AFI->getArgRegsSaveSize() > 0; DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc(); - ARMRegSet LoRegsToRestore; - ARMRegSet HiRegsToRestore; - // Low registers (r0-r7) which can be used to restore the high registers. 
- ARMRegSet CopyRegs; - - for (CalleeSavedInfo I : CSI) { - Register Reg = I.getReg(); - - if (ARM::tGPRRegClass.contains(Reg) || Reg == ARM::LR) { - LoRegsToRestore[Reg] = true; - } else if (ARM::hGPRRegClass.contains(Reg) && Reg != ARM::LR) { - HiRegsToRestore[Reg] = true; - } else { - llvm_unreachable("callee-saved register of unexpected class"); - } - - // If this is a low register not used as the frame pointer, we may want to - // use it for restoring the high registers. - if ((ARM::tGPRRegClass.contains(Reg)) && - !(hasFP(MF) && Reg == RegInfo->getFrameRegister(MF))) - CopyRegs[Reg] = true; - } + std::set LowRegs, HighRegs; + splitLowAndHighRegs(RegsToRestore, LowRegs, HighRegs); - // If this is a return block, we may be able to use some unused return value - // registers for restoring the high regs. - auto Terminator = MBB.getFirstTerminator(); - if (Terminator != MBB.end() && Terminator->getOpcode() == ARM::tBX_RET) { - CopyRegs[ARM::R0] = true; - CopyRegs[ARM::R1] = true; - CopyRegs[ARM::R2] = true; - CopyRegs[ARM::R3] = true; - for (auto Op : Terminator->implicit_operands()) { - if (Op.isReg()) - CopyRegs[Op.getReg()] = false; - } - } - - static const unsigned AllCopyRegs[] = {ARM::R0, ARM::R1, ARM::R2, ARM::R3, - ARM::R4, ARM::R5, ARM::R6, ARM::R7}; - static const unsigned AllHighRegs[] = {ARM::R8, ARM::R9, ARM::R10, ARM::R11}; - - const unsigned *AllCopyRegsEnd = std::end(AllCopyRegs); - const unsigned *AllHighRegsEnd = std::end(AllHighRegs); + // Pop the high registers first + // There are no load instructions that can access high registers directly, + // so we have to pop into low registers and then move to the high registers. + // This might take multiple pops, as it is possible for there to + // be fewer low registers available than high registers which need restoring. // Find the first register to restore. 
- auto HiRegToRestore = findNextOrderedReg(std::begin(AllHighRegs), - HiRegsToRestore, AllHighRegsEnd); + auto HiRegToRestore = getNextOrderedReg(OrderedHighRegs.begin(), + OrderedHighRegs.end(), + HighRegs); - while (HiRegToRestore != AllHighRegsEnd) { - assert(!CopyRegs.none()); + while (HiRegToRestore != OrderedHighRegs.end()) { + assert(!CopyRegs.empty()); // Find the first low register to use. - auto CopyReg = - findNextOrderedReg(std::begin(AllCopyRegs), CopyRegs, AllCopyRegsEnd); + auto CopyReg = getNextOrderedReg(OrderedCopyRegs.begin(), + OrderedCopyRegs.end(), + CopyRegs); // Create the POP instruction. MachineInstrBuilder PopMIB = BuildMI(MBB, MI, DL, TII.get(ARM::tPOP)).add(predOps(ARMCC::AL)); - while (HiRegToRestore != AllHighRegsEnd && CopyReg != AllCopyRegsEnd) { + while (HiRegToRestore != OrderedHighRegs.end() && + CopyReg != OrderedCopyRegs.end()) { // Add the low register to the POP. PopMIB.addReg(*CopyReg, RegState::Define); @@ -1011,62 +972,165 @@ .addReg(*CopyReg, RegState::Kill) .add(predOps(ARMCC::AL)); - CopyReg = findNextOrderedReg(++CopyReg, CopyRegs, AllCopyRegsEnd); - HiRegToRestore = - findNextOrderedReg(++HiRegToRestore, HiRegsToRestore, AllHighRegsEnd); + CopyReg = getNextOrderedReg(std::next(CopyReg), + OrderedCopyRegs.end(), + CopyRegs); + HiRegToRestore = getNextOrderedReg(std::next(HiRegToRestore), + OrderedHighRegs.end(), + HighRegs); } } - MachineInstrBuilder MIB = - BuildMI(MF, DL, TII.get(ARM::tPOP)).add(predOps(ARMCC::AL)); - - bool NeedsPop = false; - for (CalleeSavedInfo &Info : llvm::reverse(CSI)) { - Register Reg = Info.getReg(); - - // High registers (excluding lr) have already been dealt with - if (!(ARM::tGPRRegClass.contains(Reg) || Reg == ARM::LR)) - continue; - - if (Reg == ARM::LR) { - Info.setRestored(false); - if (!MBB.succ_empty() || - MI->getOpcode() == ARM::TCRETURNdi || - MI->getOpcode() == ARM::TCRETURNri) - // LR may only be popped into PC, as part of return sequence. 
- // If this isn't the return sequence, we'll need emitPopSpecialFixUp - // to restore LR the hard way. - // FIXME: if we don't pass any stack arguments it would be actually - // advantageous *and* correct to do the conversion to an ordinary call - // instruction here. - continue; - // Special epilogue for vararg functions. See emitEpilogue - if (isVarArg) - continue; - // ARMv4T requires BX, see emitEpilogue - if (!STI.hasV5TOps()) - continue; + // Now pop the low registers + if (!LowRegs.empty()) { + MachineInstrBuilder MIB = + BuildMI(MF, DL, TII.get(ARM::tPOP)).add(predOps(ARMCC::AL)); - // CMSE entry functions must return via BXNS, see emitEpilogue. - if (AFI->isCmseNSEntryFunction()) + bool NeedsPop = false; + for (Register Reg : OrderedLowRegs) { + if (!LowRegs.count(Reg)) continue; - // Pop LR into PC. - Reg = ARM::PC; - (*MIB).setDesc(TII.get(ARM::tPOP_RET)); - if (MI != MBB.end()) - MIB.copyImplicitOps(*MI); - MI = MBB.erase(MI); + if (Reg == ARM::LR) { + if (!MBB.succ_empty() || + MI->getOpcode() == ARM::TCRETURNdi || + MI->getOpcode() == ARM::TCRETURNri) + // LR may only be popped into PC, as part of return sequence. + // If this isn't the return sequence, we'll need emitPopSpecialFixUp + // to restore LR the hard way. + // FIXME: if we don't pass any stack arguments it would be actually + // advantageous *and* correct to do the conversion to an ordinary call + // instruction here. + continue; + // Special epilogue for vararg functions. See emitEpilogue + if (IsVarArg) + continue; + // ARMv4T requires BX, see emitEpilogue + if (!HasV5Ops) + continue; + + // CMSE entry functions must return via BXNS, see emitEpilogue. + if (AFI->isCmseNSEntryFunction()) + continue; + + // Pop LR into PC. + Reg = ARM::PC; + (*MIB).setDesc(TII.get(ARM::tPOP_RET)); + if (MI != MBB.end()) + MIB.copyImplicitOps(*MI); + MI = MBB.erase(MI); + } + MIB.addReg(Reg, getDefRegState(true)); + NeedsPop = true; + } + + // It's illegal to emit pop instruction without operands. 
+ if (NeedsPop) + MBB.insert(MI, &*MIB); + else + MF.deleteMachineInstr(MIB); + } +} + +bool Thumb1FrameLowering::spillCalleeSavedRegisters( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, + ArrayRef CSI, const TargetRegisterInfo *TRI) const { + if (CSI.empty()) + return false; + + const TargetInstrInfo &TII = *STI.getInstrInfo(); + MachineFunction &MF = *MBB.getParent(); + const ARMBaseRegisterInfo *RegInfo = static_cast( + MF.getSubtarget().getRegisterInfo()); + Register FPReg = RegInfo->getFrameRegister(MF); + + // In case FP is a high reg, we need a separate push sequence to generate + // a correct Frame Record + bool NeedsFrameRecordPush = hasFP(MF) && ARM::hGPRRegClass.contains(FPReg); + + std::set FrameRecord; + std::set SpilledGPRs; + for (const CalleeSavedInfo &I : CSI) { + Register Reg = I.getReg(); + if (NeedsFrameRecordPush && (Reg == FPReg || Reg == ARM::LR)) + FrameRecord.insert(Reg); + else + SpilledGPRs.insert(Reg); + } + + pushRegsToStack(MBB, MI, TII, FrameRecord, {ARM::LR}); + + // Determine intermediate registers which can be used for pushing high regs: + // - Spilled low regs + // - Unused argument registers + std::set CopyRegs; + for (Register Reg : SpilledGPRs) + if ((ARM::tGPRRegClass.contains(Reg) || Reg == ARM::LR) && + !MF.getRegInfo().isLiveIn(Reg) && !(hasFP(MF) && Reg == FPReg)) + CopyRegs.insert(Reg); + for (unsigned ArgReg : {ARM::R0, ARM::R1, ARM::R2, ARM::R3}) + if (!MF.getRegInfo().isLiveIn(ArgReg)) + CopyRegs.insert(ArgReg); + + pushRegsToStack(MBB, MI, TII, SpilledGPRs, CopyRegs); + + return true; +} + +bool Thumb1FrameLowering::restoreCalleeSavedRegisters( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, + MutableArrayRef CSI, const TargetRegisterInfo *TRI) const { + if (CSI.empty()) + return false; + + MachineFunction &MF = *MBB.getParent(); + ARMFunctionInfo *AFI = MF.getInfo(); + const TargetInstrInfo &TII = *STI.getInstrInfo(); + const ARMBaseRegisterInfo *RegInfo = static_cast( + 
MF.getSubtarget().getRegisterInfo()); + bool IsVarArg = AFI->getArgRegsSaveSize() > 0; + Register FPReg = RegInfo->getFrameRegister(MF); + + // In case FP is a high reg, we need a separate pop sequence to generate + // a correct Frame Record + bool NeedsFrameRecordPop = hasFP(MF) && ARM::hGPRRegClass.contains(FPReg); + + std::set FrameRecord; + std::set SpilledGPRs; + for (CalleeSavedInfo &I : CSI) { + Register Reg = I.getReg(); + if (NeedsFrameRecordPop && (Reg == FPReg || Reg == ARM::LR)) + FrameRecord.insert(Reg); + else + SpilledGPRs.insert(Reg); + + if (Reg == ARM::LR) + I.setRestored(false); + } + + // Determine intermediate registers which can be used for popping high regs: + // - Spilled low regs + // - Unused return registers + std::set CopyRegs; + for (Register Reg : SpilledGPRs) + if ((ARM::tGPRRegClass.contains(Reg)) && !(hasFP(MF) && Reg == FPReg)) + CopyRegs.insert(Reg); + auto Terminator = MBB.getFirstTerminator(); + if (Terminator != MBB.end() && Terminator->getOpcode() == ARM::tBX_RET) { + CopyRegs.insert(ARM::R0); + CopyRegs.insert(ARM::R1); + CopyRegs.insert(ARM::R2); + CopyRegs.insert(ARM::R3); + for (auto Op : Terminator->implicit_operands()) { + if (Op.isReg()) + CopyRegs.erase(Op.getReg()); } - MIB.addReg(Reg, getDefRegState(true)); - NeedsPop = true; } - // It's illegal to emit pop instruction without operands. 
- if (NeedsPop) - MBB.insert(MI, &*MIB); - else - MF.deleteMachineInstr(MIB); + popRegsFromStack(MBB, MI, TII, SpilledGPRs, CopyRegs, IsVarArg, + STI.hasV5TOps()); + popRegsFromStack(MBB, MI, TII, FrameRecord, {ARM::LR}, IsVarArg, + STI.hasV5TOps()); return true; } diff --git a/llvm/test/CodeGen/ARM/frame-chain-reserved-fp.ll b/llvm/test/CodeGen/ARM/frame-chain-reserved-fp.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/ARM/frame-chain-reserved-fp.ll @@ -0,0 +1,25 @@ +; RUN: not llc -mtriple arm-arm-none-eabi -filetype asm -o - %s -frame-pointer=all 2>&1 | FileCheck %s --check-prefix=RESERVED-R11 +; RUN: not llc -mtriple arm-arm-none-eabi -filetype asm -o - %s -frame-pointer=all -mattr=+aapcs-frame-chain 2>&1 | FileCheck %s --check-prefix=RESERVED-R11 +; RUN: not llc -mtriple arm-arm-none-eabi -filetype asm -o - %s -frame-pointer=all -mattr=+aapcs-frame-chain-leaf 2>&1 | FileCheck %s --check-prefix=RESERVED-R11 +; RUN: llc -mtriple arm-arm-none-eabi -filetype asm -o - %s -frame-pointer=non-leaf 2>&1 | FileCheck %s --check-prefix=RESERVED-NONE +; RUN: not llc -mtriple arm-arm-none-eabi -filetype asm -o - %s -frame-pointer=non-leaf -mattr=+aapcs-frame-chain 2>&1 | FileCheck %s --check-prefix=RESERVED-R11 +; RUN: not llc -mtriple arm-arm-none-eabi -filetype asm -o - %s -frame-pointer=non-leaf -mattr=+aapcs-frame-chain-leaf 2>&1 | FileCheck %s --check-prefix=RESERVED-R11 +; RUN: llc -mtriple arm-arm-none-eabi -filetype asm -o - %s -frame-pointer=none 2>&1 | FileCheck %s --check-prefix=RESERVED-NONE +; RUN: not llc -mtriple arm-arm-none-eabi -filetype asm -o - %s -frame-pointer=none -mattr=+aapcs-frame-chain 2>&1 | FileCheck %s --check-prefix=RESERVED-R11 +; RUN: not llc -mtriple arm-arm-none-eabi -filetype asm -o - %s -frame-pointer=none -mattr=+aapcs-frame-chain-leaf 2>&1 | FileCheck %s --check-prefix=RESERVED-R11 + +declare void @leaf(i32 %input) + +define void @reserved_r7(i32 %input) { +; RESERVED-NONE-NOT: error: write to reserved register 'R7' +; 
RESERVED-R11-NOT: error: write to reserved register 'R7' + %1 = call i32 asm sideeffect "mov $0, $1", "={r7},r"(i32 %input) + ret void +} + +define void @reserved_r11(i32 %input) { +; RESERVED-NONE-NOT: error: write to reserved register 'R11' +; RESERVED-R11: error: write to reserved register 'R11' + %1 = call i32 asm sideeffect "mov $0, $1", "={r11},r"(i32 %input) + ret void +} diff --git a/llvm/test/CodeGen/ARM/frame-chain.ll b/llvm/test/CodeGen/ARM/frame-chain.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/ARM/frame-chain.ll @@ -0,0 +1,228 @@ +; RUN: llc -mtriple arm-arm-none-eabi -filetype asm -o - %s -frame-pointer=all | FileCheck %s --check-prefixes=FP,LEAF-FP +; RUN: llc -mtriple arm-arm-none-eabi -filetype asm -o - %s -frame-pointer=all -mattr=+aapcs-frame-chain | FileCheck %s --check-prefixes=FP-AAPCS,LEAF-FP +; RUN: llc -mtriple arm-arm-none-eabi -filetype asm -o - %s -frame-pointer=all -mattr=+aapcs-frame-chain-leaf | FileCheck %s --check-prefixes=FP-AAPCS,LEAF-FP-AAPCS +; RUN: llc -mtriple arm-arm-none-eabi -filetype asm -o - %s -frame-pointer=non-leaf | FileCheck %s --check-prefixes=FP,LEAF-NOFP +; RUN: llc -mtriple arm-arm-none-eabi -filetype asm -o - %s -frame-pointer=non-leaf -mattr=+aapcs-frame-chain | FileCheck %s --check-prefixes=FP-AAPCS,LEAF-NOFP +; RUN: llc -mtriple arm-arm-none-eabi -filetype asm -o - %s -frame-pointer=non-leaf -mattr=+aapcs-frame-chain-leaf | FileCheck %s --check-prefixes=FP-AAPCS,LEAF-NOFP-AAPCS +; RUN: llc -mtriple arm-arm-none-eabi -filetype asm -o - %s -frame-pointer=none | FileCheck %s --check-prefixes=NOFP,LEAF-NOFP +; RUN: llc -mtriple arm-arm-none-eabi -filetype asm -o - %s -frame-pointer=none -mattr=+aapcs-frame-chain | FileCheck %s --check-prefixes=NOFP-AAPCS,LEAF-NOFP +; RUN: llc -mtriple arm-arm-none-eabi -filetype asm -o - %s -frame-pointer=none -mattr=+aapcs-frame-chain-leaf | FileCheck %s --check-prefixes=NOFP-AAPCS,LEAF-NOFP-AAPCS + +define dso_local noundef i32 @leaf(i32 noundef %0) { +; 
LEAF-FP-LABEL: leaf: +; LEAF-FP: @ %bb.0: +; LEAF-FP-NEXT: .pad #4 +; LEAF-FP-NEXT: sub sp, sp, #4 +; LEAF-FP-NEXT: str r0, [sp] +; LEAF-FP-NEXT: add r0, r0, #4 +; LEAF-FP-NEXT: add sp, sp, #4 +; LEAF-FP-NEXT: mov pc, lr +; +; LEAF-FP-AAPCS-LABEL: leaf: +; LEAF-FP-AAPCS: @ %bb.0: +; LEAF-FP-AAPCS-NEXT: .save {r11, lr} +; LEAF-FP-AAPCS-NEXT: push {r11, lr} +; LEAF-FP-AAPCS-NEXT: .setfp r11, sp +; LEAF-FP-AAPCS-NEXT: mov r11, sp +; LEAF-FP-AAPCS-NEXT: push {r0} +; LEAF-FP-AAPCS-NEXT: add r0, r0, #4 +; LEAF-FP-AAPCS-NEXT: mov sp, r11 +; LEAF-FP-AAPCS-NEXT: pop {r11, lr} +; LEAF-FP-AAPCS-NEXT: mov pc, lr +; +; LEAF-NOFP-LABEL: leaf: +; LEAF-NOFP: @ %bb.0: +; LEAF-NOFP-NEXT: .pad #4 +; LEAF-NOFP-NEXT: sub sp, sp, #4 +; LEAF-NOFP-NEXT: str r0, [sp] +; LEAF-NOFP-NEXT: add r0, r0, #4 +; LEAF-NOFP-NEXT: add sp, sp, #4 +; LEAF-NOFP-NEXT: mov pc, lr +; +; LEAF-NOFP-AAPCS-LABEL: leaf: +; LEAF-NOFP-AAPCS: @ %bb.0: +; LEAF-NOFP-AAPCS-NEXT: .save {r11, lr} +; LEAF-NOFP-AAPCS-NEXT: push {r11, lr} +; LEAF-NOFP-AAPCS-NEXT: .setfp r11, sp +; LEAF-NOFP-AAPCS-NEXT: mov r11, sp +; LEAF-NOFP-AAPCS-NEXT: push {r0} +; LEAF-NOFP-AAPCS-NEXT: add r0, r0, #4 +; LEAF-NOFP-AAPCS-NEXT: mov sp, r11 +; LEAF-NOFP-AAPCS-NEXT: pop {r11, lr} +; LEAF-NOFP-AAPCS-NEXT: mov pc, lr + %2 = alloca i32, align 4 + store i32 %0, i32* %2, align 4 + %3 = load i32, i32* %2, align 4 + %4 = add nsw i32 %3, 4 + ret i32 %4 +} + +define dso_local noundef i32 @non_leaf(i32 noundef %0) { +; FP-LABEL: non_leaf: +; FP: @ %bb.0: +; FP-NEXT: .save {r11, lr} +; FP-NEXT: push {r11, lr} +; FP-NEXT: .setfp r11, sp +; FP-NEXT: mov r11, sp +; FP-NEXT: .pad #8 +; FP-NEXT: sub sp, sp, #8 +; FP-NEXT: str r0, [sp, #4] +; FP-NEXT: bl leaf +; FP-NEXT: add r0, r0, #1 +; FP-NEXT: mov sp, r11 +; FP-NEXT: pop {r11, lr} +; FP-NEXT: mov pc, lr +; +; FP-AAPCS-LABEL: non_leaf: +; FP-AAPCS: @ %bb.0: +; FP-AAPCS-NEXT: .save {r11, lr} +; FP-AAPCS-NEXT: push {r11, lr} +; FP-AAPCS-NEXT: .setfp r11, sp +; FP-AAPCS-NEXT: mov r11, sp +; FP-AAPCS-NEXT: 
.pad #8 +; FP-AAPCS-NEXT: sub sp, sp, #8 +; FP-AAPCS-NEXT: str r0, [sp, #4] +; FP-AAPCS-NEXT: bl leaf +; FP-AAPCS-NEXT: add r0, r0, #1 +; FP-AAPCS-NEXT: mov sp, r11 +; FP-AAPCS-NEXT: pop {r11, lr} +; FP-AAPCS-NEXT: mov pc, lr +; +; NOFP-LABEL: non_leaf: +; NOFP: @ %bb.0: +; NOFP-NEXT: .save {r11, lr} +; NOFP-NEXT: push {r11, lr} +; NOFP-NEXT: .pad #8 +; NOFP-NEXT: sub sp, sp, #8 +; NOFP-NEXT: str r0, [sp, #4] +; NOFP-NEXT: bl leaf +; NOFP-NEXT: add r0, r0, #1 +; NOFP-NEXT: add sp, sp, #8 +; NOFP-NEXT: pop {r11, lr} +; NOFP-NEXT: mov pc, lr +; +; NOFP-AAPCS-LABEL: non_leaf: +; NOFP-AAPCS: @ %bb.0: +; NOFP-AAPCS-NEXT: .save {r11, lr} +; NOFP-AAPCS-NEXT: push {r11, lr} +; NOFP-AAPCS-NEXT: .setfp r11, sp +; NOFP-AAPCS-NEXT: mov r11, sp +; NOFP-AAPCS-NEXT: .pad #8 +; NOFP-AAPCS-NEXT: sub sp, sp, #8 +; NOFP-AAPCS-NEXT: str r0, [sp, #4] +; NOFP-AAPCS-NEXT: bl leaf +; NOFP-AAPCS-NEXT: add r0, r0, #1 +; NOFP-AAPCS-NEXT: mov sp, r11 +; NOFP-AAPCS-NEXT: pop {r11, lr} +; NOFP-AAPCS-NEXT: mov pc, lr + %2 = alloca i32, align 4 + store i32 %0, i32* %2, align 4 + %3 = load i32, i32* %2, align 4 + %4 = call noundef i32 @leaf(i32 noundef %3) + %5 = add nsw i32 %4, 1 + ret i32 %5 +} + +declare i8* @llvm.stacksave() +define dso_local void @required_fp(i32 %0, i32 %1) { +; LEAF-FP-LABEL: required_fp: +; LEAF-FP: @ %bb.0: +; LEAF-FP-NEXT: .save {r4, r5, r11, lr} +; LEAF-FP-NEXT: push {r4, r5, r11, lr} +; LEAF-FP-NEXT: .setfp r11, sp, #8 +; LEAF-FP-NEXT: add r11, sp, #8 +; LEAF-FP-NEXT: .pad #24 +; LEAF-FP-NEXT: sub sp, sp, #24 +; LEAF-FP-NEXT: str r1, [r11, #-16] +; LEAF-FP-NEXT: mov r1, #7 +; LEAF-FP-NEXT: add r1, r1, r0, lsl #2 +; LEAF-FP-NEXT: str r0, [r11, #-12] +; LEAF-FP-NEXT: bic r1, r1, #7 +; LEAF-FP-NEXT: str sp, [r11, #-24] +; LEAF-FP-NEXT: sub sp, sp, r1 +; LEAF-FP-NEXT: mov r1, #0 +; LEAF-FP-NEXT: str r0, [r11, #-32] +; LEAF-FP-NEXT: str r1, [r11, #-28] +; LEAF-FP-NEXT: sub sp, r11, #8 +; LEAF-FP-NEXT: pop {r4, r5, r11, lr} +; LEAF-FP-NEXT: mov pc, lr +; +; 
LEAF-FP-AAPCS-LABEL: required_fp: +; LEAF-FP-AAPCS: @ %bb.0: +; LEAF-FP-AAPCS-NEXT: .save {r4, r5, r11, lr} +; LEAF-FP-AAPCS-NEXT: push {r4, r5, r11, lr} +; LEAF-FP-AAPCS-NEXT: .setfp r11, sp, #8 +; LEAF-FP-AAPCS-NEXT: add r11, sp, #8 +; LEAF-FP-AAPCS-NEXT: .pad #24 +; LEAF-FP-AAPCS-NEXT: sub sp, sp, #24 +; LEAF-FP-AAPCS-NEXT: str r1, [r11, #-16] +; LEAF-FP-AAPCS-NEXT: mov r1, #7 +; LEAF-FP-AAPCS-NEXT: add r1, r1, r0, lsl #2 +; LEAF-FP-AAPCS-NEXT: str r0, [r11, #-12] +; LEAF-FP-AAPCS-NEXT: bic r1, r1, #7 +; LEAF-FP-AAPCS-NEXT: str sp, [r11, #-24] +; LEAF-FP-AAPCS-NEXT: sub sp, sp, r1 +; LEAF-FP-AAPCS-NEXT: mov r1, #0 +; LEAF-FP-AAPCS-NEXT: str r0, [r11, #-32] +; LEAF-FP-AAPCS-NEXT: str r1, [r11, #-28] +; LEAF-FP-AAPCS-NEXT: sub sp, r11, #8 +; LEAF-FP-AAPCS-NEXT: pop {r4, r5, r11, lr} +; LEAF-FP-AAPCS-NEXT: mov pc, lr +; +; LEAF-NOFP-LABEL: required_fp: +; LEAF-NOFP: @ %bb.0: +; LEAF-NOFP-NEXT: .save {r4, r5, r11} +; LEAF-NOFP-NEXT: push {r4, r5, r11} +; LEAF-NOFP-NEXT: .setfp r11, sp, #8 +; LEAF-NOFP-NEXT: add r11, sp, #8 +; LEAF-NOFP-NEXT: .pad #20 +; LEAF-NOFP-NEXT: sub sp, sp, #20 +; LEAF-NOFP-NEXT: str r1, [r11, #-16] +; LEAF-NOFP-NEXT: mov r1, #7 +; LEAF-NOFP-NEXT: add r1, r1, r0, lsl #2 +; LEAF-NOFP-NEXT: str r0, [r11, #-12] +; LEAF-NOFP-NEXT: bic r1, r1, #7 +; LEAF-NOFP-NEXT: str sp, [r11, #-20] +; LEAF-NOFP-NEXT: sub sp, sp, r1 +; LEAF-NOFP-NEXT: mov r1, #0 +; LEAF-NOFP-NEXT: str r0, [r11, #-28] +; LEAF-NOFP-NEXT: str r1, [r11, #-24] +; LEAF-NOFP-NEXT: sub sp, r11, #8 +; LEAF-NOFP-NEXT: pop {r4, r5, r11} +; LEAF-NOFP-NEXT: mov pc, lr +; +; LEAF-NOFP-AAPCS-LABEL: required_fp: +; LEAF-NOFP-AAPCS: @ %bb.0: +; LEAF-NOFP-AAPCS-NEXT: .save {r4, r5, r11, lr} +; LEAF-NOFP-AAPCS-NEXT: push {r4, r5, r11, lr} +; LEAF-NOFP-AAPCS-NEXT: .setfp r11, sp, #8 +; LEAF-NOFP-AAPCS-NEXT: add r11, sp, #8 +; LEAF-NOFP-AAPCS-NEXT: .pad #24 +; LEAF-NOFP-AAPCS-NEXT: sub sp, sp, #24 +; LEAF-NOFP-AAPCS-NEXT: str r1, [r11, #-16] +; LEAF-NOFP-AAPCS-NEXT: mov r1, #7 +; 
LEAF-NOFP-AAPCS-NEXT: add r1, r1, r0, lsl #2 +; LEAF-NOFP-AAPCS-NEXT: str r0, [r11, #-12] +; LEAF-NOFP-AAPCS-NEXT: bic r1, r1, #7 +; LEAF-NOFP-AAPCS-NEXT: str sp, [r11, #-24] +; LEAF-NOFP-AAPCS-NEXT: sub sp, sp, r1 +; LEAF-NOFP-AAPCS-NEXT: mov r1, #0 +; LEAF-NOFP-AAPCS-NEXT: str r0, [r11, #-32] +; LEAF-NOFP-AAPCS-NEXT: str r1, [r11, #-28] +; LEAF-NOFP-AAPCS-NEXT: sub sp, r11, #8 +; LEAF-NOFP-AAPCS-NEXT: pop {r4, r5, r11, lr} +; LEAF-NOFP-AAPCS-NEXT: mov pc, lr + %3 = alloca i32, align 4 + %4 = alloca i32, align 4 + %5 = alloca i8*, align 8 + %6 = alloca i64, align 8 + store i32 %0, i32* %3, align 4 + store i32 %1, i32* %4, align 4 + %7 = load i32, i32* %3, align 4 + %8 = zext i32 %7 to i64 + %9 = call i8* @llvm.stacksave() + store i8* %9, i8** %5, align 8 + %10 = alloca i32, i64 %8, align 4 + store i64 %8, i64* %6, align 8 + ret void +} diff --git a/llvm/test/CodeGen/Thumb/frame-chain-reserved-fp.ll b/llvm/test/CodeGen/Thumb/frame-chain-reserved-fp.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/Thumb/frame-chain-reserved-fp.ll @@ -0,0 +1,27 @@ +; RUN: not llc -mtriple thumbv6m-arm-none-eabi -filetype asm -o - %s -frame-pointer=all 2>&1 | FileCheck %s --check-prefix=RESERVED-R7 +; RUN: not llc -mtriple thumbv6m-arm-none-eabi -filetype asm -o - %s -frame-pointer=all -mattr=+aapcs-frame-chain 2>&1 | FileCheck %s --check-prefix=RESERVED-R11 +; RUN: not llc -mtriple thumbv6m-arm-none-eabi -filetype asm -o - %s -frame-pointer=all -mattr=+aapcs-frame-chain-leaf 2>&1 | FileCheck %s --check-prefix=RESERVED-R11 +; RUN: llc -mtriple thumbv6m-arm-none-eabi -filetype asm -o - %s -frame-pointer=non-leaf 2>&1 | FileCheck %s --check-prefix=RESERVED-NONE +; RUN: not llc -mtriple thumbv6m-arm-none-eabi -filetype asm -o - %s -frame-pointer=non-leaf -mattr=+aapcs-frame-chain 2>&1 | FileCheck %s --check-prefix=RESERVED-R11 +; RUN: not llc -mtriple thumbv6m-arm-none-eabi -filetype asm -o - %s -frame-pointer=non-leaf -mattr=+aapcs-frame-chain-leaf 2>&1 | FileCheck %s 
--check-prefix=RESERVED-R11 +; RUN: llc -mtriple thumbv6m-arm-none-eabi -filetype asm -o - %s -frame-pointer=none 2>&1 | FileCheck %s --check-prefix=RESERVED-NONE +; RUN: not llc -mtriple thumbv6m-arm-none-eabi -filetype asm -o - %s -frame-pointer=none -mattr=+aapcs-frame-chain 2>&1 | FileCheck %s --check-prefix=RESERVED-R11 +; RUN: not llc -mtriple thumbv6m-arm-none-eabi -filetype asm -o - %s -frame-pointer=none -mattr=+aapcs-frame-chain-leaf 2>&1 | FileCheck %s --check-prefix=RESERVED-R11 + +declare void @leaf(i32 %input) + +define void @reserved_r7(i32 %input) { +; RESERVED-NONE-NOT: error: write to reserved register 'R7' +; RESERVED-R7: error: write to reserved register 'R7' +; RESERVED-R11-NOT: error: write to reserved register 'R7' + %1 = call i32 asm sideeffect "mov $0, $1", "={r7},r"(i32 %input) + ret void +} + +define void @reserved_r11(i32 %input) { +; RESERVED-NONE-NOT: error: write to reserved register 'R11' +; RESERVED-R7-NOT: error: write to reserved register 'R11' +; RESERVED-R11: error: write to reserved register 'R11' + %1 = call i32 asm sideeffect "mov $0, $1", "={r11},r"(i32 %input) + ret void +} diff --git a/llvm/test/CodeGen/Thumb/frame-chain.ll b/llvm/test/CodeGen/Thumb/frame-chain.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/Thumb/frame-chain.ll @@ -0,0 +1,301 @@ +; RUN: llc -mtriple thumb-arm-none-eabi -filetype asm -o - %s -frame-pointer=all | FileCheck %s --check-prefixes=FP,LEAF-FP +; RUN: llc -mtriple thumb-arm-none-eabi -filetype asm -o - %s -frame-pointer=all -mattr=+aapcs-frame-chain | FileCheck %s --check-prefixes=FP-AAPCS,LEAF-FP +; RUN: llc -mtriple thumb-arm-none-eabi -filetype asm -o - %s -frame-pointer=all -mattr=+aapcs-frame-chain-leaf | FileCheck %s --check-prefixes=FP-AAPCS,LEAF-FP-AAPCS +; RUN: llc -mtriple thumb-arm-none-eabi -filetype asm -o - %s -frame-pointer=non-leaf | FileCheck %s --check-prefixes=FP,LEAF-NOFP +; RUN: llc -mtriple thumb-arm-none-eabi -filetype asm -o - %s -frame-pointer=non-leaf 
-mattr=+aapcs-frame-chain | FileCheck %s --check-prefixes=FP-AAPCS,LEAF-NOFP +; RUN: llc -mtriple thumb-arm-none-eabi -filetype asm -o - %s -frame-pointer=non-leaf -mattr=+aapcs-frame-chain-leaf | FileCheck %s --check-prefixes=FP-AAPCS,LEAF-NOFP-AAPCS +; RUN: llc -mtriple thumb-arm-none-eabi -filetype asm -o - %s -frame-pointer=none | FileCheck %s --check-prefixes=NOFP,LEAF-NOFP +; RUN: llc -mtriple thumb-arm-none-eabi -filetype asm -o - %s -frame-pointer=none -mattr=+aapcs-frame-chain | FileCheck %s --check-prefixes=NOFP-AAPCS,LEAF-NOFP +; RUN: llc -mtriple thumb-arm-none-eabi -filetype asm -o - %s -frame-pointer=none -mattr=+aapcs-frame-chain-leaf | FileCheck %s --check-prefixes=NOFP-AAPCS,LEAF-NOFP-AAPCS + +define dso_local noundef i32 @leaf(i32 noundef %0) { +; LEAF-FP-LABEL: leaf: +; LEAF-FP: @ %bb.0: +; LEAF-FP-NEXT: .pad #4 +; LEAF-FP-NEXT: sub sp, #4 +; LEAF-FP-NEXT: str r0, [sp] +; LEAF-FP-NEXT: adds r0, r0, #4 +; LEAF-FP-NEXT: add sp, #4 +; LEAF-FP-NEXT: bx lr +; +; LEAF-FP-AAPCS-LABEL: leaf: +; LEAF-FP-AAPCS: @ %bb.0: +; LEAF-FP-AAPCS-NEXT: .save {lr} +; LEAF-FP-AAPCS-NEXT: push {lr} +; LEAF-FP-AAPCS-NEXT: mov lr, r11 +; LEAF-FP-AAPCS-NEXT: .save {r11} +; LEAF-FP-AAPCS-NEXT: push {lr} +; LEAF-FP-AAPCS-NEXT: .setfp r11, sp +; LEAF-FP-AAPCS-NEXT: add r11, sp, #0 +; LEAF-FP-AAPCS-NEXT: .pad #4 +; LEAF-FP-AAPCS-NEXT: sub sp, #4 +; LEAF-FP-AAPCS-NEXT: str r0, [sp] +; LEAF-FP-AAPCS-NEXT: adds r0, r0, #4 +; LEAF-FP-AAPCS-NEXT: add sp, #4 +; LEAF-FP-AAPCS-NEXT: pop {lr} +; LEAF-FP-AAPCS-NEXT: mov r11, lr +; LEAF-FP-AAPCS-NEXT: pop {r1} +; LEAF-FP-AAPCS-NEXT: bx r1 +; +; LEAF-NOFP-LABEL: leaf: +; LEAF-NOFP: @ %bb.0: +; LEAF-NOFP-NEXT: .pad #4 +; LEAF-NOFP-NEXT: sub sp, #4 +; LEAF-NOFP-NEXT: str r0, [sp] +; LEAF-NOFP-NEXT: adds r0, r0, #4 +; LEAF-NOFP-NEXT: add sp, #4 +; LEAF-NOFP-NEXT: bx lr +; +; LEAF-NOFP-AAPCS-LABEL: leaf: +; LEAF-NOFP-AAPCS: @ %bb.0: +; LEAF-NOFP-AAPCS-NEXT: .save {lr} +; LEAF-NOFP-AAPCS-NEXT: push {lr} +; LEAF-NOFP-AAPCS-NEXT: mov lr, r11 +; 
LEAF-NOFP-AAPCS-NEXT: .save {r11} +; LEAF-NOFP-AAPCS-NEXT: push {lr} +; LEAF-NOFP-AAPCS-NEXT: .setfp r11, sp +; LEAF-NOFP-AAPCS-NEXT: add r11, sp, #0 +; LEAF-NOFP-AAPCS-NEXT: .pad #4 +; LEAF-NOFP-AAPCS-NEXT: sub sp, #4 +; LEAF-NOFP-AAPCS-NEXT: str r0, [sp] +; LEAF-NOFP-AAPCS-NEXT: adds r0, r0, #4 +; LEAF-NOFP-AAPCS-NEXT: add sp, #4 +; LEAF-NOFP-AAPCS-NEXT: pop {lr} +; LEAF-NOFP-AAPCS-NEXT: mov r11, lr +; LEAF-NOFP-AAPCS-NEXT: pop {r1} +; LEAF-NOFP-AAPCS-NEXT: bx r1 + %2 = alloca i32, align 4 + store i32 %0, i32* %2, align 4 + %3 = load i32, i32* %2, align 4 + %4 = add nsw i32 %3, 4 + ret i32 %4 +} + +define dso_local noundef i32 @non_leaf(i32 noundef %0) { +; FP-LABEL: non_leaf: +; FP: @ %bb.0: +; FP-NEXT: .save {r7, lr} +; FP-NEXT: push {r7, lr} +; FP-NEXT: .setfp r7, sp +; FP-NEXT: add r7, sp, #0 +; FP-NEXT: .pad #8 +; FP-NEXT: sub sp, #8 +; FP-NEXT: str r0, [sp, #4] +; FP-NEXT: bl leaf +; FP-NEXT: adds r0, r0, #1 +; FP-NEXT: add sp, #8 +; FP-NEXT: pop {r7} +; FP-NEXT: pop {r1} +; FP-NEXT: bx r1 +; +; FP-AAPCS-LABEL: non_leaf: +; FP-AAPCS: @ %bb.0: +; FP-AAPCS-NEXT: .save {lr} +; FP-AAPCS-NEXT: push {lr} +; FP-AAPCS-NEXT: mov lr, r11 +; FP-AAPCS-NEXT: .save {r11} +; FP-AAPCS-NEXT: push {lr} +; FP-AAPCS-NEXT: .setfp r11, sp +; FP-AAPCS-NEXT: add r11, sp, #0 +; FP-AAPCS-NEXT: .pad #8 +; FP-AAPCS-NEXT: sub sp, #8 +; FP-AAPCS-NEXT: str r0, [sp, #4] +; FP-AAPCS-NEXT: bl leaf +; FP-AAPCS-NEXT: adds r0, r0, #1 +; FP-AAPCS-NEXT: add sp, #8 +; FP-AAPCS-NEXT: pop {lr} +; FP-AAPCS-NEXT: mov r11, lr +; FP-AAPCS-NEXT: pop {r1} +; FP-AAPCS-NEXT: bx r1 +; +; NOFP-LABEL: non_leaf: +; NOFP: @ %bb.0: +; NOFP-NEXT: .save {r7, lr} +; NOFP-NEXT: push {r7, lr} +; NOFP-NEXT: .pad #8 +; NOFP-NEXT: sub sp, #8 +; NOFP-NEXT: str r0, [sp, #4] +; NOFP-NEXT: bl leaf +; NOFP-NEXT: adds r0, r0, #1 +; NOFP-NEXT: add sp, #8 +; NOFP-NEXT: pop {r7} +; NOFP-NEXT: pop {r1} +; NOFP-NEXT: bx r1 +; +; NOFP-AAPCS-LABEL: non_leaf: +; NOFP-AAPCS: @ %bb.0: +; NOFP-AAPCS-NEXT: .save {lr} +; NOFP-AAPCS-NEXT: 
push {lr} +; NOFP-AAPCS-NEXT: mov lr, r11 +; NOFP-AAPCS-NEXT: .save {r11} +; NOFP-AAPCS-NEXT: push {lr} +; NOFP-AAPCS-NEXT: .setfp r11, sp +; NOFP-AAPCS-NEXT: add r11, sp, #0 +; NOFP-AAPCS-NEXT: .pad #8 +; NOFP-AAPCS-NEXT: sub sp, #8 +; NOFP-AAPCS-NEXT: str r0, [sp, #4] +; NOFP-AAPCS-NEXT: bl leaf +; NOFP-AAPCS-NEXT: adds r0, r0, #1 +; NOFP-AAPCS-NEXT: add sp, #8 +; NOFP-AAPCS-NEXT: pop {lr} +; NOFP-AAPCS-NEXT: mov r11, lr +; NOFP-AAPCS-NEXT: pop {r1} +; NOFP-AAPCS-NEXT: bx r1 + %2 = alloca i32, align 4 + store i32 %0, i32* %2, align 4 + %3 = load i32, i32* %2, align 4 + %4 = call noundef i32 @leaf(i32 noundef %3) + %5 = add nsw i32 %4, 1 + ret i32 %5 +} + +declare i8* @llvm.stacksave() +define dso_local void @required_fp(i32 %0, i32 %1) { +; FP-LABEL: required_fp: +; FP: @ %bb.0: +; FP-NEXT: .save {r4, r6, r7, lr} +; FP-NEXT: push {r4, r6, r7, lr} +; FP-NEXT: .setfp r7, sp, #8 +; FP-NEXT: add r7, sp, #8 +; FP-NEXT: .pad #24 +; FP-NEXT: sub sp, #24 +; FP-NEXT: mov r6, sp +; FP-NEXT: mov r2, r6 +; FP-NEXT: str r1, [r2, #16] +; FP-NEXT: str r0, [r2, #20] +; FP-NEXT: mov r1, sp +; FP-NEXT: str r1, [r2, #8] +; FP-NEXT: lsls r1, r0, #2 +; FP-NEXT: adds r1, r1, #7 +; FP-NEXT: movs r3, #7 +; FP-NEXT: bics r1, r3 +; FP-NEXT: mov r3, sp +; FP-NEXT: subs r1, r3, r1 +; FP-NEXT: mov sp, r1 +; FP-NEXT: movs r1, #0 +; FP-NEXT: str r1, [r6, #4] +; FP-NEXT: str r0, [r2] +; FP-NEXT: subs r4, r7, #7 +; FP-NEXT: subs r4, #1 +; FP-NEXT: mov sp, r4 +; FP-NEXT: pop {r4, r6, r7} +; FP-NEXT: pop {r0} +; FP-NEXT: bx r0 +; +; FP-AAPCS-LABEL: required_fp: +; FP-AAPCS: @ %bb.0: +; FP-AAPCS-NEXT: .save {lr} +; FP-AAPCS-NEXT: push {lr} +; FP-AAPCS-NEXT: mov lr, r11 +; FP-AAPCS-NEXT: .save {r11} +; FP-AAPCS-NEXT: push {lr} +; FP-AAPCS-NEXT: .save {r4, r6} +; FP-AAPCS-NEXT: push {r4, r6} +; FP-AAPCS-NEXT: .setfp r11, sp +; FP-AAPCS-NEXT: add r11, sp, #0 +; FP-AAPCS-NEXT: .pad #24 +; FP-AAPCS-NEXT: sub sp, #24 +; FP-AAPCS-NEXT: mov r6, sp +; FP-AAPCS-NEXT: mov r2, r6 +; FP-AAPCS-NEXT: str r1, [r2, 
#16] +; FP-AAPCS-NEXT: str r0, [r2, #20] +; FP-AAPCS-NEXT: mov r1, sp +; FP-AAPCS-NEXT: str r1, [r2, #8] +; FP-AAPCS-NEXT: lsls r1, r0, #2 +; FP-AAPCS-NEXT: adds r1, r1, #7 +; FP-AAPCS-NEXT: movs r3, #7 +; FP-AAPCS-NEXT: bics r1, r3 +; FP-AAPCS-NEXT: mov r3, sp +; FP-AAPCS-NEXT: subs r1, r3, r1 +; FP-AAPCS-NEXT: mov sp, r1 +; FP-AAPCS-NEXT: movs r1, #0 +; FP-AAPCS-NEXT: str r1, [r6, #4] +; FP-AAPCS-NEXT: str r0, [r2] +; FP-AAPCS-NEXT: mov sp, r11 +; FP-AAPCS-NEXT: pop {r4, r6} +; FP-AAPCS-NEXT: pop {lr} +; FP-AAPCS-NEXT: mov r11, lr +; FP-AAPCS-NEXT: pop {r0} +; FP-AAPCS-NEXT: bx r0 +; +; NOFP-LABEL: required_fp: +; NOFP: @ %bb.0: +; NOFP-NEXT: .save {r4, r6, r7, lr} +; NOFP-NEXT: push {r4, r6, r7, lr} +; NOFP-NEXT: .setfp r7, sp, #8 +; NOFP-NEXT: add r7, sp, #8 +; NOFP-NEXT: .pad #24 +; NOFP-NEXT: sub sp, #24 +; NOFP-NEXT: mov r6, sp +; NOFP-NEXT: mov r2, r6 +; NOFP-NEXT: str r1, [r2, #16] +; NOFP-NEXT: str r0, [r2, #20] +; NOFP-NEXT: mov r1, sp +; NOFP-NEXT: str r1, [r2, #8] +; NOFP-NEXT: lsls r1, r0, #2 +; NOFP-NEXT: adds r1, r1, #7 +; NOFP-NEXT: movs r3, #7 +; NOFP-NEXT: bics r1, r3 +; NOFP-NEXT: mov r3, sp +; NOFP-NEXT: subs r1, r3, r1 +; NOFP-NEXT: mov sp, r1 +; NOFP-NEXT: movs r1, #0 +; NOFP-NEXT: str r1, [r6, #4] +; NOFP-NEXT: str r0, [r2] +; NOFP-NEXT: subs r4, r7, #7 +; NOFP-NEXT: subs r4, #1 +; NOFP-NEXT: mov sp, r4 +; NOFP-NEXT: pop {r4, r6, r7} +; NOFP-NEXT: pop {r0} +; NOFP-NEXT: bx r0 +; +; NOFP-AAPCS-LABEL: required_fp: +; NOFP-AAPCS: @ %bb.0: +; NOFP-AAPCS-NEXT: .save {lr} +; NOFP-AAPCS-NEXT: push {lr} +; NOFP-AAPCS-NEXT: mov lr, r11 +; NOFP-AAPCS-NEXT: .save {r11} +; NOFP-AAPCS-NEXT: push {lr} +; NOFP-AAPCS-NEXT: .save {r4, r6} +; NOFP-AAPCS-NEXT: push {r4, r6} +; NOFP-AAPCS-NEXT: .setfp r11, sp +; NOFP-AAPCS-NEXT: add r11, sp, #0 +; NOFP-AAPCS-NEXT: .pad #24 +; NOFP-AAPCS-NEXT: sub sp, #24 +; NOFP-AAPCS-NEXT: mov r6, sp +; NOFP-AAPCS-NEXT: mov r2, r6 +; NOFP-AAPCS-NEXT: str r1, [r2, #16] +; NOFP-AAPCS-NEXT: str r0, [r2, #20] +; NOFP-AAPCS-NEXT: 
mov r1, sp +; NOFP-AAPCS-NEXT: str r1, [r2, #8] +; NOFP-AAPCS-NEXT: lsls r1, r0, #2 +; NOFP-AAPCS-NEXT: adds r1, r1, #7 +; NOFP-AAPCS-NEXT: movs r3, #7 +; NOFP-AAPCS-NEXT: bics r1, r3 +; NOFP-AAPCS-NEXT: mov r3, sp +; NOFP-AAPCS-NEXT: subs r1, r3, r1 +; NOFP-AAPCS-NEXT: mov sp, r1 +; NOFP-AAPCS-NEXT: movs r1, #0 +; NOFP-AAPCS-NEXT: str r1, [r6, #4] +; NOFP-AAPCS-NEXT: str r0, [r2] +; NOFP-AAPCS-NEXT: mov sp, r11 +; NOFP-AAPCS-NEXT: pop {r4, r6} +; NOFP-AAPCS-NEXT: pop {lr} +; NOFP-AAPCS-NEXT: mov r11, lr +; NOFP-AAPCS-NEXT: pop {r0} +; NOFP-AAPCS-NEXT: bx r0 + %3 = alloca i32, align 4 + %4 = alloca i32, align 4 + %5 = alloca i8*, align 8 + %6 = alloca i64, align 8 + store i32 %0, i32* %3, align 4 + store i32 %1, i32* %4, align 4 + %7 = load i32, i32* %3, align 4 + %8 = zext i32 %7 to i64 + %9 = call i8* @llvm.stacksave() + store i8* %9, i8** %5, align 8 + %10 = alloca i32, i64 %8, align 4 + store i64 %8, i64* %6, align 8 + ret void +} +