Index: lib/CodeGen/MachineOutliner.cpp =================================================================== --- lib/CodeGen/MachineOutliner.cpp +++ lib/CodeGen/MachineOutliner.cpp @@ -1159,6 +1159,9 @@ // Outlined functions shouldn't preserve liveness. MF.getProperties().reset(MachineFunctionProperties::Property::TracksLiveness); MF.getRegInfo().freezeReservedRegs(MF); + MF.getProperties().reset(MachineFunctionProperties::Property::IsSSA); + MF.getProperties().set(MachineFunctionProperties::Property::NoPHIs); + MF.getProperties().set(MachineFunctionProperties::Property::NoVRegs); // If there's a DISubprogram associated with this outlined function, then // emit debug info for the outlined function. Index: lib/Target/ARM/ARMBaseInstrInfo.h =================================================================== --- lib/Target/ARM/ARMBaseInstrInfo.h +++ lib/Target/ARM/ARMBaseInstrInfo.h @@ -341,7 +341,38 @@ ArrayRef> getSerializableBitmaskMachineOperandTargetFlags() const override; + /// ARM supports the MachineOutliner. + bool isFunctionSafeToOutlineFrom(MachineFunction &MF, + bool OutlineFromLinkOnceODRs) const override; + outliner::OutlinedFunction getOutliningCandidateInfo( + std::vector &RepeatedSequenceLocs) const override; + outliner::InstrType + getOutliningType(MachineBasicBlock::iterator &MIT, unsigned Flags) const override; + bool isMBBSafeToOutlineFrom(MachineBasicBlock &MBB, + unsigned &Flags) const override; + void buildOutlinedFrame(MachineBasicBlock &MBB, MachineFunction &MF, + const outliner::OutlinedFunction &OF) const override; + MachineBasicBlock::iterator + insertOutlinedCall(Module &M, MachineBasicBlock &MBB, + MachineBasicBlock::iterator &It, MachineFunction &MF, + const outliner::Candidate &C) const override; + private: + /// \brief Sets the offsets on outlined instructions in \p MBB which use SP + /// so that they will be valid post-outlining. + /// + /// \param MBB A \p MachineBasicBlock in an outlined function. 
+ void fixupPostOutline(MachineBasicBlock &MBB) const; + + /// Returns an unused general-purpose register which can be used for + /// constructing an outlined call if one exists. Returns 0 otherwise. + unsigned findRegisterToSaveLRTo(const outliner::Candidate &C) const; + + /// Returns true if the machine instruction offset can handle the stack fixup + /// and updates it if requested. + bool checkAndUpdateStackOffset(MachineInstr *MI, int64_t fixup, + bool Updt) const; + unsigned getInstBundleLength(const MachineInstr &MI) const; int getVLDMDefCycle(const InstrItineraryData *ItinData, Index: lib/Target/ARM/ARMBaseInstrInfo.cpp =================================================================== --- lib/Target/ARM/ARMBaseInstrInfo.cpp +++ lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -33,6 +33,7 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/ScoreboardHazardRecognizer.h" @@ -5107,3 +5108,871 @@ {MO_NONLAZY, "arm-nonlazy"}}; return makeArrayRef(TargetFlags); } + +/// Constants defining how certain sequences should be outlined. +/// This encompasses how an outlined function should be called, and what kind of +/// frame should be emitted for that outlined function. +/// +/// \p MachineOutlinerDefault implies that the function should be called with +/// a save and restore of LR to the stack. +/// +/// That is, +/// +/// I1 Save LR OUTLINED_FUNCTION: +/// I2 --> BL OUTLINED_FUNCTION I1 +/// I3 Restore LR I2 +/// I3 +/// BX LR +/// +/// * Call construction overhead: 3 (save + BL + restore) +/// * Frame construction overhead: 1 (BX) +/// * Requires stack fixups? Yes +/// +/// \p MachineOutlinerTailCall implies that the function is being created from +/// a sequence of instructions ending in a return. 
+/// +/// That is, +/// +/// I1 OUTLINED_FUNCTION: +/// I2 --> B OUTLINED_FUNCTION I1 +/// BX LR I2 +/// BX LR +/// +/// * Call construction overhead: 1 (B) +/// * Frame construction overhead: 0 (Return included in sequence) +/// * Requires stack fixups? No +/// +/// \p MachineOutlinerNoLRSave implies that the function should be called using +/// a BL instruction, but doesn't require LR to be saved and restored. This +/// happens when LR is known to be dead. +/// +/// That is, +/// +/// I1 OUTLINED_FUNCTION: +/// I2 --> BL OUTLINED_FUNCTION I1 +/// I3 I2 +/// I3 +/// BX LR +/// +/// * Call construction overhead: 1 (BL) +/// * Frame construction overhead: 1 (RET) +/// * Requires stack fixups? No +/// +/// \p MachineOutlinerThunk implies that the function is being created from +/// a sequence of instructions ending in a call. The outlined function is +/// called with a BL instruction, and the outlined function tail-calls the +/// original call destination. +/// +/// That is, +/// +/// I1 OUTLINED_FUNCTION: +/// I2 --> BL OUTLINED_FUNCTION I1 +/// BL f I2 +/// B f +/// * Call construction overhead: 1 (BL) +/// * Frame construction overhead: 0 +/// * Requires stack fixups? No +/// +/// \p MachineOutlinerRegSave implies that the function should be called with a +/// save and restore of LR to an available register. This allows us to avoid +/// stack fixups. Note that this outlining variant is compatible with the +/// NoLRSave case. +/// +/// That is, +/// +/// I1 Save LR OUTLINED_FUNCTION: +/// I2 --> BL OUTLINED_FUNCTION I1 +/// I3 Restore LR I2 +/// I3 +/// BX LR +/// +/// * Call construction overhead: 3 (save + BL + restore) +/// * Frame construction overhead: 1 (ret) +/// * Requires stack fixups? No + +enum MachineOutlinerClass { + MachineOutlinerDefault, /// Emit a save, restore, call, and return. + MachineOutlinerTailCall, /// Only emit a branch. + MachineOutlinerNoLRSave, /// Emit a call and return. + MachineOutlinerThunk, /// Emit a call and tail-call. 
+ MachineOutlinerRegSave /// Same as default, but save to a register. +}; + +enum MachineOutlinerMBBFlags { + LRUnavailableSomewhere = 0x2, + HasCalls = 0x4, + UnsafeRegsDead = 0x8 +}; + +unsigned +ARMBaseInstrInfo::findRegisterToSaveLRTo(const outliner::Candidate &C) const { + assert(C.LRUWasSet && "LRU wasn't set?"); + MachineFunction *MF = C.getMF(); + const ARMBaseRegisterInfo *ARI = static_cast( + MF->getSubtarget().getRegisterInfo()); + + bool Thumb = Subtarget.isThumb(); + BitVector regsReserved = ARI->getReservedRegs(*MF); + // Check if there is an available register across the sequence that we can + // use. + for (unsigned Reg : (Thumb ? ARM::tGPRRegClass : ARM::GPRRegClass)) { + if (!(Reg < regsReserved.size() && regsReserved.test(Reg)) && + Reg != ARM::LR && // LR is not reserved, but don't use it. + Reg != ARM::R12 && // R12 is not guaranteed to be preserved. + C.LRU.available(Reg) && C.UsedInSequence.available(Reg)) + return Reg; + } + + // No suitable register. Return 0. + return 0u; +} + +outliner::OutlinedFunction +ARMBaseInstrInfo::getOutliningCandidateInfo( + std::vector &RepeatedSequenceLocs) const { + outliner::Candidate &FirstCand = RepeatedSequenceLocs[0]; + unsigned SequenceSize = + std::accumulate(FirstCand.front(), std::next(FirstCand.back()), 0, + [this](unsigned Sum, const MachineInstr &MI) { + return Sum + getInstSizeInBytes(MI); + }); + + // Properties about candidate MBBs that hold for all of them. + unsigned FlagsSetInAll = 0xF; + + // Compute liveness information for each candidate, and set FlagsSetInAll. 
+ const TargetRegisterInfo &TRI = getRegisterInfo(); + std::for_each(RepeatedSequenceLocs.begin(), RepeatedSequenceLocs.end(), + [&FlagsSetInAll](outliner::Candidate &C) { + FlagsSetInAll &= C.Flags; + }); + + // According to the ARM Procedure Call Standard, the following are + // undefined on entry/exit from a function call: + // + // * Register R12(IP), + // * Condition codes (and thus the CPSR register) + // + // Because of this, we can't outline any sequence of instructions where one + // of these registers is live into/across it. Thus, we need to delete those + // candidates. + auto CantGuaranteeValueAcrossCall = [&TRI](outliner::Candidate &C) { + // If the unsafe registers in this block are all dead, then we don't need + // to compute liveness here. + if (C.Flags & UnsafeRegsDead) + return false; + C.initLRU(TRI); + LiveRegUnits LRU = C.LRU; + return (!LRU.available(ARM::R12) || !LRU.available(ARM::CPSR)); + }; + + // Are there any candidates where those registers are live? + if (!(FlagsSetInAll & UnsafeRegsDead)) { + // Erase every candidate that violates the restrictions above. (It could be + // true that we have viable candidates, so it's not worth bailing out in + // the case that, say, 1 out of 20 candidates violates the restrictions.) + RepeatedSequenceLocs.erase(std::remove_if(RepeatedSequenceLocs.begin(), + RepeatedSequenceLocs.end(), + CantGuaranteeValueAcrossCall), + RepeatedSequenceLocs.end()); + + // If the sequence doesn't have enough candidates left, then we're done. + if (RepeatedSequenceLocs.size() < 2) + return outliner::OutlinedFunction(); + } + + // At this point, we have only "safe" candidates to outline. Figure out + // frame + call instruction information. + + unsigned LastInstrOpcode = RepeatedSequenceLocs[0].back()->getOpcode(); + + // Helper lambda which sets call information for every candidate.
+ auto SetCandidateCallInfo = + [&RepeatedSequenceLocs](unsigned CallID, unsigned NumBytesForCall) { + for (outliner::Candidate &C : RepeatedSequenceLocs) + C.setCallInfo(CallID, NumBytesForCall); + }; + + bool Thumb = Subtarget.isThumb(); + unsigned FrameID = MachineOutlinerDefault; + unsigned NumBytesToCreateFrame = Thumb ? 2 : 4; + + // If the last instruction in any candidate is a terminator, then we should + // tail call all of the candidates. + if (Subtarget.supportsTailCall() + && RepeatedSequenceLocs[0].back()->isTerminator()) { + FrameID = MachineOutlinerTailCall; + NumBytesToCreateFrame = 0; + SetCandidateCallInfo(MachineOutlinerTailCall, 4); + } + + else if (Subtarget.supportsTailCall() + && (LastInstrOpcode == ARM::BL || LastInstrOpcode == ARM::BLX + || LastInstrOpcode == ARM::tBL)) { + // FIXME: Do we need to check if the code after this uses the value of LR? + FrameID = MachineOutlinerThunk; + NumBytesToCreateFrame = 0; + SetCandidateCallInfo(MachineOutlinerThunk, 4); + } + + else { + // We need to decide how to emit calls + frames. We can always emit the same + // frame if we don't need to save to the stack. If we have to save to the + // stack, then we need a different frame. + unsigned NumNoStackSave = 0; + + for (outliner::Candidate &C : RepeatedSequenceLocs) { + C.initLRU(TRI); + + // Is LR available? If so, we don't need a save. + if (C.LRU.available(ARM::LR)) { + C.setCallInfo(MachineOutlinerNoLRSave, 4); + ++NumNoStackSave; + } + + // Is an unused register available? If so, we won't modify the stack, so + // we can outline with the same frame type as those that don't save LR. + else if (findRegisterToSaveLRTo(C)) { + C.setCallInfo(MachineOutlinerRegSave, Thumb ? 8 : 12); + ++NumNoStackSave; + } + } + + // If there are no places where we have to save LR, then note that we don't + // have to update the stack. Otherwise, give every candidate the default + // call type. 
+ if (NumNoStackSave == RepeatedSequenceLocs.size()) + FrameID = MachineOutlinerNoLRSave; + else + SetCandidateCallInfo(MachineOutlinerDefault, Thumb ? 8 : 12); + } + + // Does every candidate's MBB contain a call? If so, then we might have a + // call in the range. + if (FlagsSetInAll & MachineOutlinerMBBFlags::HasCalls) { + // Check if the range contains a call. These require a save + restore of the + // link register. + if (std::any_of(FirstCand.front(), FirstCand.back(), + [](const MachineInstr &MI) { return MI.isCall(); })) + NumBytesToCreateFrame += Thumb ? 4 : 8; // Save + restore link register. + + // Handle the last instruction separately. If this is a tail call, then the + // last instruction is a call. We don't want to save + restore in this case. + // However, it could be possible that the last instruction is a call without + // it being valid to tail call this sequence. We should consider this as + // well. + else if (FrameID != MachineOutlinerThunk && + FrameID != MachineOutlinerTailCall && FirstCand.back()->isCall()) + NumBytesToCreateFrame += Thumb ? 4 : 8; + } + + return outliner::OutlinedFunction(RepeatedSequenceLocs, SequenceSize, + NumBytesToCreateFrame, FrameID); +} + +bool ARMBaseInstrInfo::isFunctionSafeToOutlineFrom( + MachineFunction &MF, bool OutlineFromLinkOnceODRs) const { + const Function &F = MF.getFunction(); + + // Can F be deduplicated by the linker? If it can, don't outline from it. + if (!OutlineFromLinkOnceODRs && F.hasLinkOnceODRLinkage()) + return false; + + // Don't outline from functions with section markings; the program could + // expect that all the code is in the named section. + // FIXME: Allow outlining from multiple functions with the same section + // marking. + if (F.hasSection()) + return false; + + // It's safe to outline from MF. + return true; +} + +bool ARMBaseInstrInfo::isMBBSafeToOutlineFrom(MachineBasicBlock &MBB, + unsigned &Flags) const { + // Check if LR is available through all of the MBB. 
If it's not, then set + // a flag. + assert(MBB.getParent()->getRegInfo().tracksLiveness() && + "Suitable Machine Function for outlining must track liveness"); + + LiveRegUnits LRU(getRegisterInfo()); + + std::for_each(MBB.rbegin(), MBB.rend(), + [&LRU](MachineInstr &MI) { LRU.accumulate(MI); }); + + // Check if each of the unsafe registers are available... + bool R12AvailableInBlock = LRU.available(ARM::R12); + bool CPSRAvailableInBlock = LRU.available(ARM::CPSR); + + // If all of these are dead (and not live out), we know we don't have to check + // them later. + if (R12AvailableInBlock && CPSRAvailableInBlock) + Flags |= MachineOutlinerMBBFlags::UnsafeRegsDead; + + // Now, add the live outs to the set. + LRU.addLiveOuts(MBB); + + // Check if there's a call inside this MachineBasicBlock. If there is, then + // set a flag. + if (any_of(MBB, [](MachineInstr &MI) { return MI.isCall(); })) + Flags |= MachineOutlinerMBBFlags::HasCalls; + + if (!LRU.available(ARM::LR)) + Flags |= MachineOutlinerMBBFlags::LRUnavailableSomewhere; + + return true; +} + +bool ARMBaseInstrInfo::checkAndUpdateStackOffset(MachineInstr *MI, + int64_t fixup, + bool Updt) const { + unsigned AddrMode = (MI->getDesc().TSFlags & ARMII::AddrModeMask); + int Idx = MI->findRegisterUseOperandIdx(ARM::SP) + 1; + int64_t Offset = 0; + unsigned NumBits = 0; + unsigned Scale = 1; + + if (!Idx) + // No SP operand + return true; + + switch (AddrMode) { + case ARMII::AddrMode1: // Arithmetic instructions + case ARMII::AddrMode4: // Load/Store Multiple + case ARMII::AddrMode6: // Neon Load/Store Multiple + case ARMII::AddrModeNone: + // Addressing mode doesn't handle any offset + return false; + break; + case ARMII::AddrModeT2_i8: + case ARMII::AddrModeT2_i8s4: + NumBits = 8; + Offset = MI->getOperand(Idx).getImm(); + break; + case ARMII::AddrMode5: + // VFP address mode.
+ NumBits = 8; + Scale = 4; + Offset = ARM_AM::getAM5Offset(MI->getOperand(Idx).getImm()); + break; + case ARMII::AddrMode5FP16: + // VFP address mode. + NumBits = 8; + Scale = 2; + Offset = ARM_AM::getAM5FP16Offset(MI->getOperand(Idx).getImm()); + break; + case ARMII::AddrModeT2_i12: + case ARMII::AddrMode_i12: + NumBits = 12; + Offset = MI->getOperand(Idx).getImm(); + break; + case ARMII::AddrMode2: + NumBits = 12; + Idx += 1; + Offset = ARM_AM::getAM2Offset(MI->getOperand(Idx).getImm()); + break; + case ARMII::AddrMode3: + NumBits = 8; + Idx += 1; + Offset = ARM_AM::getAM3Offset(MI->getOperand(Idx).getImm()); + break; + case ARMII::AddrModeT1_s: + case ARMII::AddrModeT1_4: + NumBits = 8; + Scale = 4; + Offset = MI->getOperand(Idx).getImm(); + break; + default: + llvm_unreachable("Unsupported addressing mode!"); + } + // Make sure the offset is encodable for instructions that scale the + // immediate. + if (((Offset*Scale+fixup) & (Scale-1)) != 0) + return false; + + Offset += fixup / Scale; + // Offset is in scaled immediate units here, so bound it by the raw field. + unsigned Mask = (1 << NumBits) - 1; + if ((unsigned)Offset <= Mask) { + if (Updt) + MI->getOperand(Idx).setImm(Offset); + return true; + } + + return false; +} + +outliner::InstrType +ARMBaseInstrInfo::getOutliningType(MachineBasicBlock::iterator &MIT, + unsigned Flags) const { + MachineInstr &MI = *MIT; + MachineBasicBlock *MBB = MI.getParent(); + MachineFunction *MF = MBB->getParent(); + const TargetRegisterInfo *TRI = &getRegisterInfo(); + + // Don't allow debug values to impact outlining type. + if (MI.isDebugInstr() || MI.isIndirectDebugValue()) + return outliner::InstrType::Invisible; + + // At this point, KILL instructions don't really tell us much so we can go + // ahead and skip over them. + if (MI.isKill()) + return outliner::InstrType::Invisible; + + // PIC instructions contain labels, outlining them would break offset + // computing.
+ unsigned Opc = MI.getOpcode(); + switch (Opc) { + case ARM::t2IT: + case ARM::tPICADD: + case ARM::PICADD: + case ARM::PICSTR: + case ARM::PICSTRB: + case ARM::PICSTRH: + case ARM::PICLDR: + case ARM::PICLDRB: + case ARM::PICLDRH: + case ARM::PICLDRSB: + case ARM::PICLDRSH: + return outliner::InstrType::Illegal; + default: + break; + } + + // Is this a terminator for a basic block? + if (MI.isTerminator()) { + // Don't outline if the branch is not unconditional. + if (Opc == ARM::BX_RET || Opc == ARM::tBX_RET || Opc == ARM::MOVPCLR) { + if (MI.getOperand(0).getImm() != ARMCC::AL) + return outliner::InstrType::Illegal; + } + if (Opc == ARM::LDMIA_RET) { + if (MI.getOperand(2).getImm() != ARMCC::AL) + return outliner::InstrType::Illegal; + } + + // Is this the end of a function? + if (MI.getParent()->succ_empty()) + return outliner::InstrType::Legal; + + // It's not, so don't outline it. + return outliner::InstrType::Illegal; + } + + // Make sure none of the operands are un-outlinable. + for (const MachineOperand &MOP : MI.operands()) { + if (MOP.isCPI() || MOP.isJTI() || MOP.isCFIIndex() || MOP.isFI() || + MOP.isTargetIndex()) + return outliner::InstrType::Illegal; + + // If it uses LR then don't touch it. + if (MOP.isReg() && !MOP.isImplicit() && (MOP.getReg() == ARM::LR)) + return outliner::InstrType::Illegal; + } + + // If MI is a call we might be able to outline it. We don't want to outline + // any calls that rely on the position of items on the stack. When we outline + // something containing a call, we have to emit a save and restore of LR in + // the outlined function. Currently, this always happens by saving LR to the + // stack. Thus, if we outline, say, half the parameters for a function call + // plus the call, then we'll break the callee's expectations for the layout + // of the stack. + // + // FIXME: Allow calls to functions which construct a stack frame, as long + // as they don't access arguments on the stack. 
+ // FIXME: Figure out some way to analyze functions defined in other modules. + // We should be able to compute the memory usage based on the IR calling + // convention, even if we can't see the definition. + if (MI.isCall()) { + // Get the function associated with the call. Look at each operand and find + // the one that represents the callee and get its name. + const Function *Callee = nullptr; + for (const MachineOperand &MOP : MI.operands()) { + if (MOP.isGlobal()) { + Callee = dyn_cast(MOP.getGlobal()); + break; + } + } + + // Never outline calls to mcount. There isn't any rule that would require + // this, but the Linux kernel's "ftrace" feature depends on it. + if (Callee && Callee->getName() == "\01_mcount") + return outliner::InstrType::Illegal; + + // If we don't know anything about the callee, assume it depends on the + // stack layout of the caller. In that case, it's only legal to outline + // as a tail-call. Whitelist the call instructions we know about so we + // don't get unexpected results with call pseudo-instructions. + auto UnknownCallOutlineType = outliner::InstrType::Illegal; + if (MI.getOpcode() == ARM::BL || MI.getOpcode() == ARM::tBL + || MI.getOpcode() == ARM::BLX) + UnknownCallOutlineType = outliner::InstrType::LegalTerminator; + + if (!Callee) + return UnknownCallOutlineType; + + // We have a function we have information about. Check if it's something we + // can safely outline. + MachineFunction *CalleeMF = MF->getMMI().getMachineFunction(*Callee); + + // We don't know what's going on with the callee at all. Don't touch it. + if (!CalleeMF) + return UnknownCallOutlineType; + + // Check if we know anything about the callee saves on the function. If we + // don't, then don't touch it, since that implies that we haven't + // computed anything about its stack frame yet.
+ MachineFrameInfo &MFI = CalleeMF->getFrameInfo(); + if (!MFI.isCalleeSavedInfoValid() || MFI.getStackSize() > 0 || + MFI.getNumObjects() > 0) + return UnknownCallOutlineType; + + // At this point, we can say that CalleeMF ought to not pass anything on the + // stack. Therefore, we can outline it. + return outliner::InstrType::Legal; + } + + // Don't outline positions. + if (MI.isPosition()) + return outliner::InstrType::Illegal; + + // Don't touch the link register + if (MI.readsRegister(ARM::LR, &getRegisterInfo()) || + MI.modifiesRegister(ARM::LR, &getRegisterInfo())) + return outliner::InstrType::Illegal; + + // Does this use the stack? + if (MI.modifiesRegister(ARM::SP, TRI) || + MI.readsRegister(ARM::SP, TRI)) { + // True if there is no chance that any outlined candidate from this range + // could require stack fixups. That is, both + // * LR is available in the range (No save/restore around call) + // * The range doesn't include calls (No save/restore in outlined frame) + // are true. + // FIXME: This is very restrictive; the flags check the whole block, + // not just the bit we will try to outline. + bool MightNeedStackFixUp = + (Flags & (MachineOutlinerMBBFlags::LRUnavailableSomewhere | + MachineOutlinerMBBFlags::HasCalls)); + + // If this instruction is in a range where it *never* needs to be fixed + // up, then we can *always* outline it. This is true even if it's not + // possible to fix that instruction up. + // + // Why? Consider two equivalent instructions I1, I2 where both I1 and I2 + // use SP. Suppose that I1 sits within a range that definitely doesn't + // need stack fixups, while I2 sits in a range that does. + // + // First, I1 can be outlined as long as we *never* fix up the stack in + // any sequence containing it. I1 is already a safe instruction in the + // original program, so as long as we don't modify it we're good to go. + // So this leaves us with showing that outlining I2 won't break our + // program. 
+ // + // Suppose I1 and I2 belong to equivalent candidate sequences. When we + // look at I2, we need to see if it can be fixed up. Suppose I2 (and + // thus I1) cannot be fixed up. Then I2 will be assigned a unique + // integer label; thus, I2 cannot belong to any candidate sequence (a + // contradiction). Suppose I2 can be fixed up. Then I1 can be fixed up + // as well, so we're good. Thus, I1 is always safe to outline. + if (!MightNeedStackFixUp) + return outliner::InstrType::Legal; + + // Any modification of SP will break our code to save/restore LR. + // FIXME: We could handle some instructions which add a constant offset to + // SP, with a bit more work. + if (MI.modifiesRegister(ARM::SP, TRI)) + return outliner::InstrType::Illegal; + + // At this point, we have a stack instruction that we might need to fix + // up. We'll handle it if it's a load or store. + if (checkAndUpdateStackOffset(&MI, Subtarget.isThumb1Only() ? 8 : 4, false)) { + // It's in range, so we can outline it. + return outliner::InstrType::Legal; + } + + // We can't fix it up, so don't outline it. + return outliner::InstrType::Illegal; + } + + return outliner::InstrType::Legal; +} + +void ARMBaseInstrInfo::fixupPostOutline(MachineBasicBlock &MBB) const { + for (MachineInstr &MI : MBB) { + if (!MI.mayLoadOrStore()) { + continue; + } + checkAndUpdateStackOffset(&MI, Subtarget.isThumb1Only() ? 8 : 4, true); + } +} + +void ARMBaseInstrInfo::buildOutlinedFrame( + MachineBasicBlock &MBB, MachineFunction &MF, + const outliner::OutlinedFunction &OF) const { + bool Thumb = Subtarget.isThumb(); + // For thunk outlining, rewrite the last instruction from a call to a + // tail-call.
+ if (OF.FrameConstructionID == MachineOutlinerThunk) { + MachineInstr *Call = &*--MBB.instr_end(); + MachineInstr *TC; + + if (Thumb) + TC = BuildMI(MF, DebugLoc(), get(ARM::tTAILJMPdND)) + .add(Call->getOperand(2)) + .add(predOps(ARMCC::AL)); + else if (Call->getOperand(0).isReg()) + TC = BuildMI(MF, DebugLoc(), get(ARM::TAILJMPr)) + .add(Call->getOperand(0)); + else + TC = BuildMI(MF, DebugLoc(), get(ARM::TAILJMPd)) + .add(Call->getOperand(0)); + MBB.insert(MBB.end(), TC); + Call->eraseFromParent(); + } + // Is there a call in the outlined range? + auto IsNonTailCall = [](MachineInstr &MI) { + return MI.isCall() && !MI.isReturn(); + }; + if (std::any_of(MBB.instr_begin(), MBB.instr_end(), IsNonTailCall)) { + // Fix up the instructions in the range, since we're going to modify the + // stack. + assert(OF.FrameConstructionID != MachineOutlinerDefault && + "Can only fix up stack references once"); + fixupPostOutline(MBB); + + // LR has to be a live in so that we can save it. + MBB.addLiveIn(ARM::LR); + + MachineBasicBlock::iterator It = MBB.begin(); + MachineBasicBlock::iterator Et = MBB.end(); + + if (OF.FrameConstructionID == MachineOutlinerTailCall || + OF.FrameConstructionID == MachineOutlinerThunk) + Et = std::prev(MBB.end()); + + // Insert a save before the outlined region + MachineInstr *Save; + if (Thumb) + if (Subtarget.isThumb1Only()) + Save = BuildMI(MF, DebugLoc(), get(ARM::tPUSH)) + .add(predOps(ARMCC::AL)) + .addReg(ARM::R7) + .addReg(ARM::LR); + else + Save = BuildMI(MF, DebugLoc(), get(ARM::tPUSH)) + .add(predOps(ARMCC::AL)) + .addReg(ARM::LR); + else + Save = BuildMI(MF, DebugLoc(), get(ARM::STMDB_UPD)) + .addReg(ARM::SP, RegState::Define) + .addReg(ARM::SP) + .add(predOps(ARMCC::AL)) + .addReg(ARM::LR); + + It = MBB.insert(It, Save); + + const TargetSubtargetInfo &STI = MF.getSubtarget(); + const MCRegisterInfo *MRI = STI.getRegisterInfo(); + unsigned DwarfReg = MRI->getDwarfRegNum(ARM::LR, true); + + // Add a CFI saying the stack was moved 8 B 
down. + int64_t StackPosEntry = + MF.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, 8)); + BuildMI(MBB, It, DebugLoc(), get(ARM::CFI_INSTRUCTION)) + .addCFIIndex(StackPosEntry) + .setMIFlags(MachineInstr::FrameSetup); + + // Add a CFI saying that the LR that we want to find is now 8 B higher than + // before. + int64_t LRPosEntry = + MF.addFrameInst(MCCFIInstruction::createOffset(nullptr, DwarfReg, 8)); + BuildMI(MBB, It, DebugLoc(), get(ARM::CFI_INSTRUCTION)) + .addCFIIndex(LRPosEntry) + .setMIFlags(MachineInstr::FrameSetup); + + // Insert a restore before the terminator for the function. + MachineInstr *Restore; + if (Thumb) + if (Subtarget.isThumb1Only()) + Restore = BuildMI(MF, DebugLoc(), get(ARM::tPOP_RET)) + .add(predOps(ARMCC::AL)) + .addReg(ARM::R7) + .addReg(ARM::PC); + else + Restore = BuildMI(MF, DebugLoc(), get(ARM::t2LDR_POST)) + .addReg(ARM::LR, RegState::Define) + .addReg(ARM::SP, RegState::Define) + .addReg(ARM::SP) + .addImm(4) + .add(predOps(ARMCC::AL)); + else + Restore = BuildMI(MF, DebugLoc(), get(ARM::LDMIA_UPD)) + .addReg(ARM::SP, RegState::Define) + .addReg(ARM::SP) + .add(predOps(ARMCC::AL)) + .addReg(ARM::LR); + + Et = MBB.insert(Et, Restore); + } + + // If this is a tail call outlined function, then there's already a return. + if (OF.FrameConstructionID == MachineOutlinerTailCall || + OF.FrameConstructionID == MachineOutlinerThunk) + return; + + // It's not a tail call, so we have to insert the return ourselves unless we + // are in Thumb1 where LR is restored into PC. Get the correct opcode from + //current feature set. + if (!Subtarget.isThumb1Only()) { + MachineInstr *ret = BuildMI(MF, DebugLoc(), + get(Subtarget.getReturnOpcode())) + .add(predOps(ARMCC::AL)); + MBB.insert(MBB.end(), ret); + } + + // Did we have to modify the stack by saving the link register? + if (OF.FrameConstructionID != MachineOutlinerDefault) + return; + + // We modified the stack. + // Walk over the basic block and fix up all the stack accesses. 
+  fixupPostOutline(MBB);
+}
+
+/// Insert a call to the outlined function into \p MBB at \p It, using the call
+/// sequence selected by \p C.CallConstructionID.
+///
+/// The callee is looked up in \p M under the name of \p MF. Depending on the
+/// construction ID this emits:
+///   * MachineOutlinerTailCall: a single tail jump to the callee;
+///   * MachineOutlinerNoLRSave / MachineOutlinerThunk: a single BL;
+///   * MachineOutlinerRegSave: a BL wrapped in moves that park LR in the
+///     register returned by findRegisterToSaveLRTo();
+///   * otherwise (default): a BL wrapped in a push/pop of LR on the stack.
+///
+/// \param M   Module used to resolve the callee global by name.
+/// \param MBB Block that receives the call sequence.
+/// \param It  Insertion point. On return it refers to the last instruction
+///            that was inserted.
+/// \param MF  Function whose name identifies the callee.
+/// \param C   Candidate describing which call construction to use.
+/// \returns An iterator to the inserted call (or tail jump) instruction.
+MachineBasicBlock::iterator ARMBaseInstrInfo::insertOutlinedCall(
+    Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It,
+    MachineFunction &MF, const outliner::Candidate &C) const {
+
+  bool Thumb = Subtarget.isThumb();
+  // Are we tail calling?
+  if (C.CallConstructionID == MachineOutlinerTailCall) {
+    // If yes, then we can just branch to the label.
+    if (Thumb)
+      It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(ARM::tTAILJMPdND))
+                              .addGlobalAddress(M.getNamedValue(MF.getName()))
+                              .add(predOps(ARMCC::AL)));
+    else
+      It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(ARM::TAILJMPd))
+                              .addGlobalAddress(M.getNamedValue(MF.getName())));
+    return It;
+  }
+
+  // Are we saving the link register?
+  if (C.CallConstructionID == MachineOutlinerNoLRSave ||
+      C.CallConstructionID == MachineOutlinerThunk) {
+    // No, so just insert the call.
+    if (Thumb)
+      It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(ARM::tBL))
+                              .add(predOps(ARMCC::AL))
+                              .addGlobalAddress(M.getNamedValue(MF.getName())));
+    else
+      It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(ARM::BL))
+                              .addGlobalAddress(M.getNamedValue(MF.getName())));
+    return It;
+  }
+
+  // We want to return the spot where we inserted the call.
+  MachineBasicBlock::iterator CallPt;
+
+  // Instructions for saving and restoring LR around the call instruction we're
+  // going to insert. Every path below assigns both before they are used.
+  MachineInstr *Save = nullptr;
+  MachineInstr *Restore = nullptr;
+  MBB.addLiveIn(ARM::LR);
+  // Can we save to a register?
+  if (C.CallConstructionID == MachineOutlinerRegSave) {
+    // FIXME: This logic should be sunk into a target-specific interface so that
+    // we don't have to recompute the register.
+    unsigned Reg = findRegisterToSaveLRTo(C);
+    assert(Reg != 0 && "No callee-saved register available?");
+
+    // Save and restore LR from that register.
+    if (Thumb) {
+      Save = BuildMI(MF, DebugLoc(), get(ARM::tMOVr), Reg)
+                 .addReg(ARM::LR)
+                 .add(predOps(ARMCC::AL));
+      Restore = BuildMI(MF, DebugLoc(), get(ARM::tMOVr), ARM::LR)
+                    .addReg(Reg)
+                    .add(predOps(ARMCC::AL));
+    } else {
+      Save = BuildMI(MF, DebugLoc(), get(ARM::MOVr), Reg)
+                 .addReg(ARM::LR)
+                 .add(predOps(ARMCC::AL))
+                 .add(condCodeOp());
+      Restore = BuildMI(MF, DebugLoc(), get(ARM::MOVr), ARM::LR)
+                    .addReg(Reg)
+                    .add(predOps(ARMCC::AL))
+                    .add(condCodeOp());
+    }
+  } else {
+    // We have the default case. Save and restore from SP.
+    if (Thumb) {
+      if (Subtarget.isThumb1Only()) {
+        // Thumb1 pushes R7 together with LR and uses R7 as a scratch register
+        // for the restore; the sequence is completed after the call below.
+        //
+        // NOTE(review): PUSH {r7, lr} stores r7 at the lower address, so the
+        // POP {r7} built here appears to reload the saved r7 rather than the
+        // saved LR, which would leave LR and r7 swapped once the trailing
+        // MOV/POP have run. Confirm against the ARM ARM; any fix must be kept
+        // in sync with the call overhead used by getOutliningCandidateInfo.
+        Save = BuildMI(MF, DebugLoc(), get(ARM::tPUSH))
+                   .add(predOps(ARMCC::AL))
+                   .addReg(ARM::R7)
+                   .addReg(ARM::LR);
+        Restore = BuildMI(MF, DebugLoc(), get(ARM::tPOP))
+                      .add(predOps(ARMCC::AL))
+                      .addReg(ARM::R7, RegState::Define);
+      } else {
+        Save = BuildMI(MF, DebugLoc(), get(ARM::tPUSH))
+                   .add(predOps(ARMCC::AL))
+                   .addReg(ARM::LR);
+        Restore = BuildMI(MF, DebugLoc(), get(ARM::t2LDR_POST))
+                      .addReg(ARM::LR, RegState::Define)
+                      .addReg(ARM::SP, RegState::Define)
+                      .addReg(ARM::SP)
+                      .addImm(4)
+                      .add(predOps(ARMCC::AL));
+      }
+    } else {
+      Save = BuildMI(MF, DebugLoc(), get(ARM::STMDB_UPD))
+                 .addReg(ARM::SP, RegState::Define)
+                 .addReg(ARM::SP)
+                 .add(predOps(ARMCC::AL))
+                 .addReg(ARM::LR);
+      // The load-multiple writes LR, so it must be added as a def, matching
+      // the Thumb restores above.
+      Restore = BuildMI(MF, DebugLoc(), get(ARM::LDMIA_UPD))
+                    .addReg(ARM::SP, RegState::Define)
+                    .addReg(ARM::SP)
+                    .add(predOps(ARMCC::AL))
+                    .addReg(ARM::LR, RegState::Define);
+    }
+  }
+
+  It = MBB.insert(It, Save);
+  ++It;
+
+  // Insert the call.
+  if (Thumb)
+    It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(ARM::tBL))
+                            .add(predOps(ARMCC::AL))
+                            .addGlobalAddress(M.getNamedValue(MF.getName())));
+  else
+    It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(ARM::BL))
+                            .addGlobalAddress(M.getNamedValue(MF.getName())));
+
+  CallPt = It;
+  ++It;
+
+  // Restore the link register.
+  It = MBB.insert(It, Restore);
+
+  // Thumb1 special handling to restore LR: move the value popped into R7 over
+  // to LR, then pop R7 again.
+  if (C.CallConstructionID != MachineOutlinerRegSave &&
+      Subtarget.isThumb1Only()) {
+    ++It;
+    It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(ARM::tMOVr))
+                            .addReg(ARM::LR, RegState::Define)
+                            .addReg(ARM::R7, RegState::Kill)
+                            .add(predOps(ARMCC::AL)));
+    ++It;
+    It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(ARM::tPOP))
+                            .add(predOps(ARMCC::AL))
+                            .addReg(ARM::R7, RegState::Define));
+  }
+
+  return CallPt;
+}
Index: lib/Target/ARM/ARMTargetMachine.cpp
===================================================================
--- lib/Target/ARM/ARMTargetMachine.cpp
+++ lib/Target/ARM/ARMTargetMachine.cpp
@@ -236,6 +236,10 @@
   }
 
   initAsmInfo();
+
+  // ARM supports the MachineOutliner.
+  setMachineOutliner(true);
+  setSupportsDefaultOutlining(true);
 }
 
 ARMBaseTargetMachine::~ARMBaseTargetMachine() = default;
@@ -351,6 +355,7 @@
   void addPreRegAlloc() override;
   void addPreSched2() override;
   void addPreEmitPass() override;
+  void addPreEmitPass2() override;
 };
 
 class ARMExecutionDomainFix : public ExecutionDomainFix {
@@ -504,6 +509,8 @@
   // Don't optimize barriers at -O0.
   if (getOptLevel() != CodeGenOpt::None)
     addPass(createARMOptimizeBarriersPass());
+}
 
+void ARMPassConfig::addPreEmitPass2() {
   addPass(createARMConstantIslandPass());
 }
Index: test/CodeGen/ARM/machine-outliner-default.mir
===================================================================
--- /dev/null
+++ test/CodeGen/ARM/machine-outliner-default.mir
@@ -0,0 +1,72 @@
+# RUN: llc -mtriple=arm-- -run-pass=machine-outliner -verify-machineinstrs \
+# RUN: %s -o - | FileCheck %s
+# RUN: llc -mtriple=thumbv7-- -run-pass=machine-outliner -verify-machineinstrs \
+# RUN: %s -o - | FileCheck %s
+
+--- |
+  define void @outline_1() #0 { ret void }
+  define void @outline_2() #0 { ret void }
+  define void @outline_3() #0 { ret void }
+  define void @dont_outline() { ret void }
+
+  attributes #0 = { minsize optsize }
+...
+--- + +name: outline_1 +tracksRegLiveness: true +body: | + bb.0: + ; CHECK-LABEL: bb.0: + ; CHECK: OUTLINED + liveins: $r2 + $r2 = MOVi 1, 14, $noreg, $noreg + $r2 = MOVi 1, 14, $noreg, $noreg + $r2 = MOVi 1, 14, $noreg, $noreg + $r2 = MOVi 1, 14, $noreg, $noreg + BX_RET 14, $noreg +... +--- + +name: outline_2 +tracksRegLiveness: true +body: | + bb.0: + ; CHECK-LABEL: bb.0: + ; CHECK: OUTLINED + liveins: $r2 + $r2 = MOVi 1, 14, $noreg, $noreg + $r2 = MOVi 1, 14, $noreg, $noreg + $r2 = MOVi 1, 14, $noreg, $noreg + $r2 = MOVi 1, 14, $noreg, $noreg + BX_RET 14, $noreg +... +--- + +name: outline_3 +tracksRegLiveness: true +body: | + bb.0: + ; CHECK-LABEL: bb.0: + ; CHECK: OUTLINED + liveins: $r2 + $r2 = MOVi 1, 14, $noreg, $noreg + $r2 = MOVi 1, 14, $noreg, $noreg + $r2 = MOVi 1, 14, $noreg, $noreg + $r2 = MOVi 1, 14, $noreg, $noreg + BX_RET 14, $noreg +... +--- + +name: dont_outline +tracksRegLiveness: true +body: | + bb.0: + ; CHECK-LABEL: bb.0: + ; CHECK-NOT: BL + liveins: $lr, $r2 + $r2 = MOVi 1, 14, $noreg, $noreg + $r2 = MOVi 1, 14, $noreg, $noreg + $r2 = MOVi 1, 14, $noreg, $noreg + $r2 = MOVi 1, 14, $noreg, $noreg + BX_RET 14, $noreg Index: test/CodeGen/ARM/machine-outliner-tail.ll =================================================================== --- /dev/null +++ test/CodeGen/ARM/machine-outliner-tail.ll @@ -0,0 +1,31 @@ +; RUN: llc -verify-machineinstrs -enable-machine-outliner -mtriple=arm-- \ +; RUN: < %s | FileCheck %s --check-prefix=ARM +; RUN: llc -verify-machineinstrs -enable-machine-outliner -mtriple=thumbv7-- \ +; RUN: < %s | FileCheck %s --check-prefix=THUMB + +; ARM-LABEL: OUTLINED_FUNCTION_0: +; ARM: mov r0, #1 +; ARM-NEXT: mov r1, #2 +; ARM-NEXT: mov r2, #3 +; ARM-NEXT: mov r3, #4 +; ARM-NEXT: b z + +; THUMB-LABEL: OUTLINED_FUNCTION_0: +; THUMB: movs r0, #1 +; THUMB-NEXT: movs r1, #2 +; THUMB-NEXT: movs r2, #3 +; THUMB-NEXT: movs r3, #4 +; THUMB-NEXT: b z +define void @a() { +entry: + tail call void @z(i32 1, i32 2, i32 3, i32 4) + ret void +} 
+ +declare void @z(i32, i32, i32, i32) + +define dso_local void @b(i32* nocapture readnone %p) { +entry: + tail call void @z(i32 1, i32 2, i32 3, i32 4) + ret void +} Index: test/CodeGen/ARM/machine-outliner-thunk.ll =================================================================== --- /dev/null +++ test/CodeGen/ARM/machine-outliner-thunk.ll @@ -0,0 +1,62 @@ +; RUN: llc -enable-machine-outliner -verify-machineinstrs -mtriple=armv7-- \ +; RUN: < %s | FileCheck %s --check-prefix=ARM +; RUN: llc -enable-machine-outliner -verify-machineinstrs -mtriple=thumbv7-- \ +; RUN: < %s | FileCheck %s --check-prefix=THUMB + +declare i32 @thunk_called_fn(i32, i32, i32, i32) + +define i32 @a() { +; ARM-LABEL: a: +; ARM: @ %bb.0: @ %entry +; ARM-NEXT: push {r11, lr} +; ARM-NEXT: bl OUTLINED_FUNCTION_0 +; ARM-NEXT: add r0, r0, #8 +; ARM-NEXT: pop {r11, pc} + +; THUMB-LABEL: a: +; THUMB: @ %bb.0: @ %entry +; THUMB-NEXT: push {r7, lr} +; THUMB-NEXT: bl OUTLINED_FUNCTION_0 +; THUMB-NEXT: adds r0, #8 +; THUMB-NEXT: pop {r7, pc} +entry: + %call = tail call i32 @thunk_called_fn(i32 1, i32 2, i32 3, i32 4) + %cx = add i32 %call, 8 + ret i32 %cx +} + +define i32 @b() { +; ARM-LABEL: b: +; ARM: @ %bb.0: @ %entry +; ARM-NEXT: push {r11, lr} +; ARM-NEXT: bl OUTLINED_FUNCTION_0 +; ARM-NEXT: add r0, r0, #88 +; ARM-NEXT: pop {r11, pc} + +; THUMB-LABEL: b: +; THUMB: @ %bb.0: @ %entry +; THUMB-NEXT: push {r7, lr} +; THUMB-NEXT: bl OUTLINED_FUNCTION_0 +; THUMB-NEXT: adds r0, #88 +; THUMB-NEXT: pop {r7, pc} +entry: + %call = tail call i32 @thunk_called_fn(i32 1, i32 2, i32 3, i32 4) + %cx = add i32 %call, 88 + ret i32 %cx +} + +; ARM-LABEL: OUTLINED_FUNCTION_0: +; ARM: @ %bb.0: +; ARM-NEXT: mov r0, #1 +; ARM-NEXT: mov r1, #2 +; ARM-NEXT: mov r2, #3 +; ARM-NEXT: mov r3, #4 +; ARM-NEXT: b thunk_called_fn + +; THUMB-LABEL: OUTLINED_FUNCTION_0: +; THUMB: @ %bb.0: +; THUMB-NEXT: movs r0, #1 +; THUMB-NEXT: movs r1, #2 +; THUMB-NEXT: movs r2, #3 +; THUMB-NEXT: movs r3, #4 +; THUMB-NEXT: b thunk_called_fn 
Index: test/CodeGen/ARM/machine-outliner.ll =================================================================== --- /dev/null +++ test/CodeGen/ARM/machine-outliner.ll @@ -0,0 +1,135 @@ +; RUN: llc -verify-machineinstrs -enable-machine-outliner -mtriple=arm-- \ +; RUN: < %s | FileCheck %s --check-prefixes=CHECK,ARM +; RUN: llc -verify-machineinstrs -enable-machine-outliner -mtriple=thumbv7-- \ +; RUN: < %s | FileCheck %s --check-prefixes=CHECK,THUMB +; RUN: llc -verify-machineinstrs -enable-machine-outliner \ +; RUN: -enable-linkonceodr-outlining -mtriple=arm-- < %s | FileCheck %s \ +; RUN: --check-prefix=ODR +; RUN: llc -verify-machineinstrs -enable-machine-outliner \ +; RUN: -enable-linkonceodr-outlining -mtriple=thumbv7-- < %s | FileCheck %s \ +; RUN: --check-prefix=ODR +; RUN: llc -verify-machineinstrs -enable-machine-outliner -mtriple=arm-- \ +; RUN: -stop-after=machine-outliner < %s | FileCheck %s \ +; RUN: --check-prefix=TARGET_FEATURES + + +; Make sure that we inherit target features from functions and make sure we have +; the right function attributes. 
+; TARGET_FEATURES: define internal void @OUTLINED_FUNCTION_{{[0-9]+}}() +; TARGET_FEATURES-SAME: #[[ATTR_NUM:[0-9]+]] +; TARGET_FEATURES-DAG: attributes #[[ATTR_NUM]] = { +; TARGET_FEATURES-SAME: minsize +; TARGET_FEATURES-SAME: optsize +; TARGET_FEATURES-SAME: "target-features"="+neon" + +define linkonce_odr void @fish() #0 { + ; CHECK-LABEL: fish: + ; CHECK-NOT: OUTLINED + ; ODR: [[OUTLINED:OUTLINED_FUNCTION_[0-9]+]] + %1 = alloca i32, align 4 + %2 = alloca i32, align 4 + %3 = alloca i32, align 4 + %4 = alloca i32, align 4 + %5 = alloca i32, align 4 + %6 = alloca i32, align 4 + store i32 1, i32* %1, align 4 + store i32 2, i32* %2, align 4 + store i32 3, i32* %3, align 4 + store i32 4, i32* %4, align 4 + store i32 5, i32* %5, align 4 + store i32 6, i32* %6, align 4 + ret void +} + +define void @turtle() section "TURTLE,turtle" { + ; CHECK-LABEL: turtle: + ; ODR-LABEL: turtle: + ; CHECK-NOT: OUTLINED + %1 = alloca i32, align 4 + %2 = alloca i32, align 4 + %3 = alloca i32, align 4 + %4 = alloca i32, align 4 + %5 = alloca i32, align 4 + %6 = alloca i32, align 4 + store i32 1, i32* %1, align 4 + store i32 2, i32* %2, align 4 + store i32 3, i32* %3, align 4 + store i32 4, i32* %4, align 4 + store i32 5, i32* %5, align 4 + store i32 6, i32* %6, align 4 + ret void +} + +define void @cat() #0 { + ; CHECK-LABEL: cat: + ; CHECK: [[OUTLINED:OUTLINED_FUNCTION_[0-9]+]] + ; ODR: [[OUTLINED]] + %1 = alloca i32, align 4 + %2 = alloca i32, align 4 + %3 = alloca i32, align 4 + %4 = alloca i32, align 4 + %5 = alloca i32, align 4 + %6 = alloca i32, align 4 + store i32 1, i32* %1, align 4 + store i32 2, i32* %2, align 4 + store i32 3, i32* %3, align 4 + store i32 4, i32* %4, align 4 + store i32 5, i32* %5, align 4 + store i32 6, i32* %6, align 4 + ret void +} + +define void @dog() #0 { + ; CHECK-LABEL: dog: + ; CHECK: [[OUTLINED]] + ; ODR: [[OUTLINED]] + %1 = alloca i32, align 4 + %2 = alloca i32, align 4 + %3 = alloca i32, align 4 + %4 = alloca i32, align 4 + %5 = alloca i32, align 
4 + %6 = alloca i32, align 4 + store i32 1, i32* %1, align 4 + store i32 2, i32* %2, align 4 + store i32 3, i32* %3, align 4 + store i32 4, i32* %4, align 4 + store i32 5, i32* %5, align 4 + store i32 6, i32* %6, align 4 + ret void +} + +; ODR: [[OUTLINED]]: +; ARM: .code 32 +; ARM-NEXT: [[OUTLINED]]: +; ARM: mov r0, #2 +; ARM-NEXT: str r0, [sp, #16] +; ARM-NEXT: mov r0, #1 +; ARM-NEXT: str r0, [sp, #20] +; ARM-NEXT: mov r0, #3 +; ARM-NEXT: str r0, [sp, #12] +; ARM-NEXT: mov r0, #4 +; ARM-NEXT: str r0, [sp, #8] +; ARM-NEXT: mov r0, #5 +; ARM-NEXT: str r0, [sp, #4] +; ARM-NEXT: mov r0, #6 +; ARM-NEXT: str r0, [sp] +; ARM-NEXT: mov pc, lr + +; THUMB: .code 16 +; THUMB-NEXT: .thumb_func +; THUMB-NEXT: [[OUTLINED]]: +; THUMB: movs r0, #2 +; THUMB-NEXT: str r0, [sp, #16] +; THUMB-NEXT: movs r0, #1 +; THUMB-NEXT: str r0, [sp, #20] +; THUMB-NEXT: movs r0, #3 +; THUMB-NEXT: str r0, [sp, #12] +; THUMB-NEXT: movs r0, #4 +; THUMB-NEXT: str r0, [sp, #8] +; THUMB-NEXT: movs r0, #5 +; THUMB-NEXT: str r0, [sp, #4] +; THUMB-NEXT: movs r0, #6 +; THUMB-NEXT: str r0, [sp] +; THUMB-NEXT: bx lr + +attributes #0 = { nounwind "target-cpu"="cortex-a53" "target-features"="+neon" }