diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -5992,8 +5992,9 @@ // We only support -moutline in AArch64 right now. If we're not compiling // for AArch64, emit a warning and ignore the flag. Otherwise, add the // proper mllvm flags. - if (Triple.getArch() != llvm::Triple::aarch64 && - Triple.getArch() != llvm::Triple::aarch64_32) { + if (!(Triple.isARM() || Triple.isThumb() || + Triple.getArch() == llvm::Triple::aarch64 || + Triple.getArch() == llvm::Triple::aarch64_32)) { D.Diag(diag::warn_drv_moutline_unsupported_opt) << Triple.getArchName(); } else { CmdArgs.push_back("-mllvm"); diff --git a/llvm/include/llvm/CodeGen/TargetPassConfig.h b/llvm/include/llvm/CodeGen/TargetPassConfig.h --- a/llvm/include/llvm/CodeGen/TargetPassConfig.h +++ b/llvm/include/llvm/CodeGen/TargetPassConfig.h @@ -132,6 +132,9 @@ /// callers. bool RequireCodeGenSCCOrder = false; + /// Default setting for -enable-machine-outliner; set when the pass is added. + bool MachineOutlinerEnabled = false; + /// Add the actual instruction selection passes. This does not include /// preparation passes on IR. bool addCoreISelPasses(); diff --git a/llvm/lib/CodeGen/MachineOutliner.cpp b/llvm/lib/CodeGen/MachineOutliner.cpp --- a/llvm/lib/CodeGen/MachineOutliner.cpp +++ b/llvm/lib/CodeGen/MachineOutliner.cpp @@ -1155,6 +1155,9 @@ // Outlined functions shouldn't preserve liveness. MF.getProperties().reset(MachineFunctionProperties::Property::TracksLiveness); MF.getRegInfo().freezeReservedRegs(MF); + MF.getProperties().reset(MachineFunctionProperties::Property::IsSSA); + MF.getProperties().set(MachineFunctionProperties::Property::NoPHIs); + MF.getProperties().set(MachineFunctionProperties::Property::NoVRegs); // If there's a DISubprogram associated with this outlined function, then // emit debug info for the outlined function.
diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp --- a/llvm/lib/CodeGen/TargetPassConfig.cpp +++ b/llvm/lib/CodeGen/TargetPassConfig.cpp @@ -979,8 +979,10 @@ bool RunOnAllFunctions = (EnableMachineOutliner == AlwaysOutline); bool AddOutliner = RunOnAllFunctions || TM->Options.SupportsDefaultOutlining; - if (AddOutliner) + if (AddOutliner) { + MachineOutlinerEnabled = true; addPass(createMachineOutlinerPass(RunOnAllFunctions)); + } } // Add passes that directly emit MI after all other MI passes. diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h --- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h @@ -343,7 +343,48 @@ ArrayRef<std::pair<unsigned, const char *>> getSerializableBitmaskMachineOperandTargetFlags() const override; + /// ARM supports the MachineOutliner. + bool isFunctionSafeToOutlineFrom(MachineFunction &MF, + bool OutlineFromLinkOnceODRs) const override; + outliner::OutlinedFunction getOutliningCandidateInfo( + std::vector<outliner::Candidate> &RepeatedSequenceLocs) const override; + outliner::InstrType getOutliningType(MachineBasicBlock::iterator &MIT, + unsigned Flags) const override; + bool isMBBSafeToOutlineFrom(MachineBasicBlock &MBB, + unsigned &Flags) const override; + void buildOutlinedFrame(MachineBasicBlock &MBB, MachineFunction &MF, + const outliner::OutlinedFunction &OF) const override; + MachineBasicBlock::iterator + insertOutlinedCall(Module &M, MachineBasicBlock &MBB, + MachineBasicBlock::iterator &It, MachineFunction &MF, + const outliner::Candidate &C) const override; + private: + /// \brief Sets the offsets on outlined instructions in \p MBB which use SP + /// so that they will be valid post-outlining. + /// + /// \param MBB A \p MachineBasicBlock in an outlined function. + void fixupPostOutline(MachineBasicBlock &MBB) const; + + /// Returns an unused general-purpose register which can be used for + /// constructing an outlined call if one exists. Returns 0 otherwise.
+ unsigned findRegisterToSaveLRTo(const outliner::Candidate &C) const; + + /// Adds an instruction which saves the link register on top of the stack into + /// the MachineBasicBlock \p MBB at position \p It. + void saveLROnStack(MachineBasicBlock &MBB, + MachineBasicBlock::iterator &It) const; + + /// Adds an instruction which restores the link register from the top of the + /// stack into the MachineBasicBlock \p MBB at position \p It. + void restoreLRFromStack(MachineBasicBlock &MBB, + MachineBasicBlock::iterator &It) const; + + /// Returns true if the machine instruction offset can handle the stack fixup + /// and updates it if requested. + bool checkAndUpdateStackOffset(MachineInstr *MI, int64_t Fixup, + bool Updt) const; + unsigned getInstBundleLength(const MachineInstr &MI) const; int getVLDMDefCycle(const InstrItineraryData *ItinData, diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp --- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -32,6 +32,7 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/ScoreboardHazardRecognizer.h" @@ -5460,3 +5461,892 @@ return ConstantMaterializationCost(Val1, Subtarget, !ForCodesize) < ConstantMaterializationCost(Val2, Subtarget, !ForCodesize); } + +/// Constants defining how certain sequences should be outlined. +/// This encompasses how an outlined function should be called, and what kind of +/// frame should be emitted for that outlined function. +/// +/// \p MachineOutlinerDefault implies that the function should be called with +/// a save and restore of LR to the stack.
+/// +/// That is, +/// +/// I1 Save LR OUTLINED_FUNCTION: +/// I2 --> BL OUTLINED_FUNCTION I1 +/// I3 Restore LR I2 +/// I3 +/// BX LR +/// +/// +-------------------------+--------+-----+ +/// | | Thumb2 | ARM | +/// +-------------------------+--------+-----+ +/// | Call overhead in Bytes | 8 | 12 | +/// | Frame overhead in Bytes | 2 | 4 | +/// | Stack fixup required | Yes | Yes | +/// +-------------------------+--------+-----+ +/// +/// \p MachineOutlinerRegSave implies that the function should be called with a +/// save and restore of LR to an available register. This allows us to avoid +/// stack fixups. Note that this outlining variant is compatible with the +/// NoLRSave case. +/// +/// That is, +/// +/// I1 Save LR OUTLINED_FUNCTION: +/// I2 --> BL OUTLINED_FUNCTION I1 +/// I3 Restore LR I2 +/// I3 +/// BX LR +/// +/// +-------------------------+--------+-----+ +/// | | Thumb2 | ARM | +/// +-------------------------+--------+-----+ +/// | Call overhead in Bytes | 8 | 12 | +/// | Frame overhead in Bytes | 2 | 4 | +/// | Stack fixup required | No | No | +/// +-------------------------+--------+-----+ +/// +/// \p MachineOutlinerTailCall implies that the function is being created from +/// a sequence of instructions ending in a return. +/// +/// That is, +/// +/// I1 OUTLINED_FUNCTION: +/// I2 --> B OUTLINED_FUNCTION I1 +/// BX LR I2 +/// BX LR +/// +/// +-------------------------+--------+-----+ +/// | | Thumb2 | ARM | +/// +-------------------------+--------+-----+ +/// | Call overhead in Bytes | 4 | 4 | +/// | Frame overhead in Bytes | 0 | 0 | +/// | Stack fixup required | No | No | +/// +-------------------------+--------+-----+ +/// +/// \p MachineOutlinerNoLRSave implies that the function should be called using +/// a BL instruction, but doesn't require LR to be saved and restored. This +/// happens when LR is known to be dead. 
+/// +/// That is, +/// +/// I1 OUTLINED_FUNCTION: +/// I2 --> BL OUTLINED_FUNCTION I1 +/// I3 I2 +/// I3 +/// BX LR +/// +/// +-------------------------+--------+-----+ +/// | | Thumb2 | ARM | +/// +-------------------------+--------+-----+ +/// | Call overhead in Bytes | 4 | 4 | +/// | Frame overhead in Bytes | 4 | 4 | +/// | Stack fixup required | No | No | +/// +-------------------------+--------+-----+ +/// +/// \p MachineOutlinerThunk implies that the function is being created from +/// a sequence of instructions ending in a call. The outlined function is +/// called with a BL instruction, and the outlined function tail-calls the +/// original call destination. +/// +/// That is, +/// +/// I1 OUTLINED_FUNCTION: +/// I2 --> BL OUTLINED_FUNCTION I1 +/// BL f I2 +/// B f +/// +/// +-------------------------+--------+-----+ +/// | | Thumb2 | ARM | +/// +-------------------------+--------+-----+ +/// | Call overhead in Bytes | 4 | 4 | +/// | Frame overhead in Bytes | 0 | 0 | +/// | Stack fixup required | No | No | +/// +-------------------------+--------+-----+ + +enum MachineOutlinerClass { + MachineOutlinerDefault, ///< Emit a save, restore, call, and return. + MachineOutlinerRegSave, ///< Same as default, but save to a register. + MachineOutlinerTailCall, ///< Only emit a branch. + MachineOutlinerThunk, ///< Emit a call and tail-call. + MachineOutlinerNoLRSave ///< Emit a call and return. +}; + +enum MachineOutlinerMBBFlags { + LRUnavailableSomewhere = 0x2, + HasCalls = 0x4, + UnsafeRegsDead = 0x8 +}; + +struct OutlinerCosts { + const int CallDefault; + const int FrameDefault; + const int CallRegSave; + const int FrameRegSave; + const int CallNoLRSave; + const int FrameNoLRSave; + const int CallTailCall; + const int FrameTailCall; + const int CallThunk; + const int FrameThunk; + + OutlinerCosts(const ARMSubtarget &target) + : CallDefault(target.isThumb() ? 8 : 12), + FrameDefault(target.isThumb() ? 2 : 4), + CallRegSave(target.isThumb() ?
8 : 12), + FrameRegSave(target.isThumb() ? 2 : 4), + CallNoLRSave(target.isThumb() ? 4 : 4), + FrameNoLRSave(target.isThumb() ? 4 : 4), + CallTailCall(target.isThumb() ? 4 : 4), + FrameTailCall(target.isThumb() ? 0 : 0), + CallThunk(target.isThumb() ? 4 : 4), + FrameThunk(target.isThumb() ? 0 : 0) {} +}; + +unsigned +ARMBaseInstrInfo::findRegisterToSaveLRTo(const outliner::Candidate &C) const { + assert(C.LRUWasSet && "LRU wasn't set?"); + MachineFunction *MF = C.getMF(); + const ARMBaseRegisterInfo *ARI = static_cast<const ARMBaseRegisterInfo *>( + MF->getSubtarget().getRegisterInfo()); + + BitVector regsReserved = ARI->getReservedRegs(*MF); + // Check if there is an available register across the sequence that we can + // use. + for (unsigned Reg : + (Subtarget.isThumb() ? ARM::tGPRRegClass : ARM::GPRRegClass)) { + if (!(Reg < regsReserved.size() && regsReserved.test(Reg)) && + Reg != ARM::LR && // LR is not reserved, but don't use it. + Reg != ARM::R12 && // R12 is not guaranteed to be preserved. + C.LRU.available(Reg) && C.UsedInSequence.available(Reg)) + return Reg; + } + + // No suitable register. Return 0. + return 0u; +} + +outliner::OutlinedFunction ARMBaseInstrInfo::getOutliningCandidateInfo( + std::vector<outliner::Candidate> &RepeatedSequenceLocs) const { + outliner::Candidate &FirstCand = RepeatedSequenceLocs[0]; + unsigned SequenceSize = + std::accumulate(FirstCand.front(), std::next(FirstCand.back()), 0, + [this](unsigned Sum, const MachineInstr &MI) { + return Sum + getInstSizeInBytes(MI); + }); + + // Properties about candidate MBBs that hold for all of them. + unsigned FlagsSetInAll = 0xF; + + // Compute liveness information for each candidate, and set FlagsSetInAll.
+ const TargetRegisterInfo &TRI = getRegisterInfo(); + std::for_each( + RepeatedSequenceLocs.begin(), RepeatedSequenceLocs.end(), + [&FlagsSetInAll](outliner::Candidate &C) { FlagsSetInAll &= C.Flags; }); + + // According to the ARM Procedure Call Standard, the following are + // undefined on entry/exit from a function call: + // + // * Register R12(IP), + // * Condition codes (and thus the CPSR register) + // + // Because of this, we can't outline any sequence of instructions where one + // of these registers is live into/across it. Thus, we need to delete those + // candidates. + auto CantGuaranteeValueAcrossCall = [&TRI](outliner::Candidate &C) { + // If the unsafe registers in this block are all dead, then we don't need + // to compute liveness here. + if (C.Flags & UnsafeRegsDead) + return false; + C.initLRU(TRI); + LiveRegUnits LRU = C.LRU; + return (!LRU.available(ARM::R12) || !LRU.available(ARM::CPSR)); + }; + + // Are there any candidates where those registers are live? + if (!(FlagsSetInAll & UnsafeRegsDead)) { + // Erase every candidate that violates the restrictions above. (It could be + // true that we have viable candidates, so it's not worth bailing out in + // the case that, say, 1 out of 20 candidates violate the restrictions.) + RepeatedSequenceLocs.erase(std::remove_if(RepeatedSequenceLocs.begin(), + RepeatedSequenceLocs.end(), + CantGuaranteeValueAcrossCall), + RepeatedSequenceLocs.end()); + + // If the sequence doesn't have enough candidates left, then we're done. + if (RepeatedSequenceLocs.size() < 2) + return outliner::OutlinedFunction(); + } + + // At this point, we have only "safe" candidates to outline. Figure out + // frame + call instruction information. + + unsigned LastInstrOpcode = RepeatedSequenceLocs[0].back()->getOpcode(); + + // Helper lambda which sets call information for every candidate.
+ auto SetCandidateCallInfo = + [&RepeatedSequenceLocs](unsigned CallID, unsigned NumBytesForCall) { + for (outliner::Candidate &C : RepeatedSequenceLocs) + C.setCallInfo(CallID, NumBytesForCall); + }; + + const OutlinerCosts Costs(Subtarget); + unsigned FrameID = MachineOutlinerDefault; + unsigned NumBytesToCreateFrame = Costs.FrameDefault; + + // If the last instruction in any candidate is a terminator, then we should + // tail call all of the candidates. + if (RepeatedSequenceLocs[0].back()->isTerminator()) { + FrameID = MachineOutlinerTailCall; + NumBytesToCreateFrame = Costs.FrameTailCall; + SetCandidateCallInfo(MachineOutlinerTailCall, Costs.CallTailCall); + } else if (LastInstrOpcode == ARM::BL || LastInstrOpcode == ARM::BLX || + LastInstrOpcode == ARM::tBL || LastInstrOpcode == ARM::tBLXr || + LastInstrOpcode == ARM::tBLXi) { + // FIXME: Do we need to check if the code after this uses the value of LR? + FrameID = MachineOutlinerThunk; + NumBytesToCreateFrame = Costs.FrameThunk; + SetCandidateCallInfo(MachineOutlinerThunk, Costs.CallThunk); + } else { + // We need to decide how to emit calls + frames. We can always emit the same + // frame if we don't need to save to the stack. If we have to save to the + // stack, then we need a different frame. + unsigned NumBytesNoStackCalls = 0; + std::vector<outliner::Candidate> CandidatesWithoutStackFixups; + + for (outliner::Candidate &C : RepeatedSequenceLocs) { + C.initLRU(TRI); + + // Is LR available? If so, we don't need a save. + if (C.LRU.available(ARM::LR)) { + NumBytesNoStackCalls += Costs.CallNoLRSave; + C.setCallInfo(MachineOutlinerNoLRSave, Costs.CallNoLRSave); + CandidatesWithoutStackFixups.push_back(C); + } + + // Is an unused register available? If so, we won't modify the stack, so + // we can outline with the same frame type as those that don't save LR.
+ else if (findRegisterToSaveLRTo(C)) { + NumBytesNoStackCalls += Costs.CallRegSave; + C.setCallInfo(MachineOutlinerRegSave, Costs.CallRegSave); + CandidatesWithoutStackFixups.push_back(C); + } + + // Is SP used in the sequence at all? If not, we don't have to modify + // the stack, so we are guaranteed to get the same frame. + else if (C.UsedInSequence.available(ARM::SP)) { + NumBytesNoStackCalls += Costs.CallDefault; + C.setCallInfo(MachineOutlinerDefault, Costs.CallDefault); + CandidatesWithoutStackFixups.push_back(C); + } + + // If we outline this, we need to modify the stack. Pretend we don't + // outline this by saving all of its bytes. + else { + NumBytesNoStackCalls += SequenceSize; + } + } + + // If there are no places where we have to save LR, then note that we don't + // have to update the stack. Otherwise, give every candidate the default + // call type. + if (NumBytesNoStackCalls <= + RepeatedSequenceLocs.size() * Costs.CallDefault) { + RepeatedSequenceLocs = CandidatesWithoutStackFixups; + FrameID = MachineOutlinerNoLRSave; + } else { + SetCandidateCallInfo(MachineOutlinerDefault, Costs.CallDefault); + } + } + + // Does every candidate's MBB contain a call? If so, then we might have a + // call in the range. + if (FlagsSetInAll & MachineOutlinerMBBFlags::HasCalls) { + // Check if the range contains a call. These require a save + restore of the + // link register. + if (std::any_of(FirstCand.front(), FirstCand.back(), + [](const MachineInstr &MI) { return MI.isCall(); })) + NumBytesToCreateFrame += Costs.FrameDefault; + + // Handle the last instruction separately. If this is a tail call, then the + // last instruction is a call. We don't want to save + restore in this case. + // However, it could be possible that the last instruction is a call without + // it being valid to tail call this sequence. We should consider this as + // well.
+ else if (FrameID != MachineOutlinerThunk && + FrameID != MachineOutlinerTailCall && FirstCand.back()->isCall()) + NumBytesToCreateFrame += Costs.FrameThunk; + } + + return outliner::OutlinedFunction(RepeatedSequenceLocs, SequenceSize, + NumBytesToCreateFrame, FrameID); +} + +bool ARMBaseInstrInfo::isFunctionSafeToOutlineFrom( + MachineFunction &MF, bool OutlineFromLinkOnceODRs) const { + const Function &F = MF.getFunction(); + + // Can F be deduplicated by the linker? If it can, don't outline from it. + if (!OutlineFromLinkOnceODRs && F.hasLinkOnceODRLinkage()) + return false; + + // Don't outline from functions with section markings; the program could + // expect that all the code is in the named section. + // FIXME: Allow outlining from multiple functions with the same section + // marking. + if (F.hasSection()) + return false; + + // FIXME: Thumb1 outlining is not handled + if (MF.getInfo<ARMFunctionInfo>()->isThumb1OnlyFunction()) + return false; + + // It's safe to outline from MF. + return true; +} + +bool ARMBaseInstrInfo::isMBBSafeToOutlineFrom(MachineBasicBlock &MBB, + unsigned &Flags) const { + // Check if LR is available through all of the MBB. If it's not, then set + // a flag. + assert(MBB.getParent()->getRegInfo().tracksLiveness() && + "Suitable Machine Function for outlining must track liveness"); + + LiveRegUnits LRU(getRegisterInfo()); + + std::for_each(MBB.rbegin(), MBB.rend(), + [&LRU](MachineInstr &MI) { LRU.accumulate(MI); }); + + // Check if each of the unsafe registers are available... + bool R12AvailableInBlock = LRU.available(ARM::R12); + bool CPSRAvailableInBlock = LRU.available(ARM::CPSR); + + // If all of these are dead (and not live out), we know we don't have to check + // them later. + if (R12AvailableInBlock && CPSRAvailableInBlock) + Flags |= MachineOutlinerMBBFlags::UnsafeRegsDead; + + // Now, add the live outs to the set.
+ LRU.addLiveOuts(MBB); + + // If any of these registers is available in the MBB, but also a live out of + // the block, then we know outlining is unsafe. + if (R12AvailableInBlock && !LRU.available(ARM::R12)) + return false; + if (CPSRAvailableInBlock && !LRU.available(ARM::CPSR)) + return false; + + // Check if there's a call inside this MachineBasicBlock. If there is, then + // set a flag. + if (any_of(MBB, [](MachineInstr &MI) { return MI.isCall(); })) + Flags |= MachineOutlinerMBBFlags::HasCalls; + + if (!LRU.available(ARM::LR)) + Flags |= MachineOutlinerMBBFlags::LRUnavailableSomewhere; + + return true; +} + +bool ARMBaseInstrInfo::checkAndUpdateStackOffset(MachineInstr *MI, + int64_t Fixup, + bool Updt) const { + int SPIdx = MI->findRegisterUseOperandIdx(ARM::SP); + + if (SPIdx < 0) + // No SP operand + return true; + + unsigned AddrMode = (MI->getDesc().TSFlags & ARMII::AddrModeMask); + + // Stack might be involved but addressing mode doesn't handle any offset. + // Note: AddrModeT1_[1|2|4] don't operate on SP + if (AddrMode == ARMII::AddrMode1 // Arithmetic instructions + || AddrMode == ARMII::AddrMode4 // Load/Store Multiple + || AddrMode == ARMII::AddrMode6 // Neon Load/Store Multiple + || AddrMode == ARMII::AddrModeT2_so // SP can't be used as base register + || AddrMode == ARMII::AddrModeT2_pc // PCrel access + || AddrMode == ARMII::AddrMode2 // Used by PRE and POST indexed LD/ST + || AddrMode == ARMII::AddrModeNone) + return false; + + // If SP is not the base register we can't do much + if (SPIdx != 1) { + if (AddrMode != ARMII::AddrModeT2_i8s4) + return false; + else if (SPIdx != 2) + return false; + } + + unsigned NumOps = MI->getDesc().getNumOperands(); + unsigned ImmIdx = NumOps - 3; + + const MachineOperand &Offset = MI->getOperand(ImmIdx); + assert(Offset.isImm() && "Is not an immediate"); + int64_t OffVal = Offset.getImm(); + + if (OffVal < 0) + // Don't override data if they are below SP.
+ return false; + + unsigned NumBits = 0; + unsigned Scale = 1; + + switch (AddrMode) { + case ARMII::AddrMode3: + if (ARM_AM::getAM3Op(OffVal) == ARM_AM::sub) + return false; + OffVal = ARM_AM::getAM3Offset(OffVal); + NumBits = 8; + break; + case ARMII::AddrMode5: + if (ARM_AM::getAM5Op(OffVal) == ARM_AM::sub) + return false; + OffVal = ARM_AM::getAM5Offset(OffVal); + NumBits = 8; + Scale = 4; + break; + case ARMII::AddrMode5FP16: + if (ARM_AM::getAM5FP16Op(OffVal) == ARM_AM::sub) + return false; + OffVal = ARM_AM::getAM5FP16Offset(OffVal); + NumBits = 8; + Scale = 2; + break; + case ARMII::AddrModeT2_i8: + NumBits = 8; + break; + case ARMII::AddrModeT2_i8s4: + case ARMII::AddrModeT2_ldrex: + NumBits = 8; + Scale = 4; + break; + case ARMII::AddrModeT2_i12: + case ARMII::AddrMode_i12: + NumBits = 12; + break; + case ARMII::AddrModeT2_i7: + NumBits = 7; + break; + case ARMII::AddrModeT2_i7s2: + NumBits = 7; + Scale = 2; + break; + case ARMII::AddrModeT2_i7s4: + NumBits = 7; + Scale = 4; + break; + case ARMII::AddrModeT1_s: // SP-relative LD/ST + NumBits = 8; + Scale = 4; + break; + default: + llvm_unreachable("Unsupported addressing mode!"); + } + // Make sure the offset is encodable for instructions that scale the + // immediate. + if (((OffVal * Scale + Fixup) & (Scale - 1)) != 0) + return false; + + OffVal += Fixup / Scale; + + unsigned Mask = (1 << NumBits) - 1; + + if (OffVal <= Mask) { + if (Updt) + MI->getOperand(ImmIdx).setImm(OffVal); + return true; + } + + return false; +} + +outliner::InstrType +ARMBaseInstrInfo::getOutliningType(MachineBasicBlock::iterator &MIT, + unsigned Flags) const { + MachineInstr &MI = *MIT; + MachineBasicBlock *MBB = MI.getParent(); + MachineFunction *MF = MBB->getParent(); + const TargetRegisterInfo *TRI = &getRegisterInfo(); + + // Be conservative with inline ASM + if (MI.isInlineAsm()) + return outliner::InstrType::Invisible; + + // Don't allow debug values to impact outlining type.
+ if (MI.isDebugInstr() || MI.isIndirectDebugValue()) + return outliner::InstrType::Invisible; + + // At this point, KILL instructions don't really tell us much so we can go + // ahead and skip over them. + if (MI.isKill()) + return outliner::InstrType::Invisible; + + // PIC instructions contain labels, outlining them would break offset + // computing. + unsigned Opc = MI.getOpcode(); + switch (Opc) { + case ARM::t2IT: + case ARM::tPICADD: // This is used in Thumb1 and Thumb2 + case ARM::PICADD: + case ARM::PICSTR: + case ARM::PICSTRB: + case ARM::PICSTRH: + case ARM::PICLDR: + case ARM::PICLDRB: + case ARM::PICLDRH: + case ARM::PICLDRSB: + case ARM::PICLDRSH: + case ARM::t2LDRpci_pic: + case ARM::t2MOVi16_ga_pcrel: + case ARM::t2MOVTi16_ga_pcrel: + case ARM::t2MOV_ga_pcrel: + return outliner::InstrType::Illegal; + default: + break; + } + + // Is this a terminator for a basic block? + if (MI.isTerminator()) { + // Don't outline if the branch is not unconditional. + if (Opc == ARM::BX_RET || Opc == ARM::tBX_RET || Opc == ARM::MOVPCLR) { + if (MI.getOperand(0).getImm() != ARMCC::AL) + return outliner::InstrType::Illegal; + } + if (Opc == ARM::LDMIA_RET) { + if (MI.getOperand(2).getImm() != ARMCC::AL) + return outliner::InstrType::Illegal; + } + + // Is this the end of a function? + if (MI.getParent()->succ_empty()) + return outliner::InstrType::Legal; + + // It's not, so don't outline it. + return outliner::InstrType::Illegal; + } + + // Make sure none of the operands are un-outlinable. + for (const MachineOperand &MOP : MI.operands()) { + if (MOP.isCPI() || MOP.isJTI() || MOP.isCFIIndex() || MOP.isFI() || + MOP.isTargetIndex()) + return outliner::InstrType::Illegal; + + // If it uses LR then don't touch it. + if (MOP.isReg() && !MOP.isImplicit() && (MOP.getReg() == ARM::LR)) + return outliner::InstrType::Illegal; + } + + // If MI is a call we might be able to outline it. We don't want to outline + // any calls that rely on the position of items on the stack.
When we outline + // something containing a call, we have to emit a save and restore of LR in + // the outlined function. Currently, this always happens by saving LR to the + // stack. Thus, if we outline, say, half the parameters for a function call + // plus the call, then we'll break the callee's expectations for the layout + // of the stack. + // + // FIXME: Allow calls to functions which construct a stack frame, as long + // as they don't access arguments on the stack. + // FIXME: Figure out some way to analyze functions defined in other modules. + // We should be able to compute the memory usage based on the IR calling + // convention, even if we can't see the definition. + if (MI.isCall()) { + // Get the function associated with the call. Look at each operand and find + // the one that represents the callee and get its name. + const Function *Callee = nullptr; + for (const MachineOperand &MOP : MI.operands()) { + if (MOP.isGlobal()) { + Callee = dyn_cast<Function>(MOP.getGlobal()); + break; + } + } + + // Never outline calls to mcount. There isn't any rule that would require + // this, but the Linux kernel's "ftrace" feature depends on it. + if (Callee && Callee->getName() == "\01_mcount") + return outliner::InstrType::Illegal; + + // If we don't know anything about the callee, assume it depends on the + // stack layout of the caller. In that case, it's only legal to outline + // as a tail-call. Whitelist the call instructions we know about so we + // don't get unexpected results with call pseudo-instructions. + auto UnknownCallOutlineType = outliner::InstrType::Illegal; + if (MI.getOpcode() == ARM::BL || MI.getOpcode() == ARM::tBL || + MI.getOpcode() == ARM::BLX || MI.getOpcode() == ARM::tBLXr || + MI.getOpcode() == ARM::tBLXi) + UnknownCallOutlineType = outliner::InstrType::LegalTerminator; + + if (!Callee) + return UnknownCallOutlineType; + + // We have a function we have information about. Check if it's something we + // can safely outline.
+ MachineFunction *CalleeMF = MF->getMMI().getMachineFunction(*Callee); + + // We don't know what's going on with the callee at all. Don't touch it. + if (!CalleeMF) + return UnknownCallOutlineType; + + // Check if we know anything about the callee saves on the function. If we + // don't, then don't touch it, since that implies that we haven't + // computed anything about its stack frame yet. + MachineFrameInfo &MFI = CalleeMF->getFrameInfo(); + if (!MFI.isCalleeSavedInfoValid() || MFI.getStackSize() > 0 || + MFI.getNumObjects() > 0) + return UnknownCallOutlineType; + + // At this point, we can say that CalleeMF ought to not pass anything on the + // stack. Therefore, we can outline it. + return outliner::InstrType::Legal; + } + + // Don't outline positions. + if (MI.isPosition()) + return outliner::InstrType::Illegal; + + // Don't touch the link register + if (MI.readsRegister(ARM::LR, &getRegisterInfo()) || + MI.modifiesRegister(ARM::LR, &getRegisterInfo())) + return outliner::InstrType::Illegal; + + // Does this use the stack? + if (MI.modifiesRegister(ARM::SP, TRI) || MI.readsRegister(ARM::SP, TRI)) { + // False if there is no chance that any outlined candidate from this range + // could require stack fixups. That is, both + // * LR is available in the range (No save/restore around call) + // * The range doesn't include calls (No save/restore in outlined frame) + // are true. + // FIXME: This is very restrictive; the flags check the whole block, + // not just the bit we will try to outline. + bool MightNeedStackFixUp = + (Flags & (MachineOutlinerMBBFlags::LRUnavailableSomewhere | + MachineOutlinerMBBFlags::HasCalls)); + + // If this instruction is in a range where it *never* needs to be fixed + // up, then we can *always* outline it. This is true even if it's not + // possible to fix that instruction up. + // + // Why? Consider two equivalent instructions I1, I2 where both I1 and I2 + // use SP.
Suppose that I1 sits within a range that definitely doesn't + // need stack fixups, while I2 sits in a range that does. + // + // First, I1 can be outlined as long as we *never* fix up the stack in + // any sequence containing it. I1 is already a safe instruction in the + // original program, so as long as we don't modify it we're good to go. + // So this leaves us with showing that outlining I2 won't break our + // program. + // + // Suppose I1 and I2 belong to equivalent candidate sequences. When we + // look at I2, we need to see if it can be fixed up. Suppose I2, (and + // thus I1) cannot be fixed up. Then I2 will be assigned a unique + // integer label; thus, I2 cannot belong to any candidate sequence (a + // contradiction). Suppose I2 can be fixed up. Then I1 can be fixed up + // as well, so we're good. Thus, I1 is always safe to outline. + if (!MightNeedStackFixUp) + return outliner::InstrType::Legal; + + // Any modification of SP will break our code to save/restore LR. + // FIXME: We could handle some instructions which add a constant offset to + // SP, with a bit more work. + if (MI.modifiesRegister(ARM::SP, TRI)) + return outliner::InstrType::Illegal; + + // At this point, we have a stack instruction that we might need to fix up. + // We'll handle it if it's a load or store. + if (checkAndUpdateStackOffset(&MI, Subtarget.getStackAlignment().value(), + false)) + // It's in range, so we can outline it. + return outliner::InstrType::Legal; + + // We can't fix it up, so don't outline it. + return outliner::InstrType::Illegal; + } + + return outliner::InstrType::Legal; +} + +void ARMBaseInstrInfo::fixupPostOutline(MachineBasicBlock &MBB) const { + for (MachineInstr &MI : MBB) { + if (!MI.mayLoadOrStore()) + continue; + checkAndUpdateStackOffset(&MI, Subtarget.getStackAlignment().value(), true); + } +} + +void ARMBaseInstrInfo::saveLROnStack(MachineBasicBlock &MBB, + MachineBasicBlock::iterator &It) const { + unsigned Opc = Subtarget.isThumb() ?
ARM::t2STR_PRE : ARM::STR_PRE_IMM; + int Align = -Subtarget.getStackAlignment().value(); + BuildMI(MBB, It, DebugLoc(), get(Opc), ARM::SP) + .addReg(ARM::LR, RegState::Kill) + .addReg(ARM::SP) + .addImm(Align) + .add(predOps(ARMCC::AL)); +} + +void ARMBaseInstrInfo::restoreLRFromStack( + MachineBasicBlock &MBB, MachineBasicBlock::iterator &It) const { + unsigned Opc = Subtarget.isThumb() ? ARM::t2LDR_POST : ARM::LDR_POST_IMM; + MachineInstrBuilder MIB = BuildMI(MBB, It, DebugLoc(), get(Opc), ARM::LR) + .addReg(ARM::SP, RegState::Define) + .addReg(ARM::SP); + if (!Subtarget.isThumb()) + MIB.addReg(0); + MIB.addImm(Subtarget.getStackAlignment().value()).add(predOps(ARMCC::AL)); +} + +void ARMBaseInstrInfo::buildOutlinedFrame( + MachineBasicBlock &MBB, MachineFunction &MF, + const outliner::OutlinedFunction &OF) const { + unsigned StackAlignment = Subtarget.getStackAlignment().value(); + // For thunk outlining, rewrite the last instruction from a call to a + // tail-call. + if (OF.FrameConstructionID == MachineOutlinerThunk) { + MachineInstr *Call = &*--MBB.instr_end(); + if (Subtarget.isThumb()) + if (Call->getOperand(2).isReg()) + BuildMI(MBB, MBB.end(), DebugLoc(), get(ARM::tTAILJMPr)) + .add(Call->getOperand(2)); + else if (Subtarget.isTargetMachO()) + BuildMI(MBB, MBB.end(), DebugLoc(), get(ARM::tTAILJMPd)) + .add(Call->getOperand(2)) + .add(predOps(ARMCC::AL)); + else + BuildMI(MBB, MBB.end(), DebugLoc(), get(ARM::tTAILJMPdND)) + .add(Call->getOperand(2)) + .add(predOps(ARMCC::AL)); + else if (Call->getOperand(0).isReg()) + BuildMI(MBB, MBB.end(), DebugLoc(), get(ARM::TAILJMPr)) + .add(Call->getOperand(0)); + else + BuildMI(MBB, MBB.end(), DebugLoc(), get(ARM::TAILJMPd)) + .add(Call->getOperand(0)); + Call->eraseFromParent(); + } + // Is there a call in the outlined range?
+ auto IsNonTailCall = [](MachineInstr &MI) { + return MI.isCall() && !MI.isReturn(); + }; + if (std::any_of(MBB.instr_begin(), MBB.instr_end(), IsNonTailCall)) { + // LR has to be a live-in so that we can save it. + MBB.addLiveIn(ARM::LR); + + MachineBasicBlock::iterator It = MBB.begin(); + MachineBasicBlock::iterator Et = MBB.end(); + + if (OF.FrameConstructionID == MachineOutlinerTailCall || + OF.FrameConstructionID == MachineOutlinerThunk) + Et = std::prev(MBB.end()); + + // Insert a save before the outlined region + saveLROnStack(MBB, It); + + // Fix up the instructions in the range, since we're going to modify the + // stack. + assert(OF.FrameConstructionID != MachineOutlinerDefault && + "Can only fix up stack references once"); + fixupPostOutline(MBB); + + const TargetSubtargetInfo &STI = MF.getSubtarget(); + const MCRegisterInfo *MRI = STI.getRegisterInfo(); + unsigned DwarfReg = MRI->getDwarfRegNum(ARM::LR, true); + + // Add a CFI saying the stack was moved down. + int64_t StackPosEntry = + MF.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, + StackAlignment)); + BuildMI(MBB, It, DebugLoc(), get(ARM::CFI_INSTRUCTION)) + .addCFIIndex(StackPosEntry) + .setMIFlags(MachineInstr::FrameSetup); + + // Add a CFI saying that the LR that we want to find is now higher than + // before. + int64_t LRPosEntry = + MF.addFrameInst(MCCFIInstruction::createOffset(nullptr, DwarfReg, + StackAlignment)); + BuildMI(MBB, It, DebugLoc(), get(ARM::CFI_INSTRUCTION)) + .addCFIIndex(LRPosEntry) + .setMIFlags(MachineInstr::FrameSetup); + + // Restore the link register at Et: before the last instruction for + // tail-call and thunk frames, otherwise at the end of the block. + restoreLRFromStack(MBB, Et); + } + + // If this is a tail call outlined function, then there's already a return. + if (OF.FrameConstructionID == MachineOutlinerTailCall || + OF.FrameConstructionID == MachineOutlinerThunk) + return; + + // It's not a tail call, so we have to insert the return ourselves.
Get the + // correct opcode from current feature set. + BuildMI(MBB, MBB.end(), DebugLoc(), get(Subtarget.getReturnOpcode())) + .add(predOps(ARMCC::AL)); + + // Did we have to modify the stack by saving the link register? + if (OF.FrameConstructionID != MachineOutlinerDefault) + return; + + // We modified the stack. + // Walk over the basic block and fix up all the stack accesses. + fixupPostOutline(MBB); +} + +MachineBasicBlock::iterator ARMBaseInstrInfo::insertOutlinedCall( + Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It, + MachineFunction &MF, const outliner::Candidate &C) const { + MachineInstrBuilder MIB; + MachineBasicBlock::iterator CallPt; + unsigned Opc; + bool isThumb = Subtarget.isThumb(); + + // Are we tail calling? + if (C.CallConstructionID == MachineOutlinerTailCall) { + // If yes, then we can just branch to the label. + Opc = isThumb + ? (Subtarget.isTargetMachO() ? ARM::tTAILJMPd : ARM::tTAILJMPdND) + : ARM::TAILJMPd; + MIB = BuildMI(MF, DebugLoc(), get(Opc)) + .addGlobalAddress(M.getNamedValue(MF.getName())); + if (isThumb) + MIB.add(predOps(ARMCC::AL)); + It = MBB.insert(It, MIB); + return It; + } + + // Create the call instruction. + Opc = isThumb ? ARM::tBL : ARM::BL; + MachineInstrBuilder CallMIB = BuildMI(MF, DebugLoc(), get(Opc)); + ; + if (isThumb) + CallMIB.add(predOps(ARMCC::AL)); + CallMIB.addGlobalAddress(M.getNamedValue(MF.getName())); + + // Are we saving the link register? + if (C.CallConstructionID == MachineOutlinerNoLRSave || + C.CallConstructionID == MachineOutlinerThunk) { + // No, so just insert the call. + It = MBB.insert(It, CallMIB); + return It; + } + + MBB.addLiveIn(ARM::LR); + // Can we save to a register? + if (C.CallConstructionID == MachineOutlinerRegSave) { + unsigned Reg = findRegisterToSaveLRTo(C); + assert(Reg != 0 && "No callee-saved register available?"); + + // Save and restore LR from that register. 
+ copyPhysReg(MBB, It, DebugLoc(), Reg, ARM::LR, true); + CallPt = MBB.insert(It, CallMIB); + copyPhysReg(MBB, It, DebugLoc(), ARM::LR, Reg, true); + It--; + return CallPt; + } + + // We have the default case. Save and restore from SP. + saveLROnStack(MBB, It); + CallPt = MBB.insert(It, CallMIB); + restoreLRFromStack(MBB, It); + It--; + return CallPt; +} diff --git a/llvm/lib/Target/ARM/ARMTargetMachine.cpp b/llvm/lib/Target/ARM/ARMTargetMachine.cpp --- a/llvm/lib/Target/ARM/ARMTargetMachine.cpp +++ b/llvm/lib/Target/ARM/ARMTargetMachine.cpp @@ -244,6 +244,10 @@ } initAsmInfo(); + + // ARM supports the MachineOutliner. + setMachineOutliner(true); + setSupportsDefaultOutlining(false); } ARMBaseTargetMachine::~ARMBaseTargetMachine() = default; @@ -359,6 +363,7 @@ void addPreRegAlloc() override; void addPreSched2() override; void addPreEmitPass() override; + void addPreEmitPass2() override; std::unique_ptr getCSEConfig() const override; }; @@ -538,9 +543,12 @@ // Don't optimize barriers at -O0. if (getOptLevel() != CodeGenOpt::None) addPass(createARMOptimizeBarriersPass()); +} +void ARMPassConfig::addPreEmitPass2() { addPass(createARMConstantIslandPass()); - addPass(createARMLowOverheadLoopsPass()); + if (!MachineOutlinerEnabled) + addPass(createARMLowOverheadLoopsPass()); // Identify valid longjmp targets for Windows Control Flow Guard. 
if (TM->getTargetTriple().isOSWindows()) diff --git a/llvm/test/CodeGen/ARM/O3-pipeline.ll b/llvm/test/CodeGen/ARM/O3-pipeline.ll --- a/llvm/test/CodeGen/ARM/O3-pipeline.ll +++ b/llvm/test/CodeGen/ARM/O3-pipeline.ll @@ -162,15 +162,15 @@ ; CHECK-NEXT: Thumb2 instruction size reduce pass ; CHECK-NEXT: Unpack machine instruction bundles ; CHECK-NEXT: optimise barriers pass +; CHECK-NEXT: Contiguously Lay Out Funclets +; CHECK-NEXT: StackMap Liveness Analysis +; CHECK-NEXT: Live DEBUG_VALUE analysis ; CHECK-NEXT: MachineDominator Tree Construction ; CHECK-NEXT: ARM constant island placement and branch shortening pass ; CHECK-NEXT: MachineDominator Tree Construction ; CHECK-NEXT: Machine Natural Loop Construction ; CHECK-NEXT: ReachingDefAnalysis ; CHECK-NEXT: ARM Low Overhead Loops pass -; CHECK-NEXT: Contiguously Lay Out Funclets -; CHECK-NEXT: StackMap Liveness Analysis -; CHECK-NEXT: Live DEBUG_VALUE analysis ; CHECK-NEXT: Lazy Machine Block Frequency Analysis ; CHECK-NEXT: Machine Optimization Remark Emitter ; CHECK-NEXT: ARM Assembly Printer diff --git a/llvm/test/CodeGen/ARM/machine-outliner-stack-fixup-arm.mir b/llvm/test/CodeGen/ARM/machine-outliner-stack-fixup-arm.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/ARM/machine-outliner-stack-fixup-arm.mir @@ -0,0 +1,169 @@ +# RUN: llc -mtriple=armv7-- -run-pass=prologepilog -run-pass=machine-outliner \ +# RUN: -verify-machineinstrs %s -o - | FileCheck %s + +--- | + define void @CheckAddrMode_i12() { ret void } + define void @CheckAddrMode3() { ret void } + define void @CheckAddrMode5() { ret void } + define void @CheckAddrMode5FP16() { ret void } + define void @foo() { ret void } + +... 
+--- + +name: CheckAddrMode_i12 +tracksRegLiveness: true +body: | + bb.0: + liveins: $r0 + ; CHECK-LABEL: name: CheckAddrMode_i12 + ; CHECK: $r1 = MOVr killed $r0, 14, $noreg, $noreg + ; CHECK-NEXT: BL @OUTLINED_FUNCTION_[[I12:[0-9]+]] + ; CHECK-NEXT: $r5 = LDRi12 $sp, 4088, 14, $noreg + $r1 = MOVr killed $r0, 14, $noreg, $noreg + BL @foo, implicit-def dead $lr, implicit $sp + $r1 = LDRi12 $sp, 0, 14, $noreg + $r2 = LDRi12 $sp, 8, 14, $noreg + $r3 = LDRi12 $sp, 10, 14, $noreg + $r4 = LDRi12 $sp, 4086, 14, $noreg + $r5 = LDRi12 $sp, 4088, 14, $noreg + BL @foo, implicit-def dead $lr, implicit $sp + $r1 = LDRi12 $sp, 0, 14, $noreg + $r2 = LDRi12 $sp, 8, 14, $noreg + $r3 = LDRi12 $sp, 10, 14, $noreg + $r4 = LDRi12 $sp, 4086, 14, $noreg + $r5 = LDRi12 $sp, 4088, 14, $noreg + BX_RET 14, $noreg +... +--- + +name: CheckAddrMode3 +tracksRegLiveness: true +body: | + bb.0: + liveins: $r1 + ; CHECK-LABEL: name: CheckAddrMode3 + ; CHECK: $r0 = MOVr killed $r1, 14, $noreg, $noreg + ; CHECK-NEXT: BL @OUTLINED_FUNCTION_[[I3:[0-9]+]] + ; CHECK-NEXT: $r5 = LDRSH $sp, $noreg, 248, 14, $noreg + $r0 = MOVr killed $r1, 14, $noreg, $noreg + BL @foo, implicit-def dead $lr, implicit $sp + $r1 = LDRSH $sp, $noreg, 0, 14, $noreg + $r2 = LDRSH $sp, $noreg, 8, 14, $noreg + $r3 = LDRSH $sp, $noreg, 10, 14, $noreg + $r4 = LDRSH $sp, $noreg, 247, 14, $noreg + $r5 = LDRSH $sp, $noreg, 248, 14, $noreg + BL @foo, implicit-def dead $lr, implicit $sp + $r1 = LDRSH $sp, $noreg, 0, 14, $noreg + $r2 = LDRSH $sp, $noreg, 8, 14, $noreg + $r3 = LDRSH $sp, $noreg, 10, 14, $noreg + $r4 = LDRSH $sp, $noreg, 247, 14, $noreg + $r5 = LDRSH $sp, $noreg, 248, 14, $noreg + BX_RET 14, $noreg +... 
+--- + +name: CheckAddrMode5 +tracksRegLiveness: true +body: | + bb.0: + liveins: $r2 + ; CHECK-LABEL: name: CheckAddrMode5 + ; CHECK: $r0 = MOVr killed $r2, 14, $noreg, $noreg + ; CHECK-NEXT: BL @OUTLINED_FUNCTION_[[I5:[0-9]+]] + ; CHECK-NEXT: $d4 = VLDRD $sp, 254, 14, $noreg + $r0 = MOVr killed $r2, 14, $noreg, $noreg + BL @foo, implicit-def dead $lr, implicit $sp + $d0 = VLDRD $sp, 0, 14, $noreg + $d1 = VLDRD $sp, 8, 14, $noreg + $d2 = VLDRD $sp, 10, 14, $noreg + $d3 = VLDRD $sp, 253, 14, $noreg + $d4 = VLDRD $sp, 254, 14, $noreg + BL @foo, implicit-def dead $lr, implicit $sp + $d0 = VLDRD $sp, 0, 14, $noreg + $d1 = VLDRD $sp, 8, 14, $noreg + $d2 = VLDRD $sp, 10, 14, $noreg + $d3 = VLDRD $sp, 253, 14, $noreg + $d4 = VLDRD $sp, 254, 14, $noreg + BX_RET 14, $noreg +... +--- + +name: CheckAddrMode5FP16 +tracksRegLiveness: true +body: | + bb.0: + liveins: $r3 + ; CHECK-LABEL: name: CheckAddrMode5FP16 + ; CHECK: $r0 = MOVr killed $r3, 14, $noreg, $noreg + ; CHECK-NEXT: BL @OUTLINED_FUNCTION_[[I5FP16:[0-9]+]] + ; CHECK-NEXT: $s5 = VLDRH $sp, 252, 14, $noreg + $r0 = MOVr killed $r3, 14, $noreg, $noreg + BL @foo, implicit-def dead $lr, implicit $sp + $s1 = VLDRH $sp, 0, 14, $noreg + $s2 = VLDRH $sp, 8, 14, $noreg + $s3 = VLDRH $sp, 10, 14, $noreg + $s4 = VLDRH $sp, 240, 14, $noreg + $s5 = VLDRH $sp, 252, 14, $noreg + BL @foo, implicit-def dead $lr, implicit $sp + $s1 = VLDRH $sp, 0, 14, $noreg + $s2 = VLDRH $sp, 8, 14, $noreg + $s3 = VLDRH $sp, 10, 14, $noreg + $s4 = VLDRH $sp, 240, 14, $noreg + $s5 = VLDRH $sp, 252, 14, $noreg + BX_RET 14, $noreg +... 
+--- + +name: foo +tracksRegLiveness: true +body: | + bb.0: + liveins: $lr + + BX_RET 14, $noreg + + ;CHECK: name: OUTLINED_FUNCTION_[[I5FP16]] + ;CHECK: early-clobber $sp = STR_PRE_IMM killed $lr, $sp, -8, 14, $noreg + ;CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset -8 + ;CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $lr, 8 + ;CHECK-NEXT: BL @foo, implicit-def dead $lr, implicit $sp + ;CHECK-NEXT: $s1 = VLDRH $sp, 4, 14, $noreg + ;CHECK-NEXT: $s2 = VLDRH $sp, 12, 14, $noreg + ;CHECK-NEXT: $s3 = VLDRH $sp, 14, 14, $noreg + ;CHECK-NEXT: $s4 = VLDRH $sp, 244, 14, $noreg + ;CHECK-NEXT: $lr, $sp = LDR_POST_IMM $sp, $noreg, 8, 14, $noreg + + ;CHECK: name: OUTLINED_FUNCTION_[[I3]] + ;CHECK: early-clobber $sp = STR_PRE_IMM killed $lr, $sp, -8, 14, $noreg + ;CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset -8 + ;CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $lr, 8 + ;CHECK-NEXT: BL @foo, implicit-def dead $lr, implicit $sp + ;CHECK-NEXT: $r1 = LDRSH $sp, $noreg, 8, 14, $noreg + ;CHECK-NEXT: $r2 = LDRSH $sp, $noreg, 16, 14, $noreg + ;CHECK-NEXT: $r3 = LDRSH $sp, $noreg, 18, 14, $noreg + ;CHECK-NEXT: $r4 = LDRSH $sp, $noreg, 255, 14, $noreg + ;CHECK-NEXT: $lr, $sp = LDR_POST_IMM $sp, $noreg, 8, 14, $noreg + + ;CHECK: name: OUTLINED_FUNCTION_[[I5]] + ;CHECK: early-clobber $sp = STR_PRE_IMM killed $lr, $sp, -8, 14, $noreg + ;CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset -8 + ;CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $lr, 8 + ;CHECK-NEXT: BL @foo, implicit-def dead $lr, implicit $sp + ;CHECK-NEXT: $d0 = VLDRD $sp, 2, 14, $noreg + ;CHECK-NEXT: $d1 = VLDRD $sp, 10, 14, $noreg + ;CHECK-NEXT: $d2 = VLDRD $sp, 12, 14, $noreg + ;CHECK-NEXT: $d3 = VLDRD $sp, 255, 14, $noreg + ;CHECK-NEXT: $lr, $sp = LDR_POST_IMM $sp, $noreg, 8, 14, $noreg + + ;CHECK: name: OUTLINED_FUNCTION_[[I12]] + ;CHECK: early-clobber $sp = STR_PRE_IMM killed $lr, $sp, -8, 14, $noreg + ;CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset -8 + ;CHECK-NEXT: frame-setup 
CFI_INSTRUCTION offset $lr, 8 + ;CHECK-NEXT: BL @foo, implicit-def dead $lr, implicit $sp + ;CHECK-NEXT: $r1 = LDRi12 $sp, 8, 14, $noreg + ;CHECK-NEXT: $r2 = LDRi12 $sp, 16, 14, $noreg + ;CHECK-NEXT: $r3 = LDRi12 $sp, 18, 14, $noreg + ;CHECK-NEXT: $r4 = LDRi12 $sp, 4094, 14, $noreg + ;CHECK-NEXT: $lr, $sp = LDR_POST_IMM $sp, $noreg, 8, 14, $noreg + diff --git a/llvm/test/CodeGen/ARM/machine-outliner-stack-fixup-thumb.mir b/llvm/test/CodeGen/ARM/machine-outliner-stack-fixup-thumb.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/ARM/machine-outliner-stack-fixup-thumb.mir @@ -0,0 +1,334 @@ +# RUN: llc -mtriple=thumbv7-- -run-pass=prologepilog \ +# RUN: -run-pass=machine-outliner -verify-machineinstrs %s -o - | FileCheck %s + +--- | + define void @CheckAddrModeT2_i12() { ret void } + define void @CheckAddrModeT2_i8() { ret void } + define void @CheckAddrModeT2_i8s4() { ret void } + define void @CheckAddrModeT2_ldrex() { ret void } + define void @CheckAddrModeT2_i7() { ret void } + define void @CheckAddrModeT2_i7s2() { ret void } + define void @CheckAddrModeT2_i7s4() { ret void } + define void @CheckAddrModeT1_s() { ret void } + define void @foo() { ret void } + +... 
+--- + +name: CheckAddrModeT2_i12 +tracksRegLiveness: true + +body: | + bb.0: + liveins: $r0 + ;CHECK-LABEL: name: CheckAddrModeT2_i12 + ;CHECK: $r1 = tMOVr killed $r0, 14, $noreg + ;CHECK-NEXT: tBL 14, $noreg, @OUTLINED_FUNCTION_[[I12:[0-9]+]] + ;CHECK-NEXT: $r5 = t2LDRi12 $sp, 4088, 14, $noreg + $r1 = tMOVr killed $r0, 14, $noreg + tBL 14, $noreg, @foo, implicit-def dead $lr, implicit $sp + $r1 = t2LDRi12 $sp, 0, 14, $noreg + $r2 = t2LDRi12 $sp, 8, 14, $noreg + $r3 = t2LDRi12 $sp, 10, 14, $noreg + $r4 = t2LDRi12 $sp, 4086, 14, $noreg + $r5 = t2LDRi12 $sp, 4088, 14, $noreg + tBL 14, $noreg, @foo, implicit-def dead $lr, implicit $sp + $r1 = t2LDRi12 $sp, 0, 14, $noreg + $r2 = t2LDRi12 $sp, 8, 14, $noreg + $r3 = t2LDRi12 $sp, 10, 14, $noreg + $r4 = t2LDRi12 $sp, 4086, 14, $noreg + $r5 = t2LDRi12 $sp, 4088, 14, $noreg + BX_RET 14, $noreg +... +--- + +name: CheckAddrModeT2_i8 +tracksRegLiveness: true + +body: | + bb.0: + liveins: $r1, $r2, $r3, $r4 + ;CHECK-LABEL: name: CheckAddrModeT2_i8 + ;CHECK: $r0 = tMOVr $r1, 14, $noreg + ;CHECK-NEXT: tBL 14, $noreg, @OUTLINED_FUNCTION_[[I8:[0-9]+]] + ;CHECK-NEXT: t2STRHi8 $r4, $sp, 248, 14, $noreg + $r0 = tMOVr $r1, 14, $noreg + tBL 14, $noreg, @foo, implicit-def dead $lr, implicit $sp + t2STRHi8 $r0, $sp, 0, 14, $noreg + t2STRHi8 $r1, $sp, 8, 14, $noreg + t2STRHi8 $r2, $sp, 10, 14, $noreg + t2STRHi8 $r3, $sp, 247, 14, $noreg + t2STRHi8 $r4, $sp, 248, 14, $noreg + tBL 14, $noreg, @foo, implicit-def dead $lr, implicit $sp + t2STRHi8 $r0, $sp, 0, 14, $noreg + t2STRHi8 $r1, $sp, 8, 14, $noreg + t2STRHi8 $r2, $sp, 10, 14, $noreg + t2STRHi8 $r3, $sp, 247, 14, $noreg + t2STRHi8 $r4, $sp, 248, 14, $noreg + BX_RET 14, $noreg +... 
+--- + +name: CheckAddrModeT2_i8s4 +tracksRegLiveness: true + +body: | + bb.0: + liveins: $r1, $r2, $r3, $r4, $r5 + ;CHECK-LABEL: name: CheckAddrModeT2_i8s4 + ;CHECK: $r0 = tMOVr $r2, 14, $noreg + ;CHECK-NEXT: tBL 14, $noreg, @OUTLINED_FUNCTION_[[I8S4:[0-9]+]] + ;CHECK-NEXT: t2STRDi8 $r4, $r5, $sp, 254, 14, $noreg + $r0 = tMOVr $r2, 14, $noreg + tBL 14, $noreg, @foo, implicit-def dead $lr, implicit $sp + t2STRDi8 $r0, $r1, $sp, 0, 14, $noreg + t2STRDi8 $r1, $r2, $sp, 8, 14, $noreg + t2STRDi8 $r2, $r3, $sp, 10, 14, $noreg + t2STRDi8 $r3, $r4, $sp, 253, 14, $noreg + t2STRDi8 $r4, $r5, $sp, 254, 14, $noreg + tBL 14, $noreg, @foo, implicit-def dead $lr, implicit $sp + t2STRDi8 $r0, $r1, $sp, 0, 14, $noreg + t2STRDi8 $r1, $r2, $sp, 8, 14, $noreg + t2STRDi8 $r2, $r3, $sp, 10, 14, $noreg + t2STRDi8 $r3, $r4, $sp, 253, 14, $noreg + t2STRDi8 $r4, $r5, $sp, 254, 14, $noreg + BX_RET 14, $noreg +... +--- + +name: CheckAddrModeT2_ldrex +tracksRegLiveness: true + +body: | + bb.0: + liveins: $r1, $r2, $r3, $r4, $r5 + ;CHECK-LABEL: name: CheckAddrModeT2_ldrex + ;CHECK: $r0 = tMOVr $r2, 14, $noreg + ;CHECK-NEXT: tBL 14, $noreg, @OUTLINED_FUNCTION_[[LDREX:[0-9]+]] + ;CHECK-NEXT: t2LDREX $sp, 254, 14, $noreg + $r0 = tMOVr $r2, 14, $noreg + tBL 14, $noreg, @foo, implicit-def dead $lr, implicit $sp + $r0 = t2LDREX $sp, 0, 14, $noreg + $r1 = t2LDREX $sp, 8, 14, $noreg + $r2 = t2LDREX $sp, 10, 14, $noreg + $r3 = t2LDREX $sp, 253, 14, $noreg + $r4 = t2LDREX $sp, 254, 14, $noreg + tBL 14, $noreg, @foo, implicit-def dead $lr, implicit $sp + $r0 = t2LDREX $sp, 0, 14, $noreg + $r1 = t2LDREX $sp, 8, 14, $noreg + $r2 = t2LDREX $sp, 10, 14, $noreg + $r3 = t2LDREX $sp, 253, 14, $noreg + $r4 = t2LDREX $sp, 254, 14, $noreg + BX_RET 14, $noreg +... 
+--- + +name: CheckAddrModeT2_i7 +tracksRegLiveness: true + +body: | + bb.0: + liveins: $r2, $q0, $q1, $q2, $q3, $q4 + ;CHECK-LABEL: name: CheckAddrModeT2_i7 + ;CHECK: $r0 = tMOVr $r2, 14, $noreg + ;CHECK-NEXT: tBL 14, $noreg, @OUTLINED_FUNCTION_[[I7:[0-9]+]] + ;CHECK-NEXT: MVE_VSTRBU8 $q4, $sp, 120, 0, $noreg + $r0 = tMOVr $r2, 14, $noreg + tBL 14, $noreg, @foo, implicit-def dead $lr, implicit $sp + MVE_VSTRBU8 $q0, $sp, 0, 0, $noreg + MVE_VSTRBU8 $q1, $sp, 8, 0, $noreg + MVE_VSTRBU8 $q2, $sp, 10, 0, $noreg + MVE_VSTRBU8 $q3, $sp, 119, 0, $noreg + MVE_VSTRBU8 $q4, $sp, 120, 0, $noreg + tBL 14, $noreg, @foo, implicit-def dead $lr, implicit $sp + MVE_VSTRBU8 $q0, $sp, 0, 0, $noreg + MVE_VSTRBU8 $q1, $sp, 8, 0, $noreg + MVE_VSTRBU8 $q2, $sp, 10, 0, $noreg + MVE_VSTRBU8 $q3, $sp, 119, 0, $noreg + MVE_VSTRBU8 $q4, $sp, 120, 0, $noreg + BX_RET 14, $noreg +... +--- + +name: CheckAddrModeT2_i7s2 +tracksRegLiveness: true + +body: | + bb.0: + liveins: $r2, $q0, $q1, $q2, $q3, $q4 + ;CHECK-LABEL: name: CheckAddrModeT2_i7s2 + ;CHECK: $r0 = tMOVr $r2, 14, $noreg + ;CHECK-NEXT: tBL 14, $noreg, @OUTLINED_FUNCTION_[[I7S2:[0-9]+]] + ;CHECK-NEXT: MVE_VSTRHU16 $q4, $sp, 124, 0, $noreg + $r0 = tMOVr $r2, 14, $noreg + tBL 14, $noreg, @foo, implicit-def dead $lr, implicit $sp + MVE_VSTRHU16 $q0, $sp, 0, 0, $noreg + MVE_VSTRHU16 $q1, $sp, 8, 0, $noreg + MVE_VSTRHU16 $q2, $sp, 10, 0, $noreg + MVE_VSTRHU16 $q3, $sp, 119, 0, $noreg + MVE_VSTRHU16 $q4, $sp, 124, 0, $noreg + tBL 14, $noreg, @foo, implicit-def dead $lr, implicit $sp + MVE_VSTRHU16 $q0, $sp, 0, 0, $noreg + MVE_VSTRHU16 $q1, $sp, 8, 0, $noreg + MVE_VSTRHU16 $q2, $sp, 10, 0, $noreg + MVE_VSTRHU16 $q3, $sp, 119, 0, $noreg + MVE_VSTRHU16 $q4, $sp, 124, 0, $noreg + BX_RET 14, $noreg +... 
+--- + +name: CheckAddrModeT2_i7s4 +tracksRegLiveness: true + +body: | + bb.0: + liveins: $r2, $q0, $q1, $q2, $q3, $q4 + ;CHECK-LABEL: name: CheckAddrModeT2_i7s4 + ;CHECK: $r0 = tMOVr $r2, 14, $noreg + ;CHECK-NEXT: tBL 14, $noreg, @OUTLINED_FUNCTION_[[I7S4:[0-9]+]] + ;CHECK-NEXT: MVE_VSTRWU32 $q4, $sp, 126, 0, $noreg + $r0 = tMOVr $r2, 14, $noreg + tBL 14, $noreg, @foo, implicit-def dead $lr, implicit $sp + MVE_VSTRWU32 $q0, $sp, 0, 0, $noreg + MVE_VSTRWU32 $q1, $sp, 8, 0, $noreg + MVE_VSTRWU32 $q2, $sp, 10, 0, $noreg + MVE_VSTRWU32 $q3, $sp, 125, 0, $noreg + MVE_VSTRWU32 $q4, $sp, 126, 0, $noreg + tBL 14, $noreg, @foo, implicit-def dead $lr, implicit $sp + MVE_VSTRWU32 $q0, $sp, 0, 0, $noreg + MVE_VSTRWU32 $q1, $sp, 8, 0, $noreg + MVE_VSTRWU32 $q2, $sp, 10, 0, $noreg + MVE_VSTRWU32 $q3, $sp, 125, 0, $noreg + MVE_VSTRWU32 $q4, $sp, 126, 0, $noreg + BX_RET 14, $noreg +... +--- + +name: CheckAddrModeT1_s +tracksRegLiveness: true + +body: | + bb.0: + liveins: $r0, $r1, $r2, $r3, $r4, $r5, $r6 + ;CHECK-LABEL: name: CheckAddrModeT1_s + ;CHECK: $r0 = tMOVr $r3, 14, $noreg + ;CHECK-NEXT: tBL 14, $noreg, @OUTLINED_FUNCTION_[[T1_S:[0-9]+]] + ;CHECK-NEXT: tSTRspi $r6, $sp, 254, 14, $noreg + $r0 = tMOVr $r3, 14, $noreg + tBL 14, $noreg, @foo, implicit-def dead $lr, implicit $sp + tSTRspi $r0, $sp, 0, 14, $noreg + tSTRspi $r1, $sp, 4, 14, $noreg + tSTRspi $r2, $sp, 8, 14, $noreg + tSTRspi $r3, $sp, 12, 14, $noreg + tSTRspi $r4, $sp, 16, 14, $noreg + tSTRspi $r5, $sp, 253, 14, $noreg + tSTRspi $r6, $sp, 254, 14, $noreg + tBL 14, $noreg, @foo, implicit-def dead $lr, implicit $sp + tSTRspi $r0, $sp, 0, 14, $noreg + tSTRspi $r1, $sp, 4, 14, $noreg + tSTRspi $r2, $sp, 8, 14, $noreg + tSTRspi $r3, $sp, 12, 14, $noreg + tSTRspi $r4, $sp, 16, 14, $noreg + tSTRspi $r5, $sp, 253, 14, $noreg + tSTRspi $r6, $sp, 254, 14, $noreg + BX_RET 14, $noreg +... 
+--- + +name: foo +tracksRegLiveness: true +body: | + bb.0: + liveins: $lr + + BX_RET 14, $noreg + + ;CHECK: name: OUTLINED_FUNCTION_[[I7S4]] + ;CHECK: early-clobber $sp = t2STR_PRE killed $lr, $sp, -8, 14, $noreg + ;CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset -8 + ;CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $lr, 8 + ;CHECK-NEXT: tBL 14, $noreg, @foo, implicit-def dead $lr, implicit $sp + ;CHECK-NEXT: MVE_VSTRWU32 $q0, $sp, 2, 0, $noreg + ;CHECK-NEXT: MVE_VSTRWU32 $q1, $sp, 10, 0, $noreg + ;CHECK-NEXT: MVE_VSTRWU32 $q2, $sp, 12, 0, $noreg + ;CHECK-NEXT: MVE_VSTRWU32 $q3, $sp, 127, 0, $noreg + ;CHECK-NEXT: $lr, $sp = t2LDR_POST $sp, 8, 14, $noreg + + ;CHECK: name: OUTLINED_FUNCTION_[[I8]] + ;CHECK: early-clobber $sp = t2STR_PRE killed $lr, $sp, -8, 14, $noreg + ;CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset -8 + ;CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $lr, 8 + ;CHECK-NEXT: tBL 14, $noreg, @foo, implicit-def dead $lr, implicit $sp + ;CHECK-NEXT: t2STRHi8 $r0, $sp, 8, 14, $noreg + ;CHECK-NEXT: t2STRHi8 $r1, $sp, 16, 14, $noreg + ;CHECK-NEXT: t2STRHi8 $r2, $sp, 18, 14, $noreg + ;CHECK-NEXT: t2STRHi8 $r3, $sp, 255, 14, $noreg + ;CHECK-NEXT: $lr, $sp = t2LDR_POST $sp, 8, 14, $noreg + + ;CHECK: name: OUTLINED_FUNCTION_[[I7S2]] + ;CHECK: early-clobber $sp = t2STR_PRE killed $lr, $sp, -8, 14, $noreg + ;CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset -8 + ;CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $lr, 8 + ;CHECK-NEXT: tBL 14, $noreg, @foo, implicit-def dead $lr, implicit $sp + ;CHECK-NEXT: MVE_VSTRHU16 $q0, $sp, 4, 0, $noreg + ;CHECK-NEXT: MVE_VSTRHU16 $q1, $sp, 12, 0, $noreg + ;CHECK-NEXT: MVE_VSTRHU16 $q2, $sp, 14, 0, $noreg + ;CHECK-NEXT: MVE_VSTRHU16 $q3, $sp, 123, 0, $noreg + ;CHECK-NEXT: $lr, $sp = t2LDR_POST $sp, 8, 14, $noreg + + ;CHECK: name: OUTLINED_FUNCTION_[[LDREX]] + ;CHECK: early-clobber $sp = t2STR_PRE killed $lr, $sp, -8, 14, $noreg + ;CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset -8 + ;CHECK-NEXT: 
frame-setup CFI_INSTRUCTION offset $lr, 8 + ;CHECK-NEXT: tBL 14, $noreg, @foo, implicit-def dead $lr, implicit $sp + ;CHECK-NEXT: $r0 = t2LDREX $sp, 2, 14, $noreg + ;CHECK-NEXT: $r1 = t2LDREX $sp, 10, 14, $noreg + ;CHECK-NEXT: $r2 = t2LDREX $sp, 12, 14, $noreg + ;CHECK-NEXT: $r3 = t2LDREX $sp, 255, 14, $noreg + ;CHECK-NEXT: $lr, $sp = t2LDR_POST $sp, 8, 14, $noreg + + ;CHECK: name: OUTLINED_FUNCTION_[[I7]] + ;CHECK: early-clobber $sp = t2STR_PRE killed $lr, $sp, -8, 14, $noreg + ;CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset -8 + ;CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $lr, 8 + ;CHECK-NEXT: tBL 14, $noreg, @foo, implicit-def dead $lr, implicit $sp + ;CHECK-NEXT: MVE_VSTRBU8 $q0, $sp, 8, 0, $noreg + ;CHECK-NEXT: MVE_VSTRBU8 $q1, $sp, 16, 0, $noreg + ;CHECK-NEXT: MVE_VSTRBU8 $q2, $sp, 18, 0, $noreg + ;CHECK-NEXT: MVE_VSTRBU8 $q3, $sp, 127, 0, $noreg + ;CHECK-NEXT: $lr, $sp = t2LDR_POST $sp, 8, 14, $noreg + + ;CHECK: name: OUTLINED_FUNCTION_[[I12]] + ;CHECK: early-clobber $sp = t2STR_PRE killed $lr, $sp, -8, 14, $noreg + ;CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset -8 + ;CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $lr, 8 + ;CHECK-NEXT: tBL 14, $noreg, @foo, implicit-def dead $lr, implicit $sp + ;CHECK-NEXT: $r1 = t2LDRi12 $sp, 8, 14, $noreg + ;CHECK-NEXT: $r2 = t2LDRi12 $sp, 16, 14, $noreg + ;CHECK-NEXT: $r3 = t2LDRi12 $sp, 18, 14, $noreg + ;CHECK-NEXT: $r4 = t2LDRi12 $sp, 4094, 14, $noreg + ;CHECK-NEXT: $lr, $sp = t2LDR_POST $sp, 8, 14, $noreg + + ;CHECK: name: OUTLINED_FUNCTION_[[I8S4]] + ;CHECK: early-clobber $sp = t2STR_PRE killed $lr, $sp, -8, 14, $noreg + ;CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset -8 + ;CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $lr, 8 + ;CHECK-NEXT: tBL 14, $noreg, @foo, implicit-def dead $lr, implicit $sp + ;CHECK-NEXT: t2STRDi8 $r0, $r1, $sp, 2, 14, $noreg + ;CHECK-NEXT: t2STRDi8 $r1, $r2, $sp, 10, 14, $noreg + ;CHECK-NEXT: t2STRDi8 $r2, $r3, $sp, 12, 14, $noreg + ;CHECK-NEXT: t2STRDi8 $r3, $r4, 
$sp, 255, 14, $noreg + ;CHECK-NEXT: $lr, $sp = t2LDR_POST $sp, 8, 14, $noreg + + ;CHECK: name: OUTLINED_FUNCTION_[[T1_S]] + ;CHECK: early-clobber $sp = t2STR_PRE killed $lr, $sp, -8, 14, $noreg + ;CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset -8 + ;CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $lr, 8 + ;CHECK-NEXT: tBL 14, $noreg, @foo, implicit-def dead $lr, implicit $sp + ;CHECK-NEXT: tSTRspi $r0, $sp, 2, 14, $noreg + ;CHECK-NEXT: tSTRspi $r1, $sp, 6, 14, $noreg + ;CHECK-NEXT: tSTRspi $r2, $sp, 10, 14, $noreg + ;CHECK-NEXT: tSTRspi $r3, $sp, 14, 14, $noreg + ;CHECK-NEXT: tSTRspi $r4, $sp, 18, 14, $noreg + ;CHECK-NEXT: tSTRspi $r5, $sp, 255, 14, $noreg + ;CHECK-NEXT: $lr, $sp = t2LDR_POST $sp, 8, 14, $noreg diff --git a/llvm/test/CodeGen/ARM/machine-outliner-tail.ll b/llvm/test/CodeGen/ARM/machine-outliner-tail.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/ARM/machine-outliner-tail.ll @@ -0,0 +1,42 @@ +; RUN: llc -verify-machineinstrs -enable-machine-outliner -mtriple=arm-- \ +; RUN: --stop-after=machine-outliner < %s | FileCheck %s --check-prefix=ARM +; RUN: llc -verify-machineinstrs -enable-machine-outliner -mtriple=thumbv7-- \ +; RUN: --stop-after=machine-outliner < %s | FileCheck %s --check-prefix=THUMB +; RUN: llc -verify-machineinstrs -enable-machine-outliner \ +; RUN: -mtriple=thumbv7-apple-darwin --stop-after=machine-outliner < %s \ +; RUN: | FileCheck %s --check-prefix=MACHO + +; ARM-LABEL: name: OUTLINED_FUNCTION_0 +; ARM: $r0 = MOVi 1, 14, $noreg, $noreg +; ARM-NEXT: $r1 = MOVi 2, 14, $noreg, $noreg +; ARM-NEXT: $r2 = MOVi 3, 14, $noreg, $noreg +; ARM-NEXT: $r3 = MOVi 4, 14, $noreg, $noreg +; ARM-NEXT: TAILJMPd @z + +; THUMB-LABEL: name: OUTLINED_FUNCTION_0 +; THUMB: $r0, dead $cpsr = tMOVi8 1, 14, $noreg +; THUMB-NEXT: $r1, dead $cpsr = tMOVi8 2, 14, $noreg +; THUMB-NEXT: $r2, dead $cpsr = tMOVi8 3, 14, $noreg +; THUMB-NEXT: $r3, dead $cpsr = tMOVi8 4, 14, $noreg +; THUMB-NEXT: tTAILJMPdND @z, 14, $noreg + +; MACHO-LABEL: 
name: OUTLINED_FUNCTION_0 +; MACHO: $r0, dead $cpsr = tMOVi8 1, 14, $noreg +; MACHO-NEXT: $r1, dead $cpsr = tMOVi8 2, 14, $noreg +; MACHO-NEXT: $r2, dead $cpsr = tMOVi8 3, 14, $noreg +; MACHO-NEXT: $r3, dead $cpsr = tMOVi8 4, 14, $noreg +; MACHO-NEXT: tTAILJMPd @z, 14, $noreg + +define void @a() { +entry: + tail call void @z(i32 1, i32 2, i32 3, i32 4) + ret void +} + +declare void @z(i32, i32, i32, i32) + +define dso_local void @b(i32* nocapture readnone %p) { +entry: + tail call void @z(i32 1, i32 2, i32 3, i32 4) + ret void +} diff --git a/llvm/test/CodeGen/ARM/machine-outliner-thunk.ll b/llvm/test/CodeGen/ARM/machine-outliner-thunk.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/ARM/machine-outliner-thunk.ll @@ -0,0 +1,111 @@ +; RUN: llc -enable-machine-outliner -verify-machineinstrs -mtriple=armv7-- \ +; RUN: -stop-after=machine-outliner < %s | FileCheck %s --check-prefix=ARM +; RUN: llc -enable-machine-outliner -verify-machineinstrs -mtriple=thumbv7-- \ +; RUN: -stop-after=machine-outliner < %s | FileCheck %s --check-prefix=THUMB +; RUN: llc -enable-machine-outliner -verify-machineinstrs \ +; RUN: -mtriple=thumbv7-apple-darwin -stop-after=machine-outliner < %s \ +; RUN: | FileCheck %s --check-prefix=MACHO + +declare i32 @thunk_called_fn(i32, i32, i32, i32) + +define i32 @a() { +; ARM-LABEL: name: a +; ARM: bb.0.entry: +; ARM-NEXT: liveins: $r11, $lr +; ARM: $sp = frame-setup STMDB_UPD $sp, 14, $noreg, killed $r11, killed $lr +; ARM-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 8 +; ARM-NEXT: frame-setup CFI_INSTRUCTION offset $lr, -4 +; ARM-NEXT: frame-setup CFI_INSTRUCTION offset $r11, -8 +; ARM-NEXT: BL @OUTLINED_FUNCTION_0{{.*}} +; ARM-NEXT: renamable $r0 = ADDri killed renamable $r0, 8, 14, $noreg, $noreg +; ARM-NEXT: $sp = LDMIA_RET $sp, 14, $noreg, def $r11, def $pc, implicit killed $r0 + +; THUMB-LABEL: name: a +; THUMB: bb.0.entry: +; THUMB-NEXT: liveins: $r7, $lr +; THUMB: frame-setup tPUSH 14, $noreg, killed $r7, killed $lr +; 
THUMB-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 8 +; THUMB-NEXT: frame-setup CFI_INSTRUCTION offset $lr, -4 +; THUMB-NEXT: frame-setup CFI_INSTRUCTION offset $r7, -8 +; THUMB-NEXT: tBL 14, $noreg, @OUTLINED_FUNCTION_0{{.*}} +; THUMB-NEXT: renamable $r0, dead $cpsr = tADDi8 killed renamable $r0, 8, 14, $noreg +; THUMB-NEXT: tPOP_RET 14, $noreg, def $r7, def $pc + +; MACHO-LABEL: name: a +; MACHO: bb.0.entry: +; MACHO-NEXT: liveins: $lr +; MACHO: early-clobber $sp = frame-setup t2STR_PRE killed $lr, $sp, -4, 14, $noreg +; MACHO-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 4 +; MACHO-NEXT: frame-setup CFI_INSTRUCTION offset $lr, -4 +; MACHO-NEXT: tBL 14, $noreg, @OUTLINED_FUNCTION_0{{.*}} +; MACHO-NEXT: renamable $r0, dead $cpsr = tADDi8 killed renamable $r0, 8, 14, $noreg +; MACHO-NEXT: $lr, $sp = t2LDR_POST $sp, 4, 14, $noreg +; MACHO-NEXT: tBX_RET 14, $noreg, implicit killed $r0 +entry: + %call = tail call i32 @thunk_called_fn(i32 1, i32 2, i32 3, i32 4) + %cx = add i32 %call, 8 + ret i32 %cx +} + +define i32 @b() { +; ARM-LABEL: name: b +; ARM: bb.0.entry: +; ARM-NEXT: liveins: $r11, $lr +; ARM: $sp = frame-setup STMDB_UPD $sp, 14, $noreg, killed $r11, killed $lr +; ARM-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 8 +; ARM-NEXT: frame-setup CFI_INSTRUCTION offset $lr, -4 +; ARM-NEXT: frame-setup CFI_INSTRUCTION offset $r11, -8 +; ARM-NEXT: BL @OUTLINED_FUNCTION_0{{.*}} +; ARM-NEXT: renamable $r0 = ADDri killed renamable $r0, 88, 14, $noreg, $noreg +; ARM-NEXT: $sp = LDMIA_RET $sp, 14, $noreg, def $r11, def $pc, implicit killed $r0 + +; THUMB-LABEL: name: b +; THUMB: bb.0.entry: +; THUMB-NEXT: liveins: $r7, $lr +; THUMB: frame-setup tPUSH 14, $noreg, killed $r7, killed $lr +; THUMB-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 8 +; THUMB-NEXT: frame-setup CFI_INSTRUCTION offset $lr, -4 +; THUMB-NEXT: frame-setup CFI_INSTRUCTION offset $r7, -8 +; THUMB-NEXT: tBL 14, $noreg, @OUTLINED_FUNCTION_0{{.*}} +; THUMB-NEXT: renamable $r0, dead $cpsr = 
tADDi8 killed renamable $r0, 88, 14, $noreg +; THUMB-NEXT: tPOP_RET 14, $noreg, def $r7, def $pc + +; MACHO-LABEL: name: b +; MACHO: bb.0.entry: +; MACHO-NEXT: liveins: $lr +; MACHO: early-clobber $sp = frame-setup t2STR_PRE killed $lr, $sp, -4, 14, $noreg +; MACHO-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 4 +; MACHO-NEXT: frame-setup CFI_INSTRUCTION offset $lr, -4 +; MACHO-NEXT: tBL 14, $noreg, @OUTLINED_FUNCTION_0{{.*}} +; MACHO-NEXT: renamable $r0, dead $cpsr = tADDi8 killed renamable $r0, 88, 14, $noreg +; MACHO-NEXT: $lr, $sp = t2LDR_POST $sp, 4, 14, $noreg +; MACHO-NEXT: tBX_RET 14, $noreg, implicit killed $r0 +entry: + %call = tail call i32 @thunk_called_fn(i32 1, i32 2, i32 3, i32 4) + %cx = add i32 %call, 88 + ret i32 %cx +} + +; ARM-LABEL: name: OUTLINED_FUNCTION_0 +; ARM: bb.0: +; ARM-NEXT: $r0 = MOVi 1, 14, $noreg, $noreg +; ARM-NEXT: $r1 = MOVi 2, 14, $noreg, $noreg +; ARM-NEXT: $r2 = MOVi 3, 14, $noreg, $noreg +; ARM-NEXT: $r3 = MOVi 4, 14, $noreg, $noreg +; ARM-NEXT: TAILJMPd @thunk_called_fn, implicit $sp + +; THUMB-LABEL: name: OUTLINED_FUNCTION_0 +; THUMB: bb.0: +; THUMB-NEXT: $r0, dead $cpsr = tMOVi8 1, 14, $noreg +; THUMB-NEXT: $r1, dead $cpsr = tMOVi8 2, 14, $noreg +; THUMB-NEXT: $r2, dead $cpsr = tMOVi8 3, 14, $noreg +; THUMB-NEXT: $r3, dead $cpsr = tMOVi8 4, 14, $noreg +; THUMB-NEXT: tTAILJMPdND @thunk_called_fn, 14, $noreg, implicit $sp + +; MACHO-LABEL: name: OUTLINED_FUNCTION_0 +; MACHO: bb.0: +; MACHO-NEXT: $r0, dead $cpsr = tMOVi8 1, 14, $noreg +; MACHO-NEXT: $r1, dead $cpsr = tMOVi8 2, 14, $noreg +; MACHO-NEXT: $r2, dead $cpsr = tMOVi8 3, 14, $noreg +; MACHO-NEXT: $r3, dead $cpsr = tMOVi8 4, 14, $noreg +; MACHO-NEXT: tTAILJMPd @thunk_called_fn, 14, $noreg, implicit $sp diff --git a/llvm/test/CodeGen/ARM/machine-outliner.ll b/llvm/test/CodeGen/ARM/machine-outliner.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/ARM/machine-outliner.ll @@ -0,0 +1,135 @@ +; RUN: llc -verify-machineinstrs -enable-machine-outliner 
-mtriple=arm-- \ +; RUN: < %s | FileCheck %s --check-prefixes=CHECK,ARM +; RUN: llc -verify-machineinstrs -enable-machine-outliner -mtriple=thumbv7-- \ +; RUN: < %s | FileCheck %s --check-prefixes=CHECK,THUMB +; RUN: llc -verify-machineinstrs -enable-machine-outliner \ +; RUN: -enable-linkonceodr-outlining -mtriple=arm-- < %s | FileCheck %s \ +; RUN: --check-prefix=ODR +; RUN: llc -verify-machineinstrs -enable-machine-outliner \ +; RUN: -enable-linkonceodr-outlining -mtriple=thumbv7-- < %s | FileCheck %s \ +; RUN: --check-prefix=ODR +; RUN: llc -verify-machineinstrs -enable-machine-outliner -mtriple=arm-- \ +; RUN: -stop-after=machine-outliner < %s | FileCheck %s \ +; RUN: --check-prefix=TARGET_FEATURES + + +; Make sure that we inherit target features from functions and make sure we have +; the right function attributes. +; TARGET_FEATURES: define internal void @OUTLINED_FUNCTION_{{[0-9]+}}() +; TARGET_FEATURES-SAME: #[[ATTR_NUM:[0-9]+]] +; TARGET_FEATURES-DAG: attributes #[[ATTR_NUM]] = { +; TARGET_FEATURES-SAME: minsize +; TARGET_FEATURES-SAME: optsize +; TARGET_FEATURES-SAME: "target-features"="+neon" + +define linkonce_odr void @fish() #0 { + ; CHECK-LABEL: fish: + ; CHECK-NOT: OUTLINED + ; ODR: [[OUTLINED:OUTLINED_FUNCTION_[0-9]+]] + %1 = alloca i32, align 4 + %2 = alloca i32, align 4 + %3 = alloca i32, align 4 + %4 = alloca i32, align 4 + %5 = alloca i32, align 4 + %6 = alloca i32, align 4 + store i32 1, i32* %1, align 4 + store i32 2, i32* %2, align 4 + store i32 3, i32* %3, align 4 + store i32 4, i32* %4, align 4 + store i32 5, i32* %5, align 4 + store i32 6, i32* %6, align 4 + ret void +} + +define void @turtle() section "TURTLE,turtle" { + ; CHECK-LABEL: turtle: + ; ODR-LABEL: turtle: + ; CHECK-NOT: OUTLINED + %1 = alloca i32, align 4 + %2 = alloca i32, align 4 + %3 = alloca i32, align 4 + %4 = alloca i32, align 4 + %5 = alloca i32, align 4 + %6 = alloca i32, align 4 + store i32 1, i32* %1, align 4 + store i32 2, i32* %2, align 4 + store i32 3, i32* %3, 
align 4 + store i32 4, i32* %4, align 4 + store i32 5, i32* %5, align 4 + store i32 6, i32* %6, align 4 + ret void +} + +define void @cat() #0 { + ; CHECK-LABEL: cat: + ; CHECK: [[OUTLINED:OUTLINED_FUNCTION_[0-9]+]] + ; ODR: [[OUTLINED]] + %1 = alloca i32, align 4 + %2 = alloca i32, align 4 + %3 = alloca i32, align 4 + %4 = alloca i32, align 4 + %5 = alloca i32, align 4 + %6 = alloca i32, align 4 + store i32 1, i32* %1, align 4 + store i32 2, i32* %2, align 4 + store i32 3, i32* %3, align 4 + store i32 4, i32* %4, align 4 + store i32 5, i32* %5, align 4 + store i32 6, i32* %6, align 4 + ret void +} + +define void @dog() #0 { + ; CHECK-LABEL: dog: + ; CHECK: [[OUTLINED]] + ; ODR: [[OUTLINED]] + %1 = alloca i32, align 4 + %2 = alloca i32, align 4 + %3 = alloca i32, align 4 + %4 = alloca i32, align 4 + %5 = alloca i32, align 4 + %6 = alloca i32, align 4 + store i32 1, i32* %1, align 4 + store i32 2, i32* %2, align 4 + store i32 3, i32* %3, align 4 + store i32 4, i32* %4, align 4 + store i32 5, i32* %5, align 4 + store i32 6, i32* %6, align 4 + ret void +} + +; ODR: [[OUTLINED]]: +; ARM: .code 32 +; ARM-NEXT: [[OUTLINED]]: +; ARM: mov r0, #2 +; ARM-NEXT: str r0, [sp, #16] +; ARM-NEXT: mov r0, #1 +; ARM-NEXT: str r0, [sp, #20] +; ARM-NEXT: mov r0, #3 +; ARM-NEXT: str r0, [sp, #12] +; ARM-NEXT: mov r0, #4 +; ARM-NEXT: str r0, [sp, #8] +; ARM-NEXT: mov r0, #5 +; ARM-NEXT: str r0, [sp, #4] +; ARM-NEXT: mov r0, #6 +; ARM-NEXT: str r0, [sp] +; ARM-NEXT: mov pc, lr + +; THUMB: .code 16 +; THUMB-NEXT: .thumb_func +; THUMB-NEXT: [[OUTLINED]]: +; THUMB: movs r0, #2 +; THUMB-NEXT: str r0, [sp, #16] +; THUMB-NEXT: movs r0, #1 +; THUMB-NEXT: str r0, [sp, #20] +; THUMB-NEXT: movs r0, #3 +; THUMB-NEXT: str r0, [sp, #12] +; THUMB-NEXT: movs r0, #4 +; THUMB-NEXT: str r0, [sp, #8] +; THUMB-NEXT: movs r0, #5 +; THUMB-NEXT: str r0, [sp, #4] +; THUMB-NEXT: movs r0, #6 +; THUMB-NEXT: str r0, [sp] +; THUMB-NEXT: bx lr + +attributes #0 = { nounwind "target-cpu"="cortex-a53" 
"target-features"="+neon" } diff --git a/llvm/test/CodeGen/ARM/machine-outliner.mir b/llvm/test/CodeGen/ARM/machine-outliner.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/ARM/machine-outliner.mir @@ -0,0 +1,72 @@ +# RUN: llc -mtriple=arm-- -run-pass=machine-outliner -verify-machineinstrs \ +# RUN: %s -o - | FileCheck %s +# RUN: llc -mtriple=thumbv7-- -run-pass=machine-outliner -verify-machineinstrs \ +# RUN: %s -o - | FileCheck %s + +--- | + define void @outline_1() #0 { ret void } + define void @outline_2() #0 { ret void } + define void @outline_3() #0 { ret void } + define void @dont_outline() { ret void } + + attributes #0 = { minsize optsize } +... +--- + +name: outline_1 +tracksRegLiveness: true +body: | + bb.0: + ; CHECK-LABEL: bb.0: + ; CHECK: OUTLINED + liveins: $r2 + $r2 = MOVi 1, 14, $noreg, $noreg + $r2 = MOVi 1, 14, $noreg, $noreg + $r2 = MOVi 1, 14, $noreg, $noreg + $r2 = MOVi 1, 14, $noreg, $noreg + BX_RET 14, $noreg +... +--- + +name: outline_2 +tracksRegLiveness: true +body: | + bb.0: + ; CHECK-LABEL: bb.0: + ; CHECK: OUTLINED + liveins: $r2 + $r2 = MOVi 1, 14, $noreg, $noreg + $r2 = MOVi 1, 14, $noreg, $noreg + $r2 = MOVi 1, 14, $noreg, $noreg + $r2 = MOVi 1, 14, $noreg, $noreg + BX_RET 14, $noreg +... +--- + +name: outline_3 +tracksRegLiveness: true +body: | + bb.0: + ; CHECK-LABEL: bb.0: + ; CHECK: OUTLINED + liveins: $r2 + $r2 = MOVi 1, 14, $noreg, $noreg + $r2 = MOVi 1, 14, $noreg, $noreg + $r2 = MOVi 1, 14, $noreg, $noreg + $r2 = MOVi 1, 14, $noreg, $noreg + BX_RET 14, $noreg +... +--- + +name: dont_outline +tracksRegLiveness: true +body: | + bb.0: + ; CHECK-LABEL: bb.0: + ; CHECK-NOT: BL + liveins: $lr, $r2 + $r2 = MOVi 1, 14, $noreg, $noreg + $r2 = MOVi 1, 14, $noreg, $noreg + $r2 = MOVi 1, 14, $noreg, $noreg + $r2 = MOVi 1, 14, $noreg, $noreg + BX_RET 14, $noreg