diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -6149,11 +6149,12 @@ if (Arg *A = Args.getLastArg(options::OPT_moutline, options::OPT_mno_outline)) { if (A->getOption().matches(options::OPT_moutline)) { - // We only support -moutline in AArch64 right now. If we're not compiling - // for AArch64, emit a warning and ignore the flag. Otherwise, add the - // proper mllvm flags. - if (Triple.getArch() != llvm::Triple::aarch64 && - Triple.getArch() != llvm::Triple::aarch64_32) { + // We only support -moutline in AArch64 and ARM targets right now. If + // we're not compiling for these, emit a warning and ignore the flag. + // Otherwise, add the proper mllvm flags. + if (!(Triple.isARM() || Triple.isThumb() || + Triple.getArch() == llvm::Triple::aarch64 || + Triple.getArch() == llvm::Triple::aarch64_32)) { D.Diag(diag::warn_drv_moutline_unsupported_opt) << Triple.getArchName(); } else { CmdArgs.push_back("-mllvm"); diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h --- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h @@ -356,6 +356,22 @@ ArrayRef<std::pair<unsigned, const char *>> getSerializableBitmaskMachineOperandTargetFlags() const override; + /// ARM supports the MachineOutliner. 
+ bool isFunctionSafeToOutlineFrom(MachineFunction &MF, + bool OutlineFromLinkOnceODRs) const override; + outliner::OutlinedFunction getOutliningCandidateInfo( + std::vector<outliner::Candidate> &RepeatedSequenceLocs) const override; + outliner::InstrType getOutliningType(MachineBasicBlock::iterator &MIT, + unsigned Flags) const override; + bool isMBBSafeToOutlineFrom(MachineBasicBlock &MBB, + unsigned &Flags) const override; + void buildOutlinedFrame(MachineBasicBlock &MBB, MachineFunction &MF, + const outliner::OutlinedFunction &OF) const override; + MachineBasicBlock::iterator + insertOutlinedCall(Module &M, MachineBasicBlock &MBB, + MachineBasicBlock::iterator &It, MachineFunction &MF, + const outliner::Candidate &C) const override; + private: unsigned getInstBundleLength(const MachineInstr &MI) const; diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp --- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -32,6 +32,7 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/ScoreboardHazardRecognizer.h" @@ -5517,3 +5518,372 @@ return ConstantMaterializationCost(Val1, Subtarget, !ForCodesize) < ConstantMaterializationCost(Val2, Subtarget, !ForCodesize); } + +/// Constants defining how certain sequences should be outlined. +/// This encompasses how an outlined function should be called, and what kind of +/// frame should be emitted for that outlined function. +/// +/// \p MachineOutlinerTailCall implies that the function is being created from +/// a sequence of instructions ending in a return. 
+/// +/// That is, +/// +/// I1 OUTLINED_FUNCTION: +/// I2 --> B OUTLINED_FUNCTION I1 +/// BX LR I2 +/// BX LR +/// +/// +-------------------------+--------+-----+ +/// | | Thumb2 | ARM | +/// +-------------------------+--------+-----+ +/// | Call overhead in Bytes | 4 | 4 | +/// | Frame overhead in Bytes | 0 | 0 | +/// | Stack fixup required | No | No | +/// +-------------------------+--------+-----+ +/// +/// \p MachineOutlinerThunk implies that the function is being created from +/// a sequence of instructions ending in a call. The outlined function is +/// called with a BL instruction, and the outlined function tail-calls the +/// original call destination. +/// +/// That is, +/// +/// I1 OUTLINED_FUNCTION: +/// I2 --> BL OUTLINED_FUNCTION I1 +/// BL f I2 +/// B f +/// +/// +-------------------------+--------+-----+ +/// | | Thumb2 | ARM | +/// +-------------------------+--------+-----+ +/// | Call overhead in Bytes | 4 | 4 | +/// | Frame overhead in Bytes | 0 | 0 | +/// | Stack fixup required | No | No | +/// +-------------------------+--------+-----+ + +enum MachineOutlinerClass { MachineOutlinerTailCall, MachineOutlinerThunk }; + +enum MachineOutlinerMBBFlags { + LRUnavailableSomewhere = 0x2, + HasCalls = 0x4, + UnsafeRegsDead = 0x8 +}; + +struct OutlinerCosts { + const int CallTailCall; + const int FrameTailCall; + const int CallThunk; + const int FrameThunk; + + OutlinerCosts(const ARMSubtarget &target) + : CallTailCall(target.isThumb() ? 4 : 4), + FrameTailCall(target.isThumb() ? 0 : 0), + CallThunk(target.isThumb() ? 4 : 4), + FrameThunk(target.isThumb() ? 
0 : 0) {} +}; + +outliner::OutlinedFunction ARMBaseInstrInfo::getOutliningCandidateInfo( + std::vector<outliner::Candidate> &RepeatedSequenceLocs) const { + outliner::Candidate &FirstCand = RepeatedSequenceLocs[0]; + unsigned SequenceSize = + std::accumulate(FirstCand.front(), std::next(FirstCand.back()), 0, + [this](unsigned Sum, const MachineInstr &MI) { + return Sum + getInstSizeInBytes(MI); + }); + + // Properties about candidate MBBs that hold for all of them. + unsigned FlagsSetInAll = 0xF; + + // AND together every candidate's flags to find those set in all of them. + const TargetRegisterInfo &TRI = getRegisterInfo(); + std::for_each( + RepeatedSequenceLocs.begin(), RepeatedSequenceLocs.end(), + [&FlagsSetInAll](outliner::Candidate &C) { FlagsSetInAll &= C.Flags; }); + + // According to the ARM Procedure Call Standard, the following are + // undefined on entry/exit from a function call: + // + // * Register R12(IP), + // * Condition codes (and thus the CPSR register) + // + // Since we control the instructions which are part of the outlined regions + // we don't need to be fully compliant with the AAPCS, but we have to + // guarantee that if a veneer is inserted at link time the code is still + // correct. Because of this, we can't outline any sequence of instructions + // where one of these registers is live into/across it. Thus, we need to + // delete those candidates. + auto CantGuaranteeValueAcrossCall = [&TRI](outliner::Candidate &C) { + // If the unsafe registers in this block are all dead, then we don't need + // to compute liveness here. + if (C.Flags & UnsafeRegsDead) + return false; + C.initLRU(TRI); + LiveRegUnits LRU = C.LRU; + return (!LRU.available(ARM::R12) || !LRU.available(ARM::CPSR)); + }; + + // Are there any candidates where those registers are live? + if (!(FlagsSetInAll & UnsafeRegsDead)) { + // Erase every candidate that violates the restrictions above. 
(It could be + // true that we have viable candidates, so it's not worth bailing out in + // the case that, say, 1 out of 20 candidates violate the restrictions.) + RepeatedSequenceLocs.erase(std::remove_if(RepeatedSequenceLocs.begin(), + RepeatedSequenceLocs.end(), + CantGuaranteeValueAcrossCall), + RepeatedSequenceLocs.end()); + + // If the sequence doesn't have enough candidates left, then we're done. + if (RepeatedSequenceLocs.size() < 2) + return outliner::OutlinedFunction(); + } + + // At this point, we have only "safe" candidates to outline. Figure out + // frame + call instruction information. + + unsigned LastInstrOpcode = RepeatedSequenceLocs[0].back()->getOpcode(); + + // Helper lambda which sets call information for every candidate. + auto SetCandidateCallInfo = + [&RepeatedSequenceLocs](unsigned CallID, unsigned NumBytesForCall) { + for (outliner::Candidate &C : RepeatedSequenceLocs) + C.setCallInfo(CallID, NumBytesForCall); + }; + + const OutlinerCosts Costs(Subtarget); + unsigned FrameID = 0; + unsigned NumBytesToCreateFrame = 0; + + // If the last instruction in any candidate is a terminator, then we should + // tail call all of the candidates. 
+ if (RepeatedSequenceLocs[0].back()->isTerminator()) { + FrameID = MachineOutlinerTailCall; + NumBytesToCreateFrame = Costs.FrameTailCall; + SetCandidateCallInfo(MachineOutlinerTailCall, Costs.CallTailCall); + } else if (LastInstrOpcode == ARM::BL || LastInstrOpcode == ARM::BLX || + LastInstrOpcode == ARM::tBL || LastInstrOpcode == ARM::tBLXr || + LastInstrOpcode == ARM::tBLXi) { + FrameID = MachineOutlinerThunk; + NumBytesToCreateFrame = Costs.FrameThunk; + SetCandidateCallInfo(MachineOutlinerThunk, Costs.CallThunk); + } else + return outliner::OutlinedFunction(); + + return outliner::OutlinedFunction(RepeatedSequenceLocs, SequenceSize, + NumBytesToCreateFrame, FrameID); +} + +bool ARMBaseInstrInfo::isFunctionSafeToOutlineFrom( + MachineFunction &MF, bool OutlineFromLinkOnceODRs) const { + const Function &F = MF.getFunction(); + + // Can F be deduplicated by the linker? If it can, don't outline from it. + if (!OutlineFromLinkOnceODRs && F.hasLinkOnceODRLinkage()) + return false; + + // Don't outline from functions with section markings; the program could + // expect that all the code is in the named section. + // FIXME: Allow outlining from multiple functions with the same section + // marking. + if (F.hasSection()) + return false; + + // FIXME: Thumb1 outlining is not handled + if (MF.getInfo<ARMFunctionInfo>()->isThumb1OnlyFunction()) + return false; + + // It's safe to outline from MF. + return true; +} + +bool ARMBaseInstrInfo::isMBBSafeToOutlineFrom(MachineBasicBlock &MBB, + unsigned &Flags) const { + // Check if LR is available through all of the MBB. If it's not, then set + // a flag. + assert(MBB.getParent()->getRegInfo().tracksLiveness() && + "Suitable Machine Function for outlining must track liveness"); + + LiveRegUnits LRU(getRegisterInfo()); + + std::for_each(MBB.rbegin(), MBB.rend(), + [&LRU](MachineInstr &MI) { LRU.accumulate(MI); }); + + // Check if each of the unsafe registers are available... 
+ bool R12AvailableInBlock = LRU.available(ARM::R12); + bool CPSRAvailableInBlock = LRU.available(ARM::CPSR); + + // If all of these are dead (and not live out), we know we don't have to check + // them later. + if (R12AvailableInBlock && CPSRAvailableInBlock) + Flags |= MachineOutlinerMBBFlags::UnsafeRegsDead; + + // Now, add the live outs to the set. + LRU.addLiveOuts(MBB); + + // If any of these registers is available in the MBB, but also a live out of + // the block, then we know outlining is unsafe. + if (R12AvailableInBlock && !LRU.available(ARM::R12)) + return false; + if (CPSRAvailableInBlock && !LRU.available(ARM::CPSR)) + return false; + + // Check if there's a call inside this MachineBasicBlock. If there is, then + // set a flag. + if (any_of(MBB, [](MachineInstr &MI) { return MI.isCall(); })) + Flags |= MachineOutlinerMBBFlags::HasCalls; + + if (!LRU.available(ARM::LR)) + Flags |= MachineOutlinerMBBFlags::LRUnavailableSomewhere; + + return true; +} + +outliner::InstrType +ARMBaseInstrInfo::getOutliningType(MachineBasicBlock::iterator &MIT, + unsigned Flags) const { + MachineInstr &MI = *MIT; + const TargetRegisterInfo *TRI = &getRegisterInfo(); + + // Be conservative with inline ASM + if (MI.isInlineAsm()) + return outliner::InstrType::Illegal; + + // Don't allow debug values to impact outlining type. + if (MI.isDebugInstr() || MI.isIndirectDebugValue()) + return outliner::InstrType::Invisible; + + // At this point, KILL or IMPLICIT_DEF instructions don't really tell us much + // so we can go ahead and skip over them. + if (MI.isKill() || MI.isImplicitDef()) + return outliner::InstrType::Invisible; + + // PIC instructions contain labels, outlining them would break offset + // computing. 
+ unsigned Opc = MI.getOpcode(); + if (Opc == ARM::tPICADD || Opc == ARM::PICADD || Opc == ARM::PICSTR || + Opc == ARM::PICSTRB || Opc == ARM::PICSTRH || Opc == ARM::PICLDR || + Opc == ARM::PICLDRB || Opc == ARM::PICLDRH || Opc == ARM::PICLDRSB || + Opc == ARM::PICLDRSH || Opc == ARM::t2LDRpci_pic || + Opc == ARM::t2MOVi16_ga_pcrel || Opc == ARM::t2MOVTi16_ga_pcrel || + Opc == ARM::t2MOV_ga_pcrel) + return outliner::InstrType::Illegal; + + // Be conservative with ARMv8.1 MVE instructions. + if (Opc == ARM::t2BF_LabelPseudo || Opc == ARM::t2DoLoopStart || + Opc == ARM::t2WhileLoopStart || Opc == ARM::t2LoopDec || + Opc == ARM::t2LoopEnd) + return outliner::InstrType::Illegal; + + const MCInstrDesc &MCID = MI.getDesc(); + uint64_t MIFlags = MCID.TSFlags; + if ((MIFlags & ARMII::DomainMask) == ARMII::DomainMVE) + return outliner::InstrType::Illegal; + + // Is this a terminator for a basic block? + if (MI.isTerminator()) { + // Don't outline if the branch is not unconditional. + if (isPredicated(MI)) + return outliner::InstrType::Illegal; + + // Is this the end of a function? + if (MI.getParent()->succ_empty()) + return outliner::InstrType::Legal; + + // It's not, so don't outline it. + return outliner::InstrType::Illegal; + } + + // Make sure none of the operands are un-outlinable. + for (const MachineOperand &MOP : MI.operands()) { + if (MOP.isCPI() || MOP.isJTI() || MOP.isCFIIndex() || MOP.isFI() || + MOP.isTargetIndex()) + return outliner::InstrType::Illegal; + } + + // Don't outline if link register or program counter value are used. + if (MI.readsRegister(ARM::LR, TRI) || MI.readsRegister(ARM::PC, TRI)) + return outliner::InstrType::Illegal; + + if (MI.isCall()) { + // If we don't know anything about the callee, assume it depends on the + // stack layout of the caller. In that case, it's only legal to outline + // as a tail-call. 
Whitelist the call instructions we know about so we + // don't get unexpected results with call pseudo-instructions. + auto UnknownCallOutlineType = outliner::InstrType::Illegal; + if (Opc == ARM::BL || Opc == ARM::tBL || Opc == ARM::BLX || + Opc == ARM::tBLXr || Opc == ARM::tBLXi) + UnknownCallOutlineType = outliner::InstrType::LegalTerminator; + + return UnknownCallOutlineType; + } + + // Since calls are handled, don't touch LR or PC + if (MI.modifiesRegister(ARM::LR, TRI) || MI.modifiesRegister(ARM::PC, TRI)) + return outliner::InstrType::Illegal; + + // Be conservative with IT blocks. + if (MI.readsRegister(ARM::ITSTATE, TRI) || + MI.modifiesRegister(ARM::ITSTATE, TRI)) + return outliner::InstrType::Illegal; + + // Don't outline positions. + if (MI.isPosition()) + return outliner::InstrType::Illegal; + + return outliner::InstrType::Legal; +} + +void ARMBaseInstrInfo::buildOutlinedFrame( + MachineBasicBlock &MBB, MachineFunction &MF, + const outliner::OutlinedFunction &OF) const { + // For thunk outlining, rewrite the last instruction from a call to a + // tail-call. + if (OF.FrameConstructionID == MachineOutlinerThunk) { + MachineInstr *Call = &*--MBB.instr_end(); + bool isThumb = Subtarget.isThumb(); + unsigned FuncOp = isThumb ? 2 : 0; + unsigned Opc = Call->getOperand(FuncOp).isReg() + ? isThumb ? ARM::tTAILJMPr : ARM::TAILJMPr + : isThumb ? Subtarget.isTargetMachO() ? 
ARM::tTAILJMPd + : ARM::tTAILJMPdND + : ARM::TAILJMPd; + MachineInstrBuilder MIB = BuildMI(MBB, MBB.end(), DebugLoc(), get(Opc)) + .add(Call->getOperand(FuncOp)); + if (isThumb && !Call->getOperand(FuncOp).isReg()) + MIB.add(predOps(ARMCC::AL)); + Call->eraseFromParent(); + } +} + +MachineBasicBlock::iterator ARMBaseInstrInfo::insertOutlinedCall( + Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It, + MachineFunction &MF, const outliner::Candidate &C) const { + MachineInstrBuilder MIB; + MachineBasicBlock::iterator CallPt; + unsigned Opc; + bool isThumb = Subtarget.isThumb(); + + // Are we tail calling? + if (C.CallConstructionID == MachineOutlinerTailCall) { + // If yes, then we can just branch to the label. + Opc = isThumb + ? Subtarget.isTargetMachO() ? ARM::tTAILJMPd : ARM::tTAILJMPdND + : ARM::TAILJMPd; + MIB = BuildMI(MF, DebugLoc(), get(Opc)) + .addGlobalAddress(M.getNamedValue(MF.getName())); + if (isThumb) + MIB.add(predOps(ARMCC::AL)); + It = MBB.insert(It, MIB); + return It; + } + + // Create the call instruction. + Opc = isThumb ? ARM::tBL : ARM::BL; + MachineInstrBuilder CallMIB = BuildMI(MF, DebugLoc(), get(Opc)); + if (isThumb) + CallMIB.add(predOps(ARMCC::AL)); + CallMIB.addGlobalAddress(M.getNamedValue(MF.getName())); + + // Insert the call. + It = MBB.insert(It, CallMIB); + return It; +} diff --git a/llvm/lib/Target/ARM/ARMTargetMachine.cpp b/llvm/lib/Target/ARM/ARMTargetMachine.cpp --- a/llvm/lib/Target/ARM/ARMTargetMachine.cpp +++ b/llvm/lib/Target/ARM/ARMTargetMachine.cpp @@ -248,6 +248,10 @@ setSupportsDebugEntryValues(true); initAsmInfo(); + + // ARM supports the MachineOutliner. 
+ setMachineOutliner(true); + setSupportsDefaultOutlining(false); } ARMBaseTargetMachine::~ARMBaseTargetMachine() = default; diff --git a/llvm/test/CodeGen/ARM/machine-outliner-tail.ll b/llvm/test/CodeGen/ARM/machine-outliner-tail.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/ARM/machine-outliner-tail.ll @@ -0,0 +1,46 @@ +; RUN: llc -enable-machine-outliner -verify-machineinstrs -mtriple=arm-- \ +; RUN: --stop-after=machine-outliner < %s | FileCheck %s --check-prefix=ARM +; RUN: llc -enable-machine-outliner -verify-machineinstrs -mtriple=thumbv7-- \ +; RUN: --stop-after=machine-outliner < %s | FileCheck %s --check-prefix=THUMB +; RUN: llc -enable-machine-outliner -verify-machineinstrs \ +; RUN: -mtriple=thumbv7-apple-darwin --stop-after=machine-outliner < %s \ +; RUN: | FileCheck %s --check-prefix=MACHO +; RUN: llc -enable-machine-outliner -verify-machineinstrs -mtriple=thumbv5-- \ +; RUN: --stop-after=machine-outliner < %s | FileCheck %s --check-prefix=THUMB1 + +; ARM-LABEL: name: OUTLINED_FUNCTION_0 +; ARM: $r0 = MOVi 1, 14 /* CC::al */, $noreg, $noreg +; ARM-NEXT: $r1 = MOVi 2, 14 /* CC::al */, $noreg, $noreg +; ARM-NEXT: $r2 = MOVi 3, 14 /* CC::al */, $noreg, $noreg +; ARM-NEXT: $r3 = MOVi 4, 14 /* CC::al */, $noreg, $noreg +; ARM-NEXT: TAILJMPd @z + +; THUMB-LABEL: name: OUTLINED_FUNCTION_0 +; THUMB: $r0, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg +; THUMB-NEXT: $r1, dead $cpsr = tMOVi8 2, 14 /* CC::al */, $noreg +; THUMB-NEXT: $r2, dead $cpsr = tMOVi8 3, 14 /* CC::al */, $noreg +; THUMB-NEXT: $r3, dead $cpsr = tMOVi8 4, 14 /* CC::al */, $noreg +; THUMB-NEXT: tTAILJMPdND @z, 14 /* CC::al */, $noreg + +; MACHO-LABEL: name: OUTLINED_FUNCTION_0 +; MACHO: $r0, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg +; MACHO-NEXT: $r1, dead $cpsr = tMOVi8 2, 14 /* CC::al */, $noreg +; MACHO-NEXT: $r2, dead $cpsr = tMOVi8 3, 14 /* CC::al */, $noreg +; MACHO-NEXT: $r3, dead $cpsr = tMOVi8 4, 14 /* CC::al */, $noreg +; MACHO-NEXT: tTAILJMPd @z, 14 /* CC::al 
*/, $noreg + +; THUMB1-NOT: OUTLINED_FUNCTION_0 + +define void @a() { +entry: + tail call void @z(i32 1, i32 2, i32 3, i32 4) + ret void +} + +declare void @z(i32, i32, i32, i32) + +define dso_local void @b(i32* nocapture readnone %p) { +entry: + tail call void @z(i32 1, i32 2, i32 3, i32 4) + ret void +} diff --git a/llvm/test/CodeGen/ARM/machine-outliner-thunk.ll b/llvm/test/CodeGen/ARM/machine-outliner-thunk.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/ARM/machine-outliner-thunk.ll @@ -0,0 +1,119 @@ +; RUN: llc -enable-machine-outliner -verify-machineinstrs -mtriple=armv7-- \ +; RUN: -stop-after=machine-outliner < %s | FileCheck %s --check-prefix=ARM +; RUN: llc -enable-machine-outliner -verify-machineinstrs -mtriple=thumbv7-- \ +; RUN: -stop-after=machine-outliner < %s | FileCheck %s --check-prefix=THUMB +; RUN: llc -enable-machine-outliner -verify-machineinstrs \ +; RUN: -mtriple=thumbv7-apple-darwin -stop-after=machine-outliner < %s \ +; RUN: | FileCheck %s --check-prefix=MACHO +; RUN: llc -enable-machine-outliner -verify-machineinstrs -mtriple=thumbv5-- \ +; RUN: --stop-after=machine-outliner < %s | FileCheck %s --check-prefix=THUMB1 + +declare i32 @thunk_called_fn(i32, i32, i32, i32) + +define i32 @a() { +; ARM-LABEL: name: a +; ARM: bb.0.entry: +; ARM-NEXT: liveins: $r11, $lr +; ARM: $sp = frame-setup STMDB_UPD $sp, 14 /* CC::al */, $noreg, killed $r11, killed $lr +; ARM-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 8 +; ARM-NEXT: frame-setup CFI_INSTRUCTION offset $lr, -4 +; ARM-NEXT: frame-setup CFI_INSTRUCTION offset $r11, -8 +; ARM-NEXT: BL @OUTLINED_FUNCTION_0{{.*}} +; ARM-NEXT: renamable $r0 = ADDri killed renamable $r0, 8, 14 /* CC::al */, $noreg, $noreg +; ARM-NEXT: $sp = frame-destroy LDMIA_RET $sp, 14 /* CC::al */, $noreg, def $r11, def $pc, implicit killed $r0 + +; THUMB-LABEL: name: a +; THUMB: bb.0.entry: +; THUMB-NEXT: liveins: $r7, $lr +; THUMB: frame-setup tPUSH 14 /* CC::al */, $noreg, killed $r7, killed $lr +; 
THUMB-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 8 +; THUMB-NEXT: frame-setup CFI_INSTRUCTION offset $lr, -4 +; THUMB-NEXT: frame-setup CFI_INSTRUCTION offset $r7, -8 +; THUMB-NEXT: tBL 14 /* CC::al */, $noreg, @OUTLINED_FUNCTION_0{{.*}} +; THUMB-NEXT: renamable $r0, dead $cpsr = tADDi8 killed renamable $r0, 8, 14 /* CC::al */, $noreg +; THUMB-NEXT: tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc + +; MACHO-LABEL: name: a +; MACHO: bb.0.entry: +; MACHO-NEXT: liveins: $lr +; MACHO: early-clobber $sp = frame-setup t2STR_PRE killed $lr, $sp, -4, 14 /* CC::al */, $noreg +; MACHO-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 4 +; MACHO-NEXT: frame-setup CFI_INSTRUCTION offset $lr, -4 +; MACHO-NEXT: tBL 14 /* CC::al */, $noreg, @OUTLINED_FUNCTION_0{{.*}} +; MACHO-NEXT: renamable $r0, dead $cpsr = tADDi8 killed renamable $r0, 8, 14 /* CC::al */, $noreg +; MACHO-NEXT: $lr, $sp = frame-destroy t2LDR_POST $sp, 4, 14 /* CC::al */, $noreg +; MACHO-NEXT: tBX_RET 14 /* CC::al */, $noreg, implicit killed $r0 + +; THUMB1-NOT: OUTLINED_FUNCTION_0 + +entry: + %call = tail call i32 @thunk_called_fn(i32 1, i32 2, i32 3, i32 4) + %cx = add i32 %call, 8 + ret i32 %cx +} + +define i32 @b() { +; ARM-LABEL: name: b +; ARM: bb.0.entry: +; ARM-NEXT: liveins: $r11, $lr +; ARM: $sp = frame-setup STMDB_UPD $sp, 14 /* CC::al */, $noreg, killed $r11, killed $lr +; ARM-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 8 +; ARM-NEXT: frame-setup CFI_INSTRUCTION offset $lr, -4 +; ARM-NEXT: frame-setup CFI_INSTRUCTION offset $r11, -8 +; ARM-NEXT: BL @OUTLINED_FUNCTION_0{{.*}} +; ARM-NEXT: renamable $r0 = ADDri killed renamable $r0, 88, 14 /* CC::al */, $noreg, $noreg +; ARM-NEXT: $sp = frame-destroy LDMIA_RET $sp, 14 /* CC::al */, $noreg, def $r11, def $pc, implicit killed $r0 + +; THUMB-LABEL: name: b +; THUMB: bb.0.entry: +; THUMB-NEXT: liveins: $r7, $lr +; THUMB: frame-setup tPUSH 14 /* CC::al */, $noreg, killed $r7, killed $lr +; THUMB-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 8 
+; THUMB-NEXT: frame-setup CFI_INSTRUCTION offset $lr, -4 +; THUMB-NEXT: frame-setup CFI_INSTRUCTION offset $r7, -8 +; THUMB-NEXT: tBL 14 /* CC::al */, $noreg, @OUTLINED_FUNCTION_0{{.*}} +; THUMB-NEXT: renamable $r0, dead $cpsr = tADDi8 killed renamable $r0, 88, 14 /* CC::al */, $noreg +; THUMB-NEXT: tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc + +; MACHO-LABEL: name: b +; MACHO: bb.0.entry: +; MACHO-NEXT: liveins: $lr +; MACHO: early-clobber $sp = frame-setup t2STR_PRE killed $lr, $sp, -4, 14 /* CC::al */, $noreg +; MACHO-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 4 +; MACHO-NEXT: frame-setup CFI_INSTRUCTION offset $lr, -4 +; MACHO-NEXT: tBL 14 /* CC::al */, $noreg, @OUTLINED_FUNCTION_0{{.*}} +; MACHO-NEXT: renamable $r0, dead $cpsr = tADDi8 killed renamable $r0, 88, 14 /* CC::al */, $noreg +; MACHO-NEXT: $lr, $sp = frame-destroy t2LDR_POST $sp, 4, 14 /* CC::al */, $noreg +; MACHO-NEXT: tBX_RET 14 /* CC::al */, $noreg, implicit killed $r0 +entry: + %call = tail call i32 @thunk_called_fn(i32 1, i32 2, i32 3, i32 4) + %cx = add i32 %call, 88 + ret i32 %cx +} + +; ARM-LABEL: name: OUTLINED_FUNCTION_0 +; ARM: bb.0: +; ARM-NEXT: liveins: $r10, $r9, $r8, $r7, $r6, $r5, $r4, $d15, $d14, $d13, $d12, $d11, $d10, $d9, $d8 +; ARM: $r0 = MOVi 1, 14 /* CC::al */, $noreg, $noreg +; ARM-NEXT: $r1 = MOVi 2, 14 /* CC::al */, $noreg, $noreg +; ARM-NEXT: $r2 = MOVi 3, 14 /* CC::al */, $noreg, $noreg +; ARM-NEXT: $r3 = MOVi 4, 14 /* CC::al */, $noreg, $noreg +; ARM-NEXT: TAILJMPd @thunk_called_fn, implicit $sp + +; THUMB-LABEL: name: OUTLINED_FUNCTION_0 +; THUMB: bb.0: +; THUMB-NEXT: liveins: $r11, $r10, $r9, $r8, $r6, $r5, $r4, $d15, $d14, $d13, $d12, $d11, $d10, $d9, $d8 +; THUMB: $r0, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg +; THUMB-NEXT: $r1, dead $cpsr = tMOVi8 2, 14 /* CC::al */, $noreg +; THUMB-NEXT: $r2, dead $cpsr = tMOVi8 3, 14 /* CC::al */, $noreg +; THUMB-NEXT: $r3, dead $cpsr = tMOVi8 4, 14 /* CC::al */, $noreg +; THUMB-NEXT: tTAILJMPdND 
@thunk_called_fn, 14 /* CC::al */, $noreg, implicit $sp + +; MACHO-LABEL: name: OUTLINED_FUNCTION_0 +; MACHO: bb.0: +; MACHO-NEXT: liveins: $r7, $r6, $r5, $r4, $r11, $r10, $r8, $d15, $d14, $d13, $d12, $d11, $d10, $d9, $d8 +; MACHO: $r0, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg +; MACHO-NEXT: $r1, dead $cpsr = tMOVi8 2, 14 /* CC::al */, $noreg +; MACHO-NEXT: $r2, dead $cpsr = tMOVi8 3, 14 /* CC::al */, $noreg +; MACHO-NEXT: $r3, dead $cpsr = tMOVi8 4, 14 /* CC::al */, $noreg +; MACHO-NEXT: tTAILJMPd @thunk_called_fn, 14 /* CC::al */, $noreg, implicit $sp diff --git a/llvm/test/CodeGen/ARM/machine-outliner-unoutlinable.mir b/llvm/test/CodeGen/ARM/machine-outliner-unoutlinable.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/ARM/machine-outliner-unoutlinable.mir @@ -0,0 +1,167 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=thumbv7-- -run-pass=machine-outliner -verify-machineinstrs \ +# RUN: %s -o - | FileCheck %s + +--- | + define void @dont_outline_asm() #0 { ret void } + define void @dont_outline_lr() #0 { ret void } + define void @dont_outline_lr2() #0 { ret void } + define void @dont_outline_it() #0 { ret void } + define void @dont_outline_pic() #0 { ret void } + define void @dont_outline_mve() #0 { ret void } + declare void @z(i32, i32, i32, i32) + + attributes #0 = { minsize optsize } +... 
+--- + +name: dont_outline_asm +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: dont_outline_asm + ; CHECK: bb.0: + ; CHECK: INLINEASM &"movs r0, #42", 1 + ; CHECK: tBL 14 /* CC::al */, $noreg, @OUTLINED_FUNCTION_0 + ; CHECK: bb.1: + ; CHECK: INLINEASM &"movs r0, #42", 1 + ; CHECK: tBL 14 /* CC::al */, $noreg, @OUTLINED_FUNCTION_0 + bb.0: + INLINEASM &"movs r0, #42", 1 + $r0, dead $cpsr = tMOVi8 1, 14, $noreg + $r1, dead $cpsr = tMOVi8 1, 14, $noreg + $r2, dead $cpsr = tMOVi8 1, 14, $noreg + $r3, dead $cpsr = tMOVi8 1, 14, $noreg + tBL 14, $noreg, @z + bb.1: + INLINEASM &"movs r0, #42", 1 + $r0, dead $cpsr = tMOVi8 1, 14, $noreg + $r1, dead $cpsr = tMOVi8 1, 14, $noreg + $r2, dead $cpsr = tMOVi8 1, 14, $noreg + $r3, dead $cpsr = tMOVi8 1, 14, $noreg + tBL 14, $noreg, @z + bb.2: + tBX_RET 14, $noreg +... +--- + +name: dont_outline_lr +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: dont_outline_lr + ; CHECK-NOT: tBL 14 /* CC::al */, $noreg, @OUTLINED_FUNCTION + bb.0: + liveins: $lr + $r0 = tMOVr $lr, 14, $noreg + $r1 = tMOVr $lr, 14, $noreg + $r2 = tMOVr $lr, 14, $noreg + $r3 = tMOVr $lr, 14, $noreg + tBL 14, $noreg, @z + bb.1: + liveins: $lr + $r0 = tMOVr $lr, 14, $noreg + $r1 = tMOVr $lr, 14, $noreg + $r2 = tMOVr $lr, 14, $noreg + $r3 = tMOVr $lr, 14, $noreg + tBL 14, $noreg, @z + bb.2: + tBX_RET 14, $noreg +... +--- + +name: dont_outline_lr2 +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: dont_outline_lr2 + ; CHECK-NOT: tBL 14 /* CC::al */, $noreg, @OUTLINED_FUNCTION + bb.0: + liveins: $r0 + $lr = tMOVr $r0, 14, $noreg + $r1 = tMOVr $r0, 14, $noreg + $r2 = tMOVr $r0, 14, $noreg + $r3 = tMOVr $r0, 14, $noreg + $r4 = tMOVr $r0, 14, $noreg + tBLXr 14, $lr, $noreg + bb.1: + liveins: $r0 + $lr = tMOVr $r0, 14, $noreg + $r1 = tMOVr $r0, 14, $noreg + $r2 = tMOVr $r0, 14, $noreg + $r3 = tMOVr $r0, 14, $noreg + $r4 = tMOVr $r0, 14, $noreg + tBLXr 14, $lr, $noreg + bb.2: + tBX_RET 14, $noreg +... 
+--- + +name: dont_outline_it +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: dont_outline_it + ; CHECK-NOT: tBL 14 /* CC::al */, $noreg, @OUTLINED_FUNCTION + bb.0: + t2IT 0, 1, implicit-def $itstate + $r0, dead $cpsr = tMOVi8 1, 0, $noreg, implicit $itstate + $r1, dead $cpsr = tMOVi8 1, 0, $noreg, implicit $itstate + $r2, dead $cpsr = tMOVi8 1, 0, $noreg, implicit $itstate + $r3, dead $cpsr = tMOVi8 1, 0, $noreg, implicit $itstate + tBL 14, $noreg, @z + bb.1: + t2IT 0, 1, implicit-def $itstate + $r0, dead $cpsr = tMOVi8 1, 0, $noreg, implicit $itstate + $r1, dead $cpsr = tMOVi8 1, 0, $noreg, implicit $itstate + $r2, dead $cpsr = tMOVi8 1, 0, $noreg, implicit $itstate + $r3, dead $cpsr = tMOVi8 1, 0, $noreg, implicit $itstate + tBL 14, $noreg, @z + bb.2: + tBX_RET 14, $noreg +... +--- + +name: dont_outline_pic +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: dont_outline_pic + ; CHECK-NOT: tBL 14 /* CC::al */, $noreg, @OUTLINED_FUNCTION + bb.0: + $r0 = t2MOVi16_ga_pcrel target-flags(arm-lo16, arm-nonlazy) @z, 0 + $r0 = t2MOVTi16_ga_pcrel $r0, target-flags(arm-lo16, arm-nonlazy) @z, 0 + $r0 = PICADD $r0, 1, 14, $noreg + $r1 = PICLDR $r0, 2, 14, $noreg + PICSTR $r0, $r1, 3, 14, $noreg + tBL 14, $noreg, @z + bb.1: + $r0 = t2MOVi16_ga_pcrel target-flags(arm-lo16, arm-nonlazy) @z, 0 + $r0 = t2MOVTi16_ga_pcrel $r0, target-flags(arm-lo16, arm-nonlazy) @z, 0 + $r0 = PICADD $r0, 1, 14, $noreg + $r1 = PICLDR $r0, 2, 14, $noreg + PICSTR $r0, $r1, 3, 14, $noreg + tBL 14, $noreg, @z + bb.2: + tBX_RET 14, $noreg +... 
+--- + +name: dont_outline_mve +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: dont_outline_mve + ; CHECK-NOT: tBL 14 /* CC::al */, $noreg, @OUTLINED_FUNCTION + bb.0: + liveins: $r3, $r4, $q0, $q3, $q4, $q5 + $q5 = MVE_VDUP32 $r3, 0, $noreg, $q5 + $q4 = MVE_VDUP32 $r4, 0, $noreg, $q4 + $q0 = MVE_VADDf32 $q4, $q5, 0, $noreg, $q0 + t2DoLoopStart $r4 + $r0 = MVE_VMOV_from_lane_32 renamable $q0, 1, 14, $noreg + tBL 14, $noreg, @z + bb.1: + liveins: $r3, $r4, $q0, $q3, $q4, $q5 + $q5 = MVE_VDUP32 $r3, 0, $noreg, $q5 + $q4 = MVE_VDUP32 $r4, 0, $noreg, $q4 + $q0 = MVE_VADDf32 $q4, $q5, 0, $noreg, $q0 + t2DoLoopStart $r4 + $r0 = MVE_VMOV_from_lane_32 renamable $q0, 1, 14, $noreg + tBL 14, $noreg, @z + bb.2: + tBX_RET 14, $noreg diff --git a/llvm/test/CodeGen/ARM/machine-outliner-unsafe-registers.mir b/llvm/test/CodeGen/ARM/machine-outliner-unsafe-registers.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/ARM/machine-outliner-unsafe-registers.mir @@ -0,0 +1,114 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=arm-- -run-pass=machine-outliner -verify-machineinstrs \ +# RUN: %s -o - | FileCheck %s + +--- | + define void @outline_cpsr_r12_ok() #0 { ret void } + define void @dont_outline_cpsr_r12_1() #0 { ret void } + define void @dont_outline_cpsr_r12_2() #0 { ret void } + declare void @z(i32, i32, i32, i32) + + attributes #0 = { minsize optsize } +... 
+--- + +name: outline_cpsr_r12_ok +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: outline_cpsr_r12_ok + ; CHECK: bb.0: + ; CHECK: BL @OUTLINED_FUNCTION_0 + ; CHECK: $r3 = MOVr $r12, 14 /* CC::al */, $noreg, $noreg + ; CHECK: bb.1: + ; CHECK: BL @OUTLINED_FUNCTION_0 + ; CHECK: $r4 = MOVr $r12, 14 /* CC::al */, $noreg, $noreg + bb.0: + $r12 = MOVi 1, 14, $noreg, $noreg + CMPri $r12, 42, 14, $noreg, implicit-def $cpsr + $r0 = MOVi 1, 14, $noreg, $noreg + $r1 = MOVi 1, 14, $noreg, $noreg + $r2 = MOVi 1, 14, $noreg, $noreg + $r3 = MOVi 1, 14, $noreg, $noreg + BL @z + $r3 = MOVr $r12, 14, $noreg, $noreg + bb.1: + $r12 = MOVi 1, 14, $noreg, $noreg + CMPri $r12, 42, 14, $noreg, implicit-def $cpsr + $r0 = MOVi 1, 14, $noreg, $noreg + $r1 = MOVi 1, 14, $noreg, $noreg + $r2 = MOVi 1, 14, $noreg, $noreg + $r3 = MOVi 1, 14, $noreg, $noreg + BL @z + $r4 = MOVr $r12, 14, $noreg, $noreg + bb.2: + BX_RET 14, $noreg +... +--- + +name: dont_outline_cpsr_r12_1 +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: dont_outline_cpsr_r12_1 + ; CHECK: bb.0: + ; CHECK: BL @OUTLINED_FUNCTION_1 + ; CHECK: bb.1: + ; CHECK: BL @OUTLINED_FUNCTION_1 + ; CHECK-LABEL: bb.2: + ; CHECK-NOT: BL @OUTLINED_FUNCTION_1 + bb.0: + $r0 = MOVi 2, 14, $noreg, $noreg + $r1 = MOVi 2, 14, $noreg, $noreg + $r2 = MOVi 2, 14, $noreg, $noreg + $r3 = MOVi 2, 14, $noreg, $noreg + BL @z + bb.1: + $r0 = MOVi 2, 14, $noreg, $noreg + $r1 = MOVi 2, 14, $noreg, $noreg + $r2 = MOVi 2, 14, $noreg, $noreg + $r3 = MOVi 2, 14, $noreg, $noreg + BL @z + bb.2: + $r12 = MOVi 1, 14, $noreg, $noreg + CMPri $r12, 42, 14, $noreg, implicit-def $cpsr + $r0 = MOVi 2, 14, $noreg, $noreg + $r1 = MOVi 2, 14, $noreg, $noreg + $r2 = MOVi 2, 14, $noreg, $noreg + $r3 = MOVi 2, 14, $noreg, $noreg + BL @z + bb.3: + liveins: $cpsr, $r12 + BX_RET 14, $noreg +... 
+--- + +name: dont_outline_cpsr_r12_2 +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: dont_outline_cpsr_r12_2 + ; CHECK-NOT: BL @OUTLINED_FUNCTION + bb.0: + liveins: $r12 + CMPri $r12, 42, 14, $noreg, implicit-def $cpsr + $r0 = MOVi 3, 14, $noreg, $noreg + $r1 = MOVi 3, 14, $noreg, $noreg + $r2 = MOVi 3, 14, $noreg, $noreg + $r3 = MOVi 3, 14, $noreg, $noreg + BL @z + bb.1: + liveins: $r12 + CMPri $r12, 42, 14, $noreg, implicit-def $cpsr + $r0 = MOVi 3, 14, $noreg, $noreg + $r1 = MOVi 3, 14, $noreg, $noreg + $r2 = MOVi 3, 14, $noreg, $noreg + $r3 = MOVi 3, 14, $noreg, $noreg + BL @z + bb.2: + liveins: $r12 + CMPri $r12, 42, 14, $noreg, implicit-def $cpsr + $r0 = MOVi 3, 14, $noreg, $noreg + $r1 = MOVi 3, 14, $noreg, $noreg + $r2 = MOVi 3, 14, $noreg, $noreg + $r3 = MOVi 3, 14, $noreg, $noreg + BL @z + bb.3: + BX_RET 14, $noreg