diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -6059,8 +6059,9 @@ // We only support -moutline in AArch64 right now. If we're not compiling // for AArch64, emit a warning and ignore the flag. Otherwise, add the // proper mllvm flags. - if (Triple.getArch() != llvm::Triple::aarch64 && - Triple.getArch() != llvm::Triple::aarch64_32) { + if (!(Triple.isARM() || Triple.isThumb() || + Triple.getArch() == llvm::Triple::aarch64 || + Triple.getArch() == llvm::Triple::aarch64_32)) { D.Diag(diag::warn_drv_moutline_unsupported_opt) << Triple.getArchName(); } else { CmdArgs.push_back("-mllvm"); diff --git a/llvm/include/llvm/CodeGen/TargetPassConfig.h b/llvm/include/llvm/CodeGen/TargetPassConfig.h --- a/llvm/include/llvm/CodeGen/TargetPassConfig.h +++ b/llvm/include/llvm/CodeGen/TargetPassConfig.h @@ -132,6 +132,9 @@ /// callers. bool RequireCodeGenSCCOrder = false; + /// Default setting for -enable-machine-outliner + bool MachineOutlinerEnabled = false; + /// Add the actual instruction selection passes. This does not include /// preparation passes on IR. bool addCoreISelPasses(); diff --git a/llvm/lib/CodeGen/MachineOutliner.cpp b/llvm/lib/CodeGen/MachineOutliner.cpp --- a/llvm/lib/CodeGen/MachineOutliner.cpp +++ b/llvm/lib/CodeGen/MachineOutliner.cpp @@ -1155,6 +1155,9 @@ // Outlined functions shouldn't preserve liveness. MF.getProperties().reset(MachineFunctionProperties::Property::TracksLiveness); MF.getRegInfo().freezeReservedRegs(MF); + MF.getProperties().reset(MachineFunctionProperties::Property::IsSSA); + MF.getProperties().set(MachineFunctionProperties::Property::NoPHIs); + MF.getProperties().set(MachineFunctionProperties::Property::NoVRegs); // If there's a DISubprogram associated with this outlined function, then // emit debug info for the outlined function. 
diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp
--- a/llvm/lib/CodeGen/TargetPassConfig.cpp
+++ b/llvm/lib/CodeGen/TargetPassConfig.cpp
@@ -979,8 +979,10 @@
     bool RunOnAllFunctions = (EnableMachineOutliner == AlwaysOutline);
     bool AddOutliner = RunOnAllFunctions ||
                        TM->Options.SupportsDefaultOutlining;
-    if (AddOutliner)
+    if (AddOutliner) {
+      MachineOutlinerEnabled = true;
       addPass(createMachineOutlinerPass(RunOnAllFunctions));
+    }
   }
 
   // Add passes that directly emit MI after all other MI passes.
diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
--- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -343,6 +343,22 @@
   ArrayRef<std::pair<unsigned, const char *>>
   getSerializableBitmaskMachineOperandTargetFlags() const override;
 
+  /// ARM supports the MachineOutliner.
+  bool isFunctionSafeToOutlineFrom(MachineFunction &MF,
+                                   bool OutlineFromLinkOnceODRs) const override;
+  outliner::OutlinedFunction getOutliningCandidateInfo(
+      std::vector<outliner::Candidate> &RepeatedSequenceLocs) const override;
+  outliner::InstrType getOutliningType(MachineBasicBlock::iterator &MIT,
+                                       unsigned Flags) const override;
+  bool isMBBSafeToOutlineFrom(MachineBasicBlock &MBB,
+                              unsigned &Flags) const override;
+  void buildOutlinedFrame(MachineBasicBlock &MBB, MachineFunction &MF,
+                          const outliner::OutlinedFunction &OF) const override;
+  MachineBasicBlock::iterator
+  insertOutlinedCall(Module &M, MachineBasicBlock &MBB,
+                     MachineBasicBlock::iterator &It, MachineFunction &MF,
+                     const outliner::Candidate &C) const override;
+
 private:
   unsigned getInstBundleLength(const MachineInstr &MI) const;
 
diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
--- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -32,6 +32,7 @@
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/ScoreboardHazardRecognizer.h" @@ -5460,3 +5461,374 @@ return ConstantMaterializationCost(Val1, Subtarget, !ForCodesize) < ConstantMaterializationCost(Val2, Subtarget, !ForCodesize); } + +/// Constants defining how certain sequences should be outlined. +/// This encompasses how an outlined function should be called, and what kind of +/// frame should be emitted for that outlined function. +/// +/// \p MachineOutlinerTailCall implies that the function is being created from +/// a sequence of instructions ending in a return. +/// +/// That is, +/// +/// I1 OUTLINED_FUNCTION: +/// I2 --> B OUTLINED_FUNCTION I1 +/// BX LR I2 +/// BX LR +/// +/// +-------------------------+--------+-----+ +/// | | Thumb2 | ARM | +/// +-------------------------+--------+-----+ +/// | Call overhead in Bytes | 4 | 4 | +/// | Frame overhead in Bytes | 0 | 0 | +/// | Stack fixup required | No | No | +/// +-------------------------+--------+-----+ +/// +/// \p MachineOutlinerThunk implies that the function is being created from +/// a sequence of instructions ending in a call. The outlined function is +/// called with a BL instruction, and the outlined function tail-calls the +/// original call destination. 
+///
+/// That is,
+///
+/// I1                                OUTLINED_FUNCTION:
+/// I2   --> BL OUTLINED_FUNCTION     I1
+/// BL f                              I2
+///                                   B f
+///
+/// +-------------------------+--------+-----+
+/// |                         | Thumb2 | ARM |
+/// +-------------------------+--------+-----+
+/// | Call overhead in Bytes  |      4 |   4 |
+/// | Frame overhead in Bytes |      0 |   0 |
+/// | Stack fixup required    |     No |  No |
+/// +-------------------------+--------+-----+
+
+/// Selects how an outlined sequence is called and which frame it gets; used
+/// as the call/frame construction ID of candidates and outlined functions.
+enum MachineOutlinerClass {
+  MachineOutlinerTailCall,
+  MachineOutlinerThunk
+};
+
+/// Per-basic-block properties recorded by isMBBSafeToOutlineFrom(). These are
+/// OR-ed into an unsigned bit mask, so the enum must stay unscoped.
+enum MachineOutlinerMBBFlags {
+  LRUnavailableSomewhere = 0x2, // LR is not available across the block.
+  HasCalls = 0x4,               // The block contains at least one call.
+  UnsafeRegsDead = 0x8          // R12 and CPSR are available in the block.
+};
+
+/// Call and frame overhead, in bytes, of each outlining class for a given
+/// subtarget. Both arms of every Thumb/ARM selection currently hold the same
+/// value, matching the cost tables in the comment above.
+struct OutlinerCosts {
+  const int CallTailCall;
+  const int FrameTailCall;
+  const int CallThunk;
+  const int FrameThunk;
+
+  // explicit: an ARMSubtarget must never convert silently into a cost table.
+  explicit OutlinerCosts(const ARMSubtarget &target)
+      : CallTailCall(target.isThumb() ? 4 : 4),
+        FrameTailCall(target.isThumb() ? 0 : 0),
+        CallThunk(target.isThumb() ? 4 : 4),
+        FrameThunk(target.isThumb() ? 0 : 0) {}
+};
+
+outliner::OutlinedFunction ARMBaseInstrInfo::getOutliningCandidateInfo(
+    std::vector<outliner::Candidate> &RepeatedSequenceLocs) const {
+  // All candidates are copies of the same sequence, so the first one is
+  // enough to measure its size in bytes.
+  outliner::Candidate &FirstCand = RepeatedSequenceLocs[0];
+  unsigned SequenceSize =
+      std::accumulate(FirstCand.front(), std::next(FirstCand.back()), 0,
+                      [this](unsigned Sum, const MachineInstr &MI) {
+                        return Sum + getInstSizeInBytes(MI);
+                      });
+
+  // Properties about candidate MBBs that hold for all of them.
+  unsigned FlagsSetInAll = 0xF;
+
+  // Compute liveness information for each candidate, and set FlagsSetInAll.
+  const TargetRegisterInfo &TRI = getRegisterInfo();
+  // Keep only the flags that every candidate's basic block agrees on.
+  for (const outliner::Candidate &C : RepeatedSequenceLocs)
+    FlagsSetInAll &= C.Flags;
+
+  // According to the ARM Procedure Call Standard, the following are
+  // undefined on entry/exit from a function call:
+  //
+  // * Register R12(IP),
+  // * Condition codes (and thus the CPSR register)
+  //
+  // Because of this, we can't outline any sequence of instructions where one
+  // of these registers is live into/across it. Thus, we need to delete those
+  // candidates.
+  auto CantGuaranteeValueAcrossCall = [&TRI](outliner::Candidate &C) {
+    // If the unsafe registers in this block are all dead, then we don't need
+    // to compute liveness here.
+    if (C.Flags & UnsafeRegsDead)
+      return false;
+    C.initLRU(TRI);
+    // Query the candidate's own liveness set; copying it first buys nothing.
+    return !C.LRU.available(ARM::R12) || !C.LRU.available(ARM::CPSR);
+  };
+
+  // Are there any candidates where those registers are live?
+  if (!(FlagsSetInAll & UnsafeRegsDead)) {
+    // Erase every candidate that violates the restrictions above. (It could be
+    // true that we have viable candidates, so it's not worth bailing out in
+    // the case that, say, 1 out of 20 candidates violate the restrictions.)
+    RepeatedSequenceLocs.erase(std::remove_if(RepeatedSequenceLocs.begin(),
+                                              RepeatedSequenceLocs.end(),
+                                              CantGuaranteeValueAcrossCall),
+                               RepeatedSequenceLocs.end());
+
+    // If the sequence doesn't have enough candidates left, then we're done.
+    if (RepeatedSequenceLocs.size() < 2)
+      return outliner::OutlinedFunction();
+  }
+
+  // At this point, we have only "safe" candidates to outline. Figure out
+  // frame + call instruction information.
+
+  unsigned LastInstrOpcode = RepeatedSequenceLocs[0].back()->getOpcode();
+
+  // Helper lambda which sets call information for every candidate.
+  auto SetCandidateCallInfo =
+      [&RepeatedSequenceLocs](unsigned CallID, unsigned NumBytesForCall) {
+        for (outliner::Candidate &C : RepeatedSequenceLocs)
+          C.setCallInfo(CallID, NumBytesForCall);
+      };
+
+  // The cost table lives on the stack. Allocating it with `new` and never
+  // deleting it leaked one OutlinerCosts object per call.
+  const OutlinerCosts Costs(Subtarget);
+  unsigned FrameID = 0;
+  unsigned NumBytesToCreateFrame = 0;
+
+  // If the last instruction in any candidate is a terminator, then we should
+  // tail call all of the candidates.
+  if (RepeatedSequenceLocs[0].back()->isTerminator()) {
+    FrameID = MachineOutlinerTailCall;
+    NumBytesToCreateFrame = Costs.FrameTailCall;
+    SetCandidateCallInfo(MachineOutlinerTailCall, Costs.CallTailCall);
+  } else if (LastInstrOpcode == ARM::BL || LastInstrOpcode == ARM::BLX ||
+             LastInstrOpcode == ARM::tBL || LastInstrOpcode == ARM::tBLXr ||
+             LastInstrOpcode == ARM::tBLXi) {
+    // The sequence ends in a call: outline it as a thunk.
+    FrameID = MachineOutlinerThunk;
+    NumBytesToCreateFrame = Costs.FrameThunk;
+    SetCandidateCallInfo(MachineOutlinerThunk, Costs.CallThunk);
+  } else
+    // Any other ending is not handled: report that nothing can be outlined.
+    return outliner::OutlinedFunction();
+
+  return outliner::OutlinedFunction(RepeatedSequenceLocs, SequenceSize,
+                                    NumBytesToCreateFrame, FrameID);
+}
+
+/// Returns true if \p MF is a function the outliner may take sequences from.
+bool ARMBaseInstrInfo::isFunctionSafeToOutlineFrom(
+    MachineFunction &MF, bool OutlineFromLinkOnceODRs) const {
+  const Function &F = MF.getFunction();
+
+  // Can F be deduplicated by the linker? If it can, don't outline from it.
+  if (!OutlineFromLinkOnceODRs && F.hasLinkOnceODRLinkage())
+    return false;
+
+  // Don't outline from functions with section markings; the program could
+  // expect that all the code is in the named section.
+  // FIXME: Allow outlining from multiple functions with the same section
+  // marking.
+  if (F.hasSection())
+    return false;
+
+  // FIXME: Thumb1 outlining is not handled
+  if (MF.getInfo<ARMFunctionInfo>()->isThumb1OnlyFunction())
+    return false;
+
+  // It's safe to outline from MF.
+  return true;
+}
+
+/// Decides whether \p MBB may be outlined from and records, in \p Flags, the
+/// block properties that later candidate filtering relies on.
+bool ARMBaseInstrInfo::isMBBSafeToOutlineFrom(MachineBasicBlock &MBB,
+                                              unsigned &Flags) const {
+  // The availability queries below only make sense with liveness tracked.
+  assert(MBB.getParent()->getRegInfo().tracksLiveness() &&
+         "Suitable Machine Function for outlining must track liveness");
+
+  // Accumulate the register units touched by the block, walking bottom-up.
+  LiveRegUnits UsedUnits(getRegisterInfo());
+  for (MachineInstr &Instr : llvm::reverse(MBB))
+    UsedUnits.accumulate(Instr);
+
+  // R12 and CPSR are the registers that may not survive an outlined call.
+  // When neither is touched inside the block, remember it so that they do
+  // not have to be checked again later.
+  const bool R12Free = UsedUnits.available(ARM::R12);
+  const bool CPSRFree = UsedUnits.available(ARM::CPSR);
+  if (R12Free && CPSRFree)
+    Flags |= MachineOutlinerMBBFlags::UnsafeRegsDead;
+
+  // From here on the set also covers the block's live-outs.
+  UsedUnits.addLiveOuts(MBB);
+
+  // A register untouched inside the block but live out of it carries a value
+  // across the block, so outlining from here is unsafe.
+  const bool R12LiveThrough = R12Free && !UsedUnits.available(ARM::R12);
+  const bool CPSRLiveThrough = CPSRFree && !UsedUnits.available(ARM::CPSR);
+  if (R12LiveThrough || CPSRLiveThrough)
+    return false;
+
+  // Note whether the block contains any call.
+  const bool ContainsCall =
+      any_of(MBB, [](MachineInstr &Instr) { return Instr.isCall(); });
+  if (ContainsCall)
+    Flags |= MachineOutlinerMBBFlags::HasCalls;
+
+  // Note whether LR is used in the block or live out of it.
+  if (!UsedUnits.available(ARM::LR))
+    Flags |= MachineOutlinerMBBFlags::LRUnavailableSomewhere;
+
+  return true;
+}
+
+outliner::InstrType
+ARMBaseInstrInfo::getOutliningType(MachineBasicBlock::iterator &MIT,
+                                   unsigned Flags) const {
+  MachineInstr &MI = *MIT;
+  const TargetRegisterInfo *TRI = &getRegisterInfo();
+
+  // Be conservative with inline ASM
+  if (MI.isInlineAsm())
+    return outliner::InstrType::Invisible;
+
+  // Don't allow debug values to impact outlining type.
+  // isDebugInstr() already covers indirect debug values, so one check is
+  // enough.
+  if (MI.isDebugInstr())
+    return outliner::InstrType::Invisible;
+
+  // At this point, KILL instructions don't really tell us much so we can go
+  // ahead and skip over them.
+  if (MI.isKill())
+    return outliner::InstrType::Invisible;
+
+  // PIC instructions contain labels, outlining them would break offset
+  // computing.
+  unsigned Opc = MI.getOpcode();
+  if (Opc == ARM::t2IT || Opc == ARM::tPICADD || Opc == ARM::PICADD ||
+      Opc == ARM::PICSTR || Opc == ARM::PICSTRB || Opc == ARM::PICSTRH ||
+      Opc == ARM::PICLDR || Opc == ARM::PICLDRB || Opc == ARM::PICLDRH ||
+      Opc == ARM::PICLDRSB || Opc == ARM::PICLDRSH ||
+      Opc == ARM::t2LDRpci_pic || Opc == ARM::t2MOVi16_ga_pcrel ||
+      Opc == ARM::t2MOVTi16_ga_pcrel || Opc == ARM::t2MOV_ga_pcrel)
+    return outliner::InstrType::Illegal;
+
+  // Is this a terminator for a basic block?
+  if (MI.isTerminator()) {
+    // Don't outline if the branch is not unconditional. The condition code
+    // is operand 0 for these returns and operand 2 for LDMIA_RET.
+    if ((Opc == ARM::BX_RET || Opc == ARM::tBX_RET || Opc == ARM::MOVPCLR) &&
+        MI.getOperand(0).getImm() != ARMCC::AL)
+      return outliner::InstrType::Illegal;
+    if (Opc == ARM::LDMIA_RET && MI.getOperand(2).getImm() != ARMCC::AL)
+      return outliner::InstrType::Illegal;
+
+    // Is this the end of a function?
+    if (MI.getParent()->succ_empty())
+      return outliner::InstrType::Legal;
+
+    // It's not, so don't outline it.
+    return outliner::InstrType::Illegal;
+  }
+
+  // Make sure none of the operands are un-outlinable.
+  for (const MachineOperand &MOP : MI.operands()) {
+    if (MOP.isCPI() || MOP.isJTI() || MOP.isCFIIndex() || MOP.isFI() ||
+        MOP.isTargetIndex())
+      return outliner::InstrType::Illegal;
+
+    // If it uses LR then don't touch it.
+    if (MOP.isReg() && !MOP.isImplicit() && (MOP.getReg() == ARM::LR))
+      return outliner::InstrType::Illegal;
+  }
+
+  if (MI.isCall()) {
+    // If we don't know anything about the callee, assume it depends on the
+    // stack layout of the caller. In that case, it's only legal to outline
+    // as a tail-call. Whitelist the call instructions we know about so we
+    // don't get unexpected results with call pseudo-instructions.
+    auto UnknownCallOutlineType = outliner::InstrType::Illegal;
+    if (Opc == ARM::BL || Opc == ARM::tBL || Opc == ARM::BLX ||
+        Opc == ARM::tBLXr || Opc == ARM::tBLXi)
+      UnknownCallOutlineType = outliner::InstrType::LegalTerminator;
+
+    return UnknownCallOutlineType;
+  }
+
+  // Don't outline positions.
+  if (MI.isPosition())
+    return outliner::InstrType::Illegal;
+
+  // Don't touch the link register
+  if (MI.readsRegister(ARM::LR, TRI) || MI.modifiesRegister(ARM::LR, TRI))
+    return outliner::InstrType::Illegal;
+
+  // Does this use the stack?
+  if (MI.modifiesRegister(ARM::SP, TRI) || MI.readsRegister(ARM::SP, TRI))
+    return outliner::InstrType::Illegal;
+
+  return outliner::InstrType::Legal;
+}
+
+void ARMBaseInstrInfo::buildOutlinedFrame(
+    MachineBasicBlock &MBB, MachineFunction &MF,
+    const outliner::OutlinedFunction &OF) const {
+  // For thunk outlining, rewrite the last instruction from a call to a
+  // tail-call.
+  if (OF.FrameConstructionID == MachineOutlinerThunk) {
+    MachineInstr *Call = &*--MBB.instr_end();
+    // The Thumb and ARM branches are braced explicitly: without braces the
+    // nested if/else chain reads ambiguously (dangling else).
+    if (Subtarget.isThumb()) {
+      // Thumb calls put the predicate operands first, so the callee is
+      // operand 2.
+      const MachineOperand &Callee = Call->getOperand(2);
+      if (Callee.isReg())
+        BuildMI(MBB, MBB.end(), DebugLoc(), get(ARM::tTAILJMPr)).add(Callee);
+      else if (Subtarget.isTargetMachO())
+        BuildMI(MBB, MBB.end(), DebugLoc(), get(ARM::tTAILJMPd))
+            .add(Callee)
+            .add(predOps(ARMCC::AL));
+      else
+        BuildMI(MBB, MBB.end(), DebugLoc(), get(ARM::tTAILJMPdND))
+            .add(Callee)
+            .add(predOps(ARMCC::AL));
+    } else {
+      // ARM calls have the callee as operand 0.
+      const MachineOperand &Callee = Call->getOperand(0);
+      if (Callee.isReg())
+        BuildMI(MBB, MBB.end(), DebugLoc(), get(ARM::TAILJMPr)).add(Callee);
+      else
+        BuildMI(MBB, MBB.end(), DebugLoc(), get(ARM::TAILJMPd)).add(Callee);
+    }
+    // The tail jump replaces the original call.
+    Call->eraseFromParent();
+  }
+}
+
+/// Inserts, before \p It in \p MBB, the instruction that transfers control to
+/// the outlined function \p MF on behalf of candidate \p C, and returns an
+/// iterator to it: a tail jump for tail-call candidates, a BL/tBL otherwise.
+MachineBasicBlock::iterator ARMBaseInstrInfo::insertOutlinedCall(
+    Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It,
+    MachineFunction &MF, const outliner::Candidate &C) const {
+  const bool isThumb = Subtarget.isThumb();
+
+  // Are we tail calling?
+  if (C.CallConstructionID == MachineOutlinerTailCall) {
+    // If yes, then we can just branch to the label.
+    const unsigned Opc =
+        isThumb
+            ? (Subtarget.isTargetMachO() ? ARM::tTAILJMPd : ARM::tTAILJMPdND)
+            : ARM::TAILJMPd;
+    MachineInstrBuilder MIB =
+        BuildMI(MF, DebugLoc(), get(Opc))
+            .addGlobalAddress(M.getNamedValue(MF.getName()));
+    // Thumb tail jumps take their predicate after the target.
+    if (isThumb)
+      MIB.add(predOps(ARMCC::AL));
+    It = MBB.insert(It, MIB);
+    return It;
+  }
+
+  // Create the call instruction. Thumb's tBL takes its predicate before the
+  // target.
+  const unsigned Opc = isThumb ? ARM::tBL : ARM::BL;
+  MachineInstrBuilder CallMIB = BuildMI(MF, DebugLoc(), get(Opc));
+  if (isThumb)
+    CallMIB.add(predOps(ARMCC::AL));
+  CallMIB.addGlobalAddress(M.getNamedValue(MF.getName()));
+
+  // Insert the call.
+ It = MBB.insert(It, CallMIB); + return It; +} diff --git a/llvm/lib/Target/ARM/ARMTargetMachine.cpp b/llvm/lib/Target/ARM/ARMTargetMachine.cpp --- a/llvm/lib/Target/ARM/ARMTargetMachine.cpp +++ b/llvm/lib/Target/ARM/ARMTargetMachine.cpp @@ -244,6 +244,10 @@ } initAsmInfo(); + + // ARM supports the MachineOutliner. + setMachineOutliner(true); + setSupportsDefaultOutlining(false); } ARMBaseTargetMachine::~ARMBaseTargetMachine() = default; @@ -543,7 +547,8 @@ void ARMPassConfig::addPreEmitPass2() { addPass(createARMConstantIslandPass()); - addPass(createARMLowOverheadLoopsPass()); + if (!MachineOutlinerEnabled) + addPass(createARMLowOverheadLoopsPass()); // Identify valid longjmp targets for Windows Control Flow Guard. if (TM->getTargetTriple().isOSWindows()) diff --git a/llvm/test/CodeGen/ARM/machine-outliner-tail.ll b/llvm/test/CodeGen/ARM/machine-outliner-tail.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/ARM/machine-outliner-tail.ll @@ -0,0 +1,42 @@ +; RUN: llc -verify-machineinstrs -enable-machine-outliner -mtriple=arm-- \ +; RUN: --stop-after=machine-outliner < %s | FileCheck %s --check-prefix=ARM +; RUN: llc -verify-machineinstrs -enable-machine-outliner -mtriple=thumbv7-- \ +; RUN: --stop-after=machine-outliner < %s | FileCheck %s --check-prefix=THUMB +; RUN: llc -verify-machineinstrs -enable-machine-outliner \ +; RUN: -mtriple=thumbv7-apple-darwin --stop-after=machine-outliner < %s \ +; RUN: | FileCheck %s --check-prefix=MACHO + +; ARM-LABEL: name: OUTLINED_FUNCTION_0 +; ARM: $r0 = MOVi 1, 14, $noreg, $noreg +; ARM-NEXT: $r1 = MOVi 2, 14, $noreg, $noreg +; ARM-NEXT: $r2 = MOVi 3, 14, $noreg, $noreg +; ARM-NEXT: $r3 = MOVi 4, 14, $noreg, $noreg +; ARM-NEXT: TAILJMPd @z + +; THUMB-LABEL: name: OUTLINED_FUNCTION_0 +; THUMB: $r0, dead $cpsr = tMOVi8 1, 14, $noreg +; THUMB-NEXT: $r1, dead $cpsr = tMOVi8 2, 14, $noreg +; THUMB-NEXT: $r2, dead $cpsr = tMOVi8 3, 14, $noreg +; THUMB-NEXT: $r3, dead $cpsr = tMOVi8 4, 14, $noreg +; THUMB-NEXT: 
tTAILJMPdND @z, 14, $noreg + +; MACHO-LABEL: name: OUTLINED_FUNCTION_0 +; MACHO: $r0, dead $cpsr = tMOVi8 1, 14, $noreg +; MACHO-NEXT: $r1, dead $cpsr = tMOVi8 2, 14, $noreg +; MACHO-NEXT: $r2, dead $cpsr = tMOVi8 3, 14, $noreg +; MACHO-NEXT: $r3, dead $cpsr = tMOVi8 4, 14, $noreg +; MACHO-NEXT: tTAILJMPd @z, 14, $noreg + +define void @a() { +entry: + tail call void @z(i32 1, i32 2, i32 3, i32 4) + ret void +} + +declare void @z(i32, i32, i32, i32) + +define dso_local void @b(i32* nocapture readnone %p) { +entry: + tail call void @z(i32 1, i32 2, i32 3, i32 4) + ret void +} diff --git a/llvm/test/CodeGen/ARM/machine-outliner-thunk.ll b/llvm/test/CodeGen/ARM/machine-outliner-thunk.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/ARM/machine-outliner-thunk.ll @@ -0,0 +1,111 @@ +; RUN: llc -enable-machine-outliner -verify-machineinstrs -mtriple=armv7-- \ +; RUN: -stop-after=machine-outliner < %s | FileCheck %s --check-prefix=ARM +; RUN: llc -enable-machine-outliner -verify-machineinstrs -mtriple=thumbv7-- \ +; RUN: -stop-after=machine-outliner < %s | FileCheck %s --check-prefix=THUMB +; RUN: llc -enable-machine-outliner -verify-machineinstrs \ +; RUN: -mtriple=thumbv7-apple-darwin -stop-after=machine-outliner < %s \ +; RUN: | FileCheck %s --check-prefix=MACHO + +declare i32 @thunk_called_fn(i32, i32, i32, i32) + +define i32 @a() { +; ARM-LABEL: name: a +; ARM: bb.0.entry: +; ARM-NEXT: liveins: $r11, $lr +; ARM: $sp = frame-setup STMDB_UPD $sp, 14, $noreg, killed $r11, killed $lr +; ARM-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 8 +; ARM-NEXT: frame-setup CFI_INSTRUCTION offset $lr, -4 +; ARM-NEXT: frame-setup CFI_INSTRUCTION offset $r11, -8 +; ARM-NEXT: BL @OUTLINED_FUNCTION_0{{.*}} +; ARM-NEXT: renamable $r0 = ADDri killed renamable $r0, 8, 14, $noreg, $noreg +; ARM-NEXT: $sp = LDMIA_RET $sp, 14, $noreg, def $r11, def $pc, implicit killed $r0 + +; THUMB-LABEL: name: a +; THUMB: bb.0.entry: +; THUMB-NEXT: liveins: $r7, $lr +; THUMB: frame-setup 
tPUSH 14, $noreg, killed $r7, killed $lr +; THUMB-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 8 +; THUMB-NEXT: frame-setup CFI_INSTRUCTION offset $lr, -4 +; THUMB-NEXT: frame-setup CFI_INSTRUCTION offset $r7, -8 +; THUMB-NEXT: tBL 14, $noreg, @OUTLINED_FUNCTION_0{{.*}} +; THUMB-NEXT: renamable $r0, dead $cpsr = tADDi8 killed renamable $r0, 8, 14, $noreg +; THUMB-NEXT: tPOP_RET 14, $noreg, def $r7, def $pc + +; MACHO-LABEL: name: a +; MACHO: bb.0.entry: +; MACHO-NEXT: liveins: $lr +; MACHO: early-clobber $sp = frame-setup t2STR_PRE killed $lr, $sp, -4, 14, $noreg +; MACHO-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 4 +; MACHO-NEXT: frame-setup CFI_INSTRUCTION offset $lr, -4 +; MACHO-NEXT: tBL 14, $noreg, @OUTLINED_FUNCTION_0{{.*}} +; MACHO-NEXT: renamable $r0, dead $cpsr = tADDi8 killed renamable $r0, 8, 14, $noreg +; MACHO-NEXT: $lr, $sp = t2LDR_POST $sp, 4, 14, $noreg +; MACHO-NEXT: tBX_RET 14, $noreg, implicit killed $r0 +entry: + %call = tail call i32 @thunk_called_fn(i32 1, i32 2, i32 3, i32 4) + %cx = add i32 %call, 8 + ret i32 %cx +} + +define i32 @b() { +; ARM-LABEL: name: b +; ARM: bb.0.entry: +; ARM-NEXT: liveins: $r11, $lr +; ARM: $sp = frame-setup STMDB_UPD $sp, 14, $noreg, killed $r11, killed $lr +; ARM-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 8 +; ARM-NEXT: frame-setup CFI_INSTRUCTION offset $lr, -4 +; ARM-NEXT: frame-setup CFI_INSTRUCTION offset $r11, -8 +; ARM-NEXT: BL @OUTLINED_FUNCTION_0{{.*}} +; ARM-NEXT: renamable $r0 = ADDri killed renamable $r0, 88, 14, $noreg, $noreg +; ARM-NEXT: $sp = LDMIA_RET $sp, 14, $noreg, def $r11, def $pc, implicit killed $r0 + +; THUMB-LABEL: name: b +; THUMB: bb.0.entry: +; THUMB-NEXT: liveins: $r7, $lr +; THUMB: frame-setup tPUSH 14, $noreg, killed $r7, killed $lr +; THUMB-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 8 +; THUMB-NEXT: frame-setup CFI_INSTRUCTION offset $lr, -4 +; THUMB-NEXT: frame-setup CFI_INSTRUCTION offset $r7, -8 +; THUMB-NEXT: tBL 14, $noreg, @OUTLINED_FUNCTION_0{{.*}} +; 
THUMB-NEXT: renamable $r0, dead $cpsr = tADDi8 killed renamable $r0, 88, 14, $noreg +; THUMB-NEXT: tPOP_RET 14, $noreg, def $r7, def $pc + +; MACHO-LABEL: name: b +; MACHO: bb.0.entry: +; MACHO-NEXT: liveins: $lr +; MACHO: early-clobber $sp = frame-setup t2STR_PRE killed $lr, $sp, -4, 14, $noreg +; MACHO-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 4 +; MACHO-NEXT: frame-setup CFI_INSTRUCTION offset $lr, -4 +; MACHO-NEXT: tBL 14, $noreg, @OUTLINED_FUNCTION_0{{.*}} +; MACHO-NEXT: renamable $r0, dead $cpsr = tADDi8 killed renamable $r0, 88, 14, $noreg +; MACHO-NEXT: $lr, $sp = t2LDR_POST $sp, 4, 14, $noreg +; MACHO-NEXT: tBX_RET 14, $noreg, implicit killed $r0 +entry: + %call = tail call i32 @thunk_called_fn(i32 1, i32 2, i32 3, i32 4) + %cx = add i32 %call, 88 + ret i32 %cx +} + +; ARM-LABEL: name: OUTLINED_FUNCTION_0 +; ARM: bb.0: +; ARM-NEXT: $r0 = MOVi 1, 14, $noreg, $noreg +; ARM-NEXT: $r1 = MOVi 2, 14, $noreg, $noreg +; ARM-NEXT: $r2 = MOVi 3, 14, $noreg, $noreg +; ARM-NEXT: $r3 = MOVi 4, 14, $noreg, $noreg +; ARM-NEXT: TAILJMPd @thunk_called_fn, implicit $sp + +; THUMB-LABEL: name: OUTLINED_FUNCTION_0 +; THUMB: bb.0: +; THUMB-NEXT: $r0, dead $cpsr = tMOVi8 1, 14, $noreg +; THUMB-NEXT: $r1, dead $cpsr = tMOVi8 2, 14, $noreg +; THUMB-NEXT: $r2, dead $cpsr = tMOVi8 3, 14, $noreg +; THUMB-NEXT: $r3, dead $cpsr = tMOVi8 4, 14, $noreg +; THUMB-NEXT: tTAILJMPdND @thunk_called_fn, 14, $noreg, implicit $sp + +; MACHO-LABEL: name: OUTLINED_FUNCTION_0 +; MACHO: bb.0: +; MACHO-NEXT: $r0, dead $cpsr = tMOVi8 1, 14, $noreg +; MACHO-NEXT: $r1, dead $cpsr = tMOVi8 2, 14, $noreg +; MACHO-NEXT: $r2, dead $cpsr = tMOVi8 3, 14, $noreg +; MACHO-NEXT: $r3, dead $cpsr = tMOVi8 4, 14, $noreg +; MACHO-NEXT: tTAILJMPd @thunk_called_fn, 14, $noreg, implicit $sp