Index: include/llvm/CodeGen/TargetInstrInfo.h =================================================================== --- include/llvm/CodeGen/TargetInstrInfo.h +++ include/llvm/CodeGen/TargetInstrInfo.h @@ -1639,10 +1639,12 @@ /// Represents how an instruction should be mapped by the outliner. /// \p Legal instructions are those which are safe to outline. + /// \p LegalTerminator instructions are safe to outline, but only as the + /// last instruction in a sequence. /// \p Illegal instructions are those which cannot be outlined. /// \p Invisible instructions are instructions which can be outlined, but /// shouldn't actually impact the outlining result. - enum MachineOutlinerInstrType { Legal, Illegal, Invisible }; + enum MachineOutlinerInstrType { Legal, LegalTerminator, Illegal, Invisible }; /// Returns how or if \p MI should be outlined. virtual MachineOutlinerInstrType Index: lib/CodeGen/MachineOutliner.cpp =================================================================== --- lib/CodeGen/MachineOutliner.cpp +++ lib/CodeGen/MachineOutliner.cpp @@ -777,6 +777,13 @@ mapToLegalUnsigned(It); break; + case TargetInstrInfo::MachineOutlinerInstrType::LegalTerminator: + mapToLegalUnsigned(It); + InstrList.push_back(It); + UnsignedVec.push_back(IllegalInstrNumber); + IllegalInstrNumber--; + break; + case TargetInstrInfo::MachineOutlinerInstrType::Invisible: break; } Index: lib/Target/AArch64/AArch64InstrInfo.cpp =================================================================== --- lib/Target/AArch64/AArch64InstrInfo.cpp +++ lib/Target/AArch64/AArch64InstrInfo.cpp @@ -4901,10 +4901,26 @@ /// * Frame construction overhead: 1 (RET) /// * Requires stack fixups? No /// + /// \p MachineOutlinerThunk implies that the function is being created from + /// a sequence of instructions ending in a call. The outlined function is + /// called with a BL instruction, and the outlined function tail-calls the + /// original call destination. 
+ /// + /// That is, + /// + /// I1 OUTLINED_FUNCTION: + /// I2 --> BL OUTLINED_FUNCTION I1 + /// BL f I2 + /// B f + /// * Call construction overhead: 1 (BL) + /// * Frame construction overhead: 0 + /// * Requires stack fixups? No + /// enum MachineOutlinerClass { MachineOutlinerDefault, /// Emit a save, restore, call, and return. MachineOutlinerTailCall, /// Only emit a branch. - MachineOutlinerNoLRSave /// Emit a call and return. + MachineOutlinerNoLRSave, /// Emit a call and return. + MachineOutlinerThunk, /// Emit a call and tail-call. }; enum MachineOutlinerMBBFlags { @@ -4950,6 +4966,8 @@ [this](std::pair &I) { return canOutlineWithoutLRSave(I.second); }; + unsigned LastInstrOpcode = RepeatedSequenceLocs[0].second->getOpcode(); + // If the last instruction in any candidate is a terminator, then we should // tail call all of the candidates. if (RepeatedSequenceLocs[0].second->isTerminator()) { @@ -4959,6 +4977,14 @@ NumBytesToCreateFrame = 0; } + else if (LastInstrOpcode == AArch64::BL || LastInstrOpcode == AArch64::BLR) { + // FIXME: Do we need to check if the code after this uses the value of LR? + CallID = MachineOutlinerThunk; + FrameID = MachineOutlinerThunk; + NumBytesForCall = 4; + NumBytesToCreateFrame = 0; + } + else if (std::all_of(RepeatedSequenceLocs.begin(), RepeatedSequenceLocs.end(), DoesntNeedLRSave)) { CallID = MachineOutlinerNoLRSave; @@ -4977,8 +5003,9 @@ // last instruction is a call. We don't want to save + restore in this case. // However, it could be possible that the last instruction is a call without // it being valid to tail call this sequence. We should consider this as well. - else if (RepeatedSequenceLocs[0].second->isCall() && - FrameID != MachineOutlinerTailCall) + else if (FrameID != MachineOutlinerThunk && + FrameID != MachineOutlinerTailCall && + RepeatedSequenceLocs[0].second->isCall()) NumBytesToCreateFrame += 8; return MachineOutlinerInfo(SequenceSize, NumBytesForCall, @@ -5092,36 +5119,49 @@ // stack. 
Thus, if we outline, say, half the parameters for a function call // plus the call, then we'll break the callee's expectations for the layout // of the stack. + // + // FIXME: Allow calls to functions which construct a stack frame, as long + // as they don't access arguments on the stack. + // FIXME: Figure out some way to analyze functions defined in other modules. + // We should be able to compute the memory usage based on the IR calling + // convention, even if we can't see the definition. if (MI.isCall()) { const Module *M = MF->getFunction().getParent(); assert(M && "No module?"); // Get the function associated with the call. Look at each operand and find // the one that represents the callee and get its name. - Function *Callee = nullptr; + const Function *Callee = nullptr; for (const MachineOperand &MOP : MI.operands()) { - if (MOP.isSymbol()) { - Callee = M->getFunction(MOP.getSymbolName()); - break; - } - - else if (MOP.isGlobal()) { - Callee = M->getFunction(MOP.getGlobal()->getGlobalIdentifier()); + if (MOP.isGlobal()) { + Callee = dyn_cast<Function>(MOP.getGlobal()); break; } } - // Only handle functions that we have information about. - if (!Callee) + // Never outline calls to mcount. There isn't any rule that would require + // this, but the Linux kernel's "ftrace" feature depends on it. + if (Callee && Callee->getName() == "\01_mcount") return MachineOutlinerInstrType::Illegal; + // If we don't know anything about the callee, assume it depends on the + // stack layout of the caller. In that case, it's only legal to outline + // as a tail-call. Whitelist the call instructions we know about so we + // don't get unexpected results with call pseudo-instructions. 
+ auto UnknownCallOutlineType = MachineOutlinerInstrType::Illegal; + if (MI.getOpcode() == AArch64::BLR || MI.getOpcode() == AArch64::BL) + UnknownCallOutlineType = MachineOutlinerInstrType::LegalTerminator; + + if (!Callee) + return UnknownCallOutlineType; + // We have a function we have information about. Check it if it's something // can safely outline. MachineFunction *CalleeMF = MF->getMMI().getMachineFunction(*Callee); // We don't know what's going on with the callee at all. Don't touch it. if (!CalleeMF) - return MachineOutlinerInstrType::Illegal; + return UnknownCallOutlineType; // Check if we know anything about the callee saves on the function. If we // don't, then don't touch it, since that implies that we haven't @@ -5129,7 +5169,7 @@ MachineFrameInfo &MFI = CalleeMF->getFrameInfo(); if (!MFI.isCalleeSavedInfoValid() || MFI.getStackSize() > 0 || MFI.getNumObjects() > 0) - return MachineOutlinerInstrType::Illegal; + return UnknownCallOutlineType; // At this point, we can say that CalleeMF ought to not pass anything on the // stack. Therefore, we can outline it. @@ -5153,6 +5193,8 @@ // * LR is available in the range (No save/restore around call) // * The range doesn't include calls (No save/restore in outlined frame) // are true. + // FIXME: This is very restrictive; the flags check the whole block, + // not just the bit we will try to outline. bool MightNeedStackFixUp = (Flags & (MachineOutlinerMBBFlags::LRUnavailableSomewhere | MachineOutlinerMBBFlags::HasCalls)); @@ -5267,6 +5309,24 @@ void AArch64InstrInfo::insertOutlinerEpilogue( MachineBasicBlock &MBB, MachineFunction &MF, const MachineOutlinerInfo &MInfo) const { + // For thunk outlining, rewrite the last instruction from a call to a + // tail-call. 
+ if (MInfo.FrameConstructionID == MachineOutlinerThunk) { + MachineInstr *Call = &*--MBB.instr_end(); + unsigned TailOpcode; + if (Call->getOpcode() == AArch64::BL) { + TailOpcode = AArch64::TCRETURNdi; + } else { + assert(Call->getOpcode() == AArch64::BLR); + TailOpcode = AArch64::TCRETURNri; + } + MachineInstr *TC = BuildMI(MF, DebugLoc(), get(TailOpcode)) + .add(Call->getOperand(0)) + .addImm(0); + MBB.insert(MBB.end(), TC); + Call->eraseFromParent(); + } + // Is there a call in the outlined range? auto IsNonTailCall = [](MachineInstr &MI) { return MI.isCall() && !MI.isReturn(); @@ -5274,6 +5334,8 @@ if (std::any_of(MBB.instr_begin(), MBB.instr_end(), IsNonTailCall)) { // Fix up the instructions in the range, since we're going to modify the // stack. + assert(MInfo.FrameConstructionID != MachineOutlinerDefault && + "Can only fix up stack references once"); fixupPostOutline(MBB); // LR has to be a live in so that we can save it. @@ -5282,7 +5344,8 @@ MachineBasicBlock::iterator It = MBB.begin(); MachineBasicBlock::iterator Et = MBB.end(); - if (MInfo.FrameConstructionID == MachineOutlinerTailCall) + if (MInfo.FrameConstructionID == MachineOutlinerTailCall || + MInfo.FrameConstructionID == MachineOutlinerThunk) Et = std::prev(MBB.end()); // Insert a save before the outlined region @@ -5322,7 +5385,8 @@ } // If this is a tail call outlined function, then there's already a return. - if (MInfo.FrameConstructionID == MachineOutlinerTailCall) + if (MInfo.FrameConstructionID == MachineOutlinerTailCall || + MInfo.FrameConstructionID == MachineOutlinerThunk) return; // It's not a tail call, so we have to insert the return ourselves. @@ -5357,7 +5421,8 @@ } // Are we saving the link register? - if (MInfo.CallConstructionID == MachineOutlinerNoLRSave) { + if (MInfo.CallConstructionID == MachineOutlinerNoLRSave || + MInfo.CallConstructionID == MachineOutlinerThunk) { // No, so just insert the call. 
It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::BL)) .addGlobalAddress(M.getNamedValue(MF.getName()))); Index: test/CodeGen/AArch64/machine-outliner-thunk.ll =================================================================== --- /dev/null +++ test/CodeGen/AArch64/machine-outliner-thunk.ll @@ -0,0 +1,47 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -enable-machine-outliner -verify-machineinstrs | FileCheck %s + +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +target triple = "aarch64-pc-linux-gnu" + +declare i32 @thunk_called_fn(i32, i32, i32, i32) + +define i32 @a() { +; CHECK-LABEL: a: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl OUTLINED_FUNCTION_0 +; CHECK-NEXT: add w0, w0, #8 // =8 +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret +entry: + %call = tail call i32 @thunk_called_fn(i32 1, i32 2, i32 3, i32 4) + %cx = add i32 %call, 8 + ret i32 %cx +} + +define i32 @b() { +; CHECK-LABEL: b: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl OUTLINED_FUNCTION_0 +; CHECK-NEXT: add w0, w0, #88 // =88 +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret +entry: + %call = tail call i32 @thunk_called_fn(i32 1, i32 2, i32 3, i32 4) + %cx = add i32 %call, 88 + ret i32 %cx +} + +; CHECK-LABEL: OUTLINED_FUNCTION_0: +; CHECK: // %bb.0: +; CHECK-NEXT: orr w0, wzr, #0x1 +; CHECK-NEXT: orr w1, wzr, #0x2 +; CHECK-NEXT: orr w2, wzr, #0x3 +; CHECK-NEXT: orr w3, wzr, #0x4 +; CHECK-NEXT: b thunk_called_fn Index: test/CodeGen/AArch64/machine-outliner.mir =================================================================== --- test/CodeGen/AArch64/machine-outliner.mir +++ test/CodeGen/AArch64/machine-outliner.mir @@ -124,6 +124,8 @@ $w17 = ORRWri $wzr, 1 $w17 = ORRWri $wzr, 1 BL @baz, implicit-def dead $lr, implicit $sp + $w17 = ORRWri $wzr, 1 + $w17 = ORRWri $wzr, 1 $w17 = ORRWri $wzr, 2 BL @baz, implicit-def dead $lr, implicit $sp $w17 = ORRWri $wzr, 1 @@ -131,6 +133,8 @@ $w17 = ORRWri $wzr, 1 $w17 = ORRWri $wzr, 1 BL @baz, implicit-def dead $lr, implicit $sp + $w17 = ORRWri $wzr, 1 + $w17 = ORRWri $wzr, 1 $w8 = ORRWri $wzr, 0 bb.2: