diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
--- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -360,6 +360,10 @@
                      const outliner::Candidate &C) const override;
 
 private:
+  /// Returns a general-purpose register that is unused across candidate \p C
+  /// and can hold LR around an outlined call. Returns 0 if none exists.
+  unsigned findRegisterToSaveLRTo(const outliner::Candidate &C) const;
+
   unsigned getInstBundleLength(const MachineInstr &MI) const;
 
   int getVLDMDefCycle(const InstrItineraryData *ItinData,
diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
--- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -5503,10 +5503,53 @@
 /// | Frame overhead in Bytes |      0 |   0 |
 /// | Stack fixup required    |     No |  No |
 /// +-------------------------+--------+-----+
+///
+/// \p MachineOutlinerNoLRSave implies that the function should be called using
+/// a BL instruction, but doesn't require LR to be saved and restored. This
+/// happens when LR is known to be dead.
+///
+/// That is,
+///
+/// I1                                OUTLINED_FUNCTION:
+/// I2 --> BL OUTLINED_FUNCTION       I1
+/// I3                                I2
+///                                   I3
+///                                   BX LR
+///
+/// +-------------------------+--------+-----+
+/// |                         | Thumb2 | ARM |
+/// +-------------------------+--------+-----+
+/// | Call overhead in Bytes  |      4 |   4 |
+/// | Frame overhead in Bytes |      4 |   4 |
+/// | Stack fixup required    |     No |  No |
+/// +-------------------------+--------+-----+
+///
+/// \p MachineOutlinerRegSave implies that the function should be called with a
+/// save and restore of LR to an available register. This allows us to avoid
+/// stack fixups. Note that this variant uses the same outlined frame as the
+/// NoLRSave case, so both kinds of call can target one outlined function.
+///
+/// That is,
+///
+/// I1     Save LR                    OUTLINED_FUNCTION:
+/// I2 --> BL OUTLINED_FUNCTION       I1
+/// I3     Restore LR                 I2
+///                                   I3
+///                                   BX LR
+///
+/// +-------------------------+--------+-----+
+/// |                         | Thumb2 | ARM |
+/// +-------------------------+--------+-----+
+/// | Call overhead in Bytes  |      8 |  12 |
+/// | Frame overhead in Bytes |      2 |   4 |
+/// | Stack fixup required    |     No |  No |
+/// +-------------------------+--------+-----+
 
 enum MachineOutlinerClass {
   MachineOutlinerTailCall,
-  MachineOutlinerThunk
+  MachineOutlinerThunk,
+  MachineOutlinerNoLRSave, // Called with a plain BL; LR is dead at the call.
+  MachineOutlinerRegSave   // LR is saved to a free register around the BL.
 };
 
 enum MachineOutlinerMBBFlags {
@@ -5520,14 +5563,45 @@
   const int FrameTailCall;
   const int CallThunk;
   const int FrameThunk;
+  const int CallNoLRSave;
+  const int FrameNoLRSave;
+  const int CallRegSave;
+  const int FrameRegSave;
 
   OutlinerCosts(const ARMSubtarget &target)
       : CallTailCall(target.isThumb() ? 4 : 4),
         FrameTailCall(target.isThumb() ? 0 : 0),
         CallThunk(target.isThumb() ? 4 : 4),
-        FrameThunk(target.isThumb() ? 0 : 0) {}
+        FrameThunk(target.isThumb() ? 0 : 0),
+        CallNoLRSave(target.isThumb() ? 4 : 4),
+        FrameNoLRSave(target.isThumb() ? 4 : 4),
+        CallRegSave(target.isThumb() ? 8 : 12),
+        FrameRegSave(target.isThumb() ? 2 : 4) {}
 };
 
+unsigned
+ARMBaseInstrInfo::findRegisterToSaveLRTo(const outliner::Candidate &C) const {
+  assert(C.LRUWasSet && "LRU wasn't set?");
+  MachineFunction *MF = C.getMF();
+  const auto *RegInfo = static_cast<const ARMBaseRegisterInfo *>(
+      MF->getSubtarget().getRegisterInfo());
+  const BitVector Reserved = RegInfo->getReservedRegs(*MF);
+  const TargetRegisterClass &Pool =
+      Subtarget.isThumb() ? ARM::tGPRRegClass : ARM::GPRRegClass;
+
+  // Take the first pool register available in C.LRU and C.UsedInSequence.
+  for (unsigned Reg : Pool) {
+    // LR is not reserved but must not be used; R12 may not be preserved.
+    if (Reg == ARM::LR || Reg == ARM::R12)
+      continue;
+    if (Reg < Reserved.size() && Reserved.test(Reg))
+      continue;
+    if (C.LRU.available(Reg) && C.UsedInSequence.available(Reg))
+      return Reg;
+  }
+  return 0u; // No suitable register.
+}
+
 outliner::OutlinedFunction ARMBaseInstrInfo::getOutliningCandidateInfo(
     std::vector<outliner::Candidate> &RepeatedSequenceLocs) const {
   outliner::Candidate &FirstCand = RepeatedSequenceLocs[0];
@@ -5608,8 +5682,38 @@
     FrameID = MachineOutlinerThunk;
     NumBytesToCreateFrame = Costs->FrameThunk;
     SetCandidateCallInfo(MachineOutlinerThunk, Costs->CallThunk);
-  } else
-    return outliner::OutlinedFunction();
+  } else {
+    // We need to decide how to emit calls + frames. We can always emit the same
+    // frame if we don't need to save to the stack.
+    unsigned NumBytesNoStackCalls = 0;
+    std::vector<outliner::Candidate> CandidatesWithoutStackFixups;
+
+    for (outliner::Candidate &C : RepeatedSequenceLocs) {
+      C.initLRU(TRI);
+
+      // Is LR available? If so, we don't need a save.
+      if (C.LRU.available(ARM::LR)) {
+        FrameID = MachineOutlinerNoLRSave;
+        NumBytesNoStackCalls += Costs->CallNoLRSave;
+        C.setCallInfo(MachineOutlinerNoLRSave, Costs->CallNoLRSave);
+        CandidatesWithoutStackFixups.push_back(C);
+      }
+
+      // Is an unused register available? If so, we won't modify the stack, so
+      // we can outline with the same frame type as those that don't save LR.
+      else if (findRegisterToSaveLRTo(C)) {
+        FrameID = MachineOutlinerRegSave;
+        NumBytesNoStackCalls += Costs->CallRegSave;
+        C.setCallInfo(MachineOutlinerRegSave, Costs->CallRegSave);
+        CandidatesWithoutStackFixups.push_back(C);
+      }
+    }
+
+    if (!CandidatesWithoutStackFixups.empty()) {
+      RepeatedSequenceLocs = CandidatesWithoutStackFixups;
+    } else
+      return outliner::OutlinedFunction();
+  }
 
   return outliner::OutlinedFunction(RepeatedSequenceLocs, SequenceSize,
                                     NumBytesToCreateFrame, FrameID);
@@ -5764,8 +5868,43 @@
     return outliner::InstrType::Illegal;
 
   // Does this use the stack?
-  if (MI.modifiesRegister(ARM::SP, TRI) || MI.readsRegister(ARM::SP, TRI))
+  if (MI.modifiesRegister(ARM::SP, TRI) || MI.readsRegister(ARM::SP, TRI)) {
+    // True if an outlined candidate from this range might require stack
+    // fixups. It is false only when both
+    // * LR is available in the range (No save/restore around call)
+    // * The range doesn't include calls (No save/restore in outlined frame)
+    // are true.
+    // FIXME: This is very restrictive; the flags check the whole block,
+    // not just the bit we will try to outline.
+    bool MightNeedStackFixUp =
+        (Flags & (MachineOutlinerMBBFlags::LRUnavailableSomewhere |
+                  MachineOutlinerMBBFlags::HasCalls));
+
+    // If this instruction is in a range where it *never* needs to be fixed
+    // up, then we can *always* outline it. This is true even if it's not
+    // possible to fix that instruction up.
+    //
+    // Why? Consider two equivalent instructions I1, I2 where both I1 and I2
+    // use SP. Suppose that I1 sits within a range that definitely doesn't
+    // need stack fixups, while I2 sits in a range that does.
+    //
+    // First, I1 can be outlined as long as we *never* fix up the stack in
+    // any sequence containing it. I1 is already a safe instruction in the
+    // original program, so as long as we don't modify it we're good to go.
+    // So this leaves us with showing that outlining I2 won't break our
+    // program.
+    //
+    // Suppose I1 and I2 belong to equivalent candidate sequences. When we
+    // look at I2, we need to see if it can be fixed up. Suppose I2 (and
+    // thus I1) cannot be fixed up. Then I2 will be assigned a unique
+    // integer label; thus, I2 cannot belong to any candidate sequence (a
+    // contradiction). Suppose I2 can be fixed up. Then I1 can be fixed up
+    // as well, so we're good. Thus, I1 is always safe to outline.
+    if (!MightNeedStackFixUp)
+      return outliner::InstrType::Legal;
+
     return outliner::InstrType::Illegal;
+  }
 
   return outliner::InstrType::Legal;
 }
@@ -5797,6 +5936,16 @@
           .add(Call->getOperand(0));
     Call->eraseFromParent();
   }
+
+  // Tail-call and thunk frames already end with a return; nothing to add.
+  if (OF.FrameConstructionID == MachineOutlinerTailCall ||
+      OF.FrameConstructionID == MachineOutlinerThunk)
+    return;
+
+  // It's not a tail call, so we have to insert the return ourselves.  Get the
+  // correct opcode from the current subtarget.
+  BuildMI(MBB, MBB.end(), DebugLoc(), get(Subtarget.getReturnOpcode()))
+      .add(predOps(ARMCC::AL));
 }
 
 MachineBasicBlock::iterator ARMBaseInstrInfo::insertOutlinedCall(
@@ -5828,7 +5977,20 @@
     CallMIB.add(predOps(ARMCC::AL));
   CallMIB.addGlobalAddress(M.getNamedValue(MF.getName()));
 
-  // Insert the call.
+  // Can we save to a register?
+  if (C.CallConstructionID == MachineOutlinerRegSave) {
+    MBB.addLiveIn(ARM::LR);
+    unsigned Reg = findRegisterToSaveLRTo(C);
+    assert(Reg != 0 && "No callee-saved register available?");
+
+    // Save and restore LR from that register.
+    copyPhysReg(MBB, It, DebugLoc(), Reg, ARM::LR, true);
+    CallPt = MBB.insert(It, CallMIB);
+    copyPhysReg(MBB, It, DebugLoc(), ARM::LR, Reg, true);
+    It--;
+    return CallPt;
+  }
+
   It = MBB.insert(It, CallMIB);
   return It;
 }
diff --git a/llvm/test/CodeGen/ARM/machine-outliner-nosave-and-regs.mir b/llvm/test/CodeGen/ARM/machine-outliner-nosave-and-regs.mir
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/machine-outliner-nosave-and-regs.mir
@@ -0,0 +1,116 @@
+# RUN: llc -mtriple=arm-- -run-pass=machine-outliner -verify-machineinstrs \
+# RUN: %s -o - | FileCheck %s
+# RUN: llc -mtriple=thumbv7-- -run-pass=machine-outliner -verify-machineinstrs \
+# RUN: %s -o - | FileCheck %s
+
+--- |
+  define void @outline_no_save_1() #0 { ret void }
+  define void @outline_no_save_2() #0 { ret void }
+  define void @outline_save_reg() #0 { ret void }
+  define void @outline_cpsr_ok() #0 { ret void }
+  define void @dont_outline_cpsr() #0 { ret void }
+  define void @dont_outline_r12() #0 { ret void }
+
+  attributes #0 = { minsize optsize }
+...
+---
+
+name:           outline_no_save_1
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    ; CHECK-LABEL: bb.0:
+    ; CHECK: OUTLINED
+    liveins: $r1
+    $r2 = MOVi 1, 14, $noreg, $noreg
+    $r2 = MOVi 1, 14, $noreg, $noreg
+    $r2 = MOVi 1, 14, $noreg, $noreg
+    $r2 = MOVi 1, 14, $noreg, $noreg
+    $r2 = MOVr $r1, 14, $noreg, $noreg
+    BX_RET 14, $noreg
+...
+---
+
+name:           outline_no_save_2
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    ; CHECK-LABEL: bb.0:
+    ; CHECK: OUTLINED
+    liveins: $r3
+    $r2 = MOVi 1, 14, $noreg, $noreg
+    $r2 = MOVi 1, 14, $noreg, $noreg
+    $r2 = MOVi 1, 14, $noreg, $noreg
+    $r2 = MOVi 1, 14, $noreg, $noreg
+    $r2 = MOVr $r3, 14, $noreg, $noreg
+    BX_RET 14, $noreg
+...
+---
+
+name:           outline_save_reg
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    ; CHECK-LABEL: bb.0:
+    ; CHECK: OUTLINED
+    liveins: $lr
+    $r2 = MOVi 1, 14, $noreg, $noreg
+    $r2 = MOVi 1, 14, $noreg, $noreg
+    $r2 = MOVi 1, 14, $noreg, $noreg
+    $r2 = MOVi 1, 14, $noreg, $noreg
+    $r2 = MOVr $lr, 14, $noreg, $noreg
+    BX_RET 14, $noreg
+...
+---
+
+name:           outline_cpsr_ok
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    ; CHECK-LABEL: bb.0:
+    ; CHECK: OUTLINED
+    liveins: $r4
+    $r2 = MOVi 1, 14, $noreg, $noreg
+    $r2 = MOVi 1, 14, $noreg, $noreg
+    $r2 = MOVi 1, 14, $noreg, $noreg
+    $r2 = MOVi 1, 14, $noreg, $noreg
+    CMPri $r4, 42, 14, $noreg, implicit-def $cpsr
+    Bcc %bb.1, 13, $cpsr
+    $r2 = MOVr $r4, 14, $noreg, $noreg
+  bb.1:
+    BX_RET 14, $noreg
+...
+---
+
+name:           dont_outline_cpsr
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    ; CHECK-LABEL: bb.0:
+    ; CHECK-NOT: BL
+    liveins: $r4
+    CMPri $r4, 42, 14, $noreg, implicit-def $cpsr
+    $r2 = MOVi 1, 14, $noreg, $noreg
+    $r2 = MOVi 1, 14, $noreg, $noreg
+    $r2 = MOVi 1, 14, $noreg, $noreg
+    $r2 = MOVi 1, 14, $noreg, $noreg
+    Bcc %bb.1, 13, $cpsr
+    $r2 = MOVr $r4, 14, $noreg, $noreg
+  bb.1:
+    BX_RET 14, $noreg
+...
+---
+
+name:           dont_outline_r12
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    ; CHECK-LABEL: bb.0:
+    ; CHECK-NOT: BL
+    liveins: $r12
+    $r2 = MOVi 1, 14, $noreg, $noreg
+    $r2 = MOVi 1, 14, $noreg, $noreg
+    $r2 = MOVi 1, 14, $noreg, $noreg
+    $r2 = MOVi 1, 14, $noreg, $noreg
+    $r2 = MOVr $r12, 14, $noreg, $noreg
+    BX_RET 14, $noreg