Index: include/llvm/Target/TargetLowering.h
===================================================================
--- include/llvm/Target/TargetLowering.h
+++ include/llvm/Target/TargetLowering.h
@@ -50,6 +50,7 @@
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Target/TargetCallingConv.h"
 #include "llvm/Target/TargetMachine.h"
+#include "llvm/CodeGen/LiveInterval.h"
 #include <algorithm>
 #include <cassert>
 #include <climits>
@@ -3178,6 +3179,21 @@
     return false;
   }
 
+  /// The target can specify whether a callee-saved register should be used
+  /// rather than splitting the live range. The default is to use the CSR.
+  virtual bool useCSRInsteadOfSplit(const LiveInterval &LI) const {
+    return true;
+  }
+
+  /// Target specific cost of using a callee-saved register for the first time
+  /// when the live range of the value has uses in the blocks \p UseMBBs. The
+  /// greedy register allocator only asks for this cost when
+  /// useCSRInsteadOfSplit returns false for the live range, so only targets
+  /// that override that hook need to return a non-zero value here.
+  virtual int64_t costOfFirstCSRForBlocks(
+    const SmallVectorImpl<MachineBasicBlock*> &UseMBBs) const {
+    return 0;
+  }
   /// Lower TLS global address SDNode for target independent emulated TLS model.
   virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA,
                                           SelectionDAG &DAG) const;
Index: lib/CodeGen/RegAllocGreedy.cpp
===================================================================
--- lib/CodeGen/RegAllocGreedy.cpp
+++ lib/CodeGen/RegAllocGreedy.cpp
@@ -48,6 +48,7 @@
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/Target/TargetLowering.h"
 #include <queue>
 
 using namespace llvm;
@@ -120,6 +121,7 @@
   // Shortcuts to some useful interface.
   const TargetInstrInfo *TII;
   const TargetRegisterInfo *TRI;
+  const TargetLowering *TLI;
   RegisterClassInfo RCI;
 
   // analyses
@@ -2331,7 +2333,17 @@
     // the cost of splitting is lower than CSRCost.
     SA->analyze(&VirtReg);
     unsigned NumCands = 0;
-    BlockFrequency BestCost = CSRCost; // Don't modify CSRCost.
+    BlockFrequency BestCost;
+    bool GetCostFromTarget = !TLI->useCSRInsteadOfSplit(VirtReg);
+    if (GetCostFromTarget) {
+      ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
+      SmallVector<MachineBasicBlock*, 4> UseMBBs;
+      for (int i = 0, e = UseBlocks.size(); i < e; ++i)
+        UseMBBs.push_back(UseBlocks[i].MBB);
+      BestCost = TLI->costOfFirstCSRForBlocks(UseMBBs);
+    } else
+      BestCost = CSRCost;
+
     unsigned BestCand = calculateRegionSplitCost(VirtReg, Order, BestCost,
                                                  NumCands, true /*IgnoreCSR*/);
     if (BestCand == NoCand)
@@ -2550,8 +2562,8 @@
     // When NewVRegs is not empty, we may have made decisions such as evicting
     // a virtual register, go with the earlier decisions and use the physical
     // register.
-    if (CSRCost.getFrequency() && isUnusedCalleeSavedReg(PhysReg) &&
-        NewVRegs.empty()) {
+    if ((CSRCost.getFrequency() || !TLI->useCSRInsteadOfSplit(VirtReg)) &&
+        isUnusedCalleeSavedReg(PhysReg) && NewVRegs.empty()) {
       unsigned CSRReg = tryAssignCSRFirstTime(VirtReg, Order, PhysReg,
                                               CostPerUseLimit, NewVRegs);
       if (CSRReg || !NewVRegs.empty())
@@ -2704,6 +2716,7 @@
   MF = &mf;
   TRI = MF->getSubtarget().getRegisterInfo();
   TII = MF->getSubtarget().getInstrInfo();
+  TLI = MF->getSubtarget().getTargetLowering();
   RCI.runOnMachineFunction(mf);
 
   EnableLocalReassign = EnableLocalReassignment ||
Index: lib/Target/PowerPC/PPCISelLowering.h
===================================================================
--- lib/Target/PowerPC/PPCISelLowering.h
+++ lib/Target/PowerPC/PPCISelLowering.h
@@ -1013,6 +1013,9 @@
     SDValue
     combineElementTruncationToVectorTruncation(SDNode *N,
                                                DAGCombinerInfo &DCI) const;
+    bool useCSRInsteadOfSplit(const LiveInterval &LI) const override;
+    int64_t costOfFirstCSRForBlocks(
+      const SmallVectorImpl<MachineBasicBlock*> &UseMBBs) const override;
   };
 
   namespace PPC {
Index: lib/Target/PowerPC/PPCISelLowering.cpp
===================================================================
--- lib/Target/PowerPC/PPCISelLowering.cpp
+++ lib/Target/PowerPC/PPCISelLowering.cpp
@@ -12937,3 +12937,33 @@
     return Imm.isPosZero();
   }
 }
+
+// Keep the default CSR handling for unspillable live intervals; for spillable
+// ones have the allocator weigh a split against the cost function below.
+bool PPCTargetLowering::useCSRInsteadOfSplit(const LiveInterval &LI) const {
+  return !LI.isSpillable();
+}
+
+static bool hasCall(MachineBasicBlock *MBB) { // True iff MBB contains a call.
+  MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
+  while (I != E && !I->isCall())
+    ++I;
+  return I != E;
+}
+
+// Make the first use of a callee-saved register look expensive when the live
+// range has uses in blocks where we would rather not occupy one. Deferring
+// the CSR this way often gives shrink-wrapping an opportunity to sink/hoist
+// the save/restore out of the entry/exit blocks respectively.
+int64_t PPCTargetLowering::costOfFirstCSRForBlocks(
+  const SmallVectorImpl<MachineBasicBlock*> &UseMBBs) const {
+  // Any use block without a call makes the CSR expensive. The value is a
+  // rather arbitrary number that seems to produce good performance
+  // improvement; the actual cost may need some adjustment.
+  const int CostOnBlockWithoutCall = 1 << 15;
+  for (MachineBasicBlock *MBB : UseMBBs)
+    if (!hasCall(MBB))
+      return CostOnBlockWithoutCall;
+  // Every use block contains a call: fall back to the generic cost.
+  return TargetLowering::costOfFirstCSRForBlocks(UseMBBs);
+}
Index: test/CodeGen/PowerPC/branch-opt.ll
===================================================================
--- test/CodeGen/PowerPC/branch-opt.ll
+++ test/CodeGen/PowerPC/branch-opt.ll
@@ -11,10 +11,10 @@
 ; One of the blocks ends up with a loop exit block that gets a tail-duplicated copy
 ; of %cond_next48, so there should only be two unconditional branches.
 
-;CHECK: b LBB0_13
-;CHECK: b LBB0_13
-;CHECK-NOT: b LBB0_13
-;CHECK: LBB0_13: ; %cond_next48
+;CHECK: b LBB0_20
+;CHECK: b LBB0_20
+;CHECK-NOT: b LBB0_18
+;CHECK: LBB0_18: ; %cond_next48
 
 define void @foo(i32 %W, i32 %X, i32 %Y, i32 %Z) {
 entry:
Index: test/CodeGen/PowerPC/ppc-shrink-wrapping.ll
===================================================================
--- test/CodeGen/PowerPC/ppc-shrink-wrapping.ll
+++ test/CodeGen/PowerPC/ppc-shrink-wrapping.ll
@@ -91,10 +91,11 @@
 ; 
 ; Loop body
 ; CHECK: .[[LOOP:LBB[0-9_]+]]: # %for.body
+; CHECK: mr [[NEWSUM:[0-9]+]], [[SUM]]
 ; CHECK: bl something
 ; CHECK-DAG: addi [[IV]], [[IV]], -1
-; CHECK-DAG: add [[SUM]], 3, [[SUM]] 
-; CHECK-NEXT: cmplwi [[IV]], 0
+; CHECK-DAG: add [[NEWSUM]], 3, [[NEWSUM]]
+; CHECK: cmplwi [[IV]], 0
 ; CHECK-NEXT: bne 0, .[[LOOP]]
 ;
 ; Next BB.
@@ -159,12 +160,13 @@
 ; Loop preheader
 ; CHECK-DAG: li [[SUM:[0-9]+]], 0
 ; CHECK-DAG: li [[IV:[0-9]+]], 10
-; 
+;
 ; Loop body
 ; CHECK: .[[LOOP:LBB[0-9_]+]]: # %for.body
 ; CHECK: bl something
+; CHECK: mr [[NEWRET:[0-9]+]], 3
 ; CHECK-DAG: addi [[IV]], [[IV]], -1
-; CHECK-DAG: add [[SUM]], 3, [[SUM]] 
+; CHECK-DAG: add [[SUM]], [[NEWRET]], [[SUM]]
 ; CHECK-NEXT: cmplwi [[IV]], 0
 ; CHECK-NEXT: bne 0, .[[LOOP]]
 ;
@@ -301,16 +303,17 @@
 ; Loop preheader
 ; CHECK-DAG: li [[SUM:[0-9]+]], 0
 ; CHECK-DAG: li [[IV:[0-9]+]], 10
-; 
+;
 ; Loop body
 ; CHECK: .[[LOOP:LBB[0-9_]+]]: # %for.body
+; CHECK: mr [[NEWSUM:[0-9]+]], [[SUM]]
 ; CHECK: bl something
 ; CHECK-DAG: addi [[IV]], [[IV]], -1
-; CHECK-DAG: add [[SUM]], 3, [[SUM]] 
+; CHECK-DAG: add [[NEWSUM]], 3, [[NEWSUM]]
 ; CHECK-NEXT: cmplwi [[IV]], 0
-; CHECK-NEXT: bne 0, .[[LOOP]]
+; CHECK: bne 0, .[[LOOP]]
 ;
-; Next BB. 
+; Next BB.
 ; slwi 3, [[SUM]], 3
 ;
 ; DISABLE: b .[[EPILOG_BB:LBB[0-9_]+]]
Index: test/CodeGen/PowerPC/ppc64-func-desc-hoist.ll
===================================================================
--- test/CodeGen/PowerPC/ppc64-func-desc-hoist.ll
+++ test/CodeGen/PowerPC/ppc64-func-desc-hoist.ll
@@ -25,10 +25,10 @@
 ; NONINVFUNCDESC-LABEL: @bar
 ; NONINVFUNCDESC: %for.body
 ; NONINVFUNCDESC: std 2, 40(1)
-; NONINVFUNCDESC-DAG: ld 3, 0(30)
-; NONINVFUNCDESC-DAG: ld 11, 16(30)
-; NONINVFUNCDESC-DAG: ld 2, 8(30)
-; NONINVFUNCDESC: mtctr 3
+; NONINVFUNCDESC-DAG: ld 5, 0(3)
+; NONINVFUNCDESC-DAG: ld 11, 16(3)
+; NONINVFUNCDESC-DAG: ld 2, 8(3)
+; NONINVFUNCDESC: mtctr 5
 ; NONINVFUNCDESC: bctrl
 ; NONINVFUNCDESC-NEXT: ld 2, 40(1)
 
Index: test/CodeGen/PowerPC/tail-dup-break-cfg.ll
===================================================================
--- test/CodeGen/PowerPC/tail-dup-break-cfg.ll
+++ test/CodeGen/PowerPC/tail-dup-break-cfg.ll
@@ -11,16 +11,15 @@
 ; exit
 
 ;CHECK-LABEL: tail_dup_break_cfg:
-;CHECK: mr [[TAGREG:[0-9]+]], 3
-;CHECK: andi. {{[0-9]+}}, [[TAGREG]], 1
-;CHECK-NEXT: bc 12, 1, [[BODY1LABEL:[._0-9A-Za-z]+]]
+;CHECK: andi. {{[0-9]+}}, 3, 1
+;CHECK: bc 12, 1, [[BODY1LABEL:[._0-9A-Za-z]+]]
 ;CHECK-NEXT: # %test2
-;CHECK-NEXT: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 30, 30
+;CHECK-NEXT: rlwinm. {{[0-9]+}}, 3, 0, 30, 30
 ;CHECK-NEXT: bne 0, [[BODY2LABEL:[._0-9A-Za-z]+]]
 ;CHECK: [[EXITLABEL:[._0-9A-Za-z]+]]: # %exit
 ;CHECK: blr
 ;CHECK-NEXT: [[BODY1LABEL]]
-;CHECK: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 30, 30
+;CHECK: rlwinm. {{[0-9]+}}, 3, 0, 30, 30
 ;CHECK-NEXT: beq 0, [[EXITLABEL]]
 ;CHECK-NEXT: [[BODY2LABEL:[._0-9A-Za-z]+]]:
 ;CHECK: b [[EXITLABEL]]
@@ -53,12 +52,11 @@
 
 ; The branch weights here hint that we shouldn't tail duplicate in this case.
 ;CHECK-LABEL: tail_dup_dont_break_cfg:
-;CHECK: mr [[TAGREG:[0-9]+]], 3
-;CHECK: andi. {{[0-9]+}}, [[TAGREG]], 1
-;CHECK-NEXT: bc 4, 1, [[TEST2LABEL:[._0-9A-Za-z]+]]
+;CHECK: andi. {{[0-9]+}}, 3, 1
+;CHECK: bc 4, 1, [[TEST2LABEL:[._0-9A-Za-z]+]]
 ;CHECK-NEXT: # %body1
 ;CHECK: [[TEST2LABEL]]: # %test2
-;CHECK-NEXT: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 30, 30
+;CHECK-NEXT: rlwinm. {{[0-9]+}}, 3, 0, 30, 30
 ;CHECK-NEXT: beq 0, [[EXITLABEL:[._0-9A-Za-z]+]]
 ;CHECK-NEXT: # %body2
 ;CHECK: [[EXITLABEL:[._0-9A-Za-z]+]]: # %exit
Index: test/CodeGen/PowerPC/tail-dup-layout.ll
===================================================================
--- test/CodeGen/PowerPC/tail-dup-layout.ll
+++ test/CodeGen/PowerPC/tail-dup-layout.ll
@@ -21,28 +21,27 @@
 ; the optional blocks and that the optional blocks are in the correct order.
 ;CHECK-LABEL: straight_test:
 ; test1 may have been merged with entry
-;CHECK: mr [[TAGREG:[0-9]+]], 3
-;CHECK: andi. {{[0-9]+}}, [[TAGREG]], 1
-;CHECK-NEXT: bc 12, 1, .[[OPT1LABEL:[_0-9A-Za-z]+]]
+;CHECK: andi. {{[0-9]+}}, 3, 1
+;CHECK: bc 12, 1, .[[OPT1LABEL:[_0-9A-Za-z]+]]
 ;CHECK-NEXT: # %test2
-;CHECK-NEXT: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 30, 30
+;CHECK-NEXT: rlwinm. {{[0-9]+}}, 3, 0, 30, 30
 ;CHECK-NEXT: bne 0, .[[OPT2LABEL:[_0-9A-Za-z]+]]
 ;CHECK-NEXT: .[[TEST3LABEL:[_0-9A-Za-z]+]]: # %test3
-;CHECK-NEXT: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 29, 29
+;CHECK-NEXT: rlwinm. {{[0-9]+}}, 3, 0, 29, 29
 ;CHECK-NEXT: bne 0, .[[OPT3LABEL:[_0-9A-Za-z]+]]
 ;CHECK-NEXT: .[[TEST4LABEL:[_0-9A-Za-z]+]]: # %test4
-;CHECK-NEXT: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 28, 28
+;CHECK-NEXT: rlwinm. {{[0-9]+}}, 3, 0, 28, 28
 ;CHECK-NEXT: bne 0, .[[OPT4LABEL:[_0-9A-Za-z]+]]
 ;CHECK-NEXT: .[[EXITLABEL:[_0-9A-Za-z]+]]: # %exit
 ;CHECK: blr
 ;CHECK-NEXT: .[[OPT1LABEL]]:
-;CHECK: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 30, 30
+;CHECK: rlwinm. {{[0-9]+}}, 3, 0, 30, 30
 ;CHECK-NEXT: beq 0, .[[TEST3LABEL]]
 ;CHECK-NEXT: .[[OPT2LABEL]]:
-;CHECK: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 29, 29
+;CHECK: rlwinm. {{[0-9]+}}, 3, 0, 29, 29
 ;CHECK-NEXT: beq 0, .[[TEST4LABEL]]
 ;CHECK-NEXT: .[[OPT3LABEL]]:
-;CHECK: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 28, 28
+;CHECK: rlwinm. {{[0-9]+}}, 3, 0, 28, 28
 ;CHECK-NEXT: beq 0, .[[EXITLABEL]]
 ;CHECK-NEXT: .[[OPT4LABEL]]:
 ;CHECK: b .[[EXITLABEL]]
@@ -114,22 +113,21 @@
 ; the optional blocks and that the optional blocks are in the correct order.
 ;CHECK-LABEL: straight_test_50:
 ; test1 may have been merged with entry
-;CHECK: mr [[TAGREG:[0-9]+]], 3
-;CHECK: andi. {{[0-9]+}}, [[TAGREG]], 1
-;CHECK-NEXT: bc 12, 1, .[[OPT1LABEL:[_0-9A-Za-z]+]]
+;CHECK: andi. {{[0-9]+}}, 3, 1
+;CHECK: bc 12, 1, .[[OPT1LABEL:[_0-9A-Za-z]+]]
 ;CHECK-NEXT: # %test2
-;CHECK-NEXT: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 30, 30
+;CHECK-NEXT: rlwinm. {{[0-9]+}}, 3, 0, 30, 30
 ;CHECK-NEXT: bne 0, .[[OPT2LABEL:[_0-9A-Za-z]+]]
 ;CHECK-NEXT: .[[TEST3LABEL:[_0-9A-Za-z]+]]: # %test3
-;CHECK-NEXT: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 29, 29
+;CHECK-NEXT: rlwinm. {{[0-9]+}}, 3, 0, 29, 29
 ;CHECK-NEXT: bne 0, .[[OPT3LABEL:[_0-9A-Za-z]+]]
 ;CHECK-NEXT: .[[EXITLABEL:[_0-9A-Za-z]+]]: # %exit
 ;CHECK: blr
 ;CHECK-NEXT: .[[OPT1LABEL]]:
-;CHECK: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 30, 30
+;CHECK: rlwinm. {{[0-9]+}}, 3, 0, 30, 30
 ;CHECK-NEXT: beq 0, .[[TEST3LABEL]]
 ;CHECK-NEXT: .[[OPT2LABEL]]:
-;CHECK: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 29, 29
+;CHECK: rlwinm. {{[0-9]+}}, 3, 0, 29, 29
 ;CHECK-NEXT: beq 0, .[[EXITLABEL]]
 ;CHECK-NEXT: .[[OPT3LABEL]]:
 ;CHECK: b .[[EXITLABEL]]
@@ -182,14 +180,15 @@
 ; and then check that the correct test has been duplicated into the end of
 ; the optional blocks and that the optional blocks are in the correct order.
 ;CHECK-LABEL: loop_test:
-;CHECK: add [[TAGPTRREG:[0-9]+]], 3, 4
+;CHECK: extsw [[SEXTREG:[0-9]+]], 4
+;CHECK: add [[TAGPTRREG:[0-9]+]], 3, [[SEXTREG]]
 ;CHECK: .[[LATCHLABEL:[._0-9A-Za-z]+]]: # %for.latch
 ;CHECK: addi
 ;CHECK: .[[CHECKLABEL:[._0-9A-Za-z]+]]: # %for.check
 ;CHECK: lwz [[TAGREG:[0-9]+]], 0([[TAGPTRREG]])
 ;CHECK: # %test1
 ;CHECK: andi. {{[0-9]+}}, [[TAGREG]], 1
-;CHECK-NEXT: bc 12, 1, .[[OPT1LABEL:[._0-9A-Za-z]+]]
+;CHECK: bc 12, 1, .[[OPT1LABEL:[._0-9A-Za-z]+]]
 ;CHECK-NEXT: # %test2
 ;CHECK: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 30, 30
 ;CHECK-NEXT: bne 0, .[[OPT2LABEL:[._0-9A-Za-z]+]]