Index: lib/CodeGen/MachineLICM.cpp
===================================================================
--- lib/CodeGen/MachineLICM.cpp
+++ lib/CodeGen/MachineLICM.cpp
@@ -25,6 +25,7 @@
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
 #include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineLoopInfo.h"
@@ -32,6 +33,7 @@
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/PseudoSourceValue.h"
 #include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/Support/BranchProbability.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
@@ -54,11 +56,17 @@
                 cl::desc("MachineLICM should hoist even cheap instructions"),
                 cl::init(false), cl::Hidden);
 
-static cl::opt<bool>
-SinkInstsToAvoidSpills("sink-insts-to-avoid-spills",
-                       cl::desc("MachineLICM should sink instructions into "
-                                "loops to avoid register spills"),
-                       cl::init(false), cl::Hidden);
+static cl::opt<bool> SinkInstsToAvoidSpills(
+    "sink-insts-to-avoid-spills",
+    cl::desc("MachineLICM should sink instructions into loops to avoid "
+             "register spills"),
+    cl::init(false), cl::Hidden);
+
+static cl::opt<uint32_t> SinkThresholdDenominator(
+    "sink-threshold-denominator",
+    cl::desc("Sink instrutions into the loop only if their probability of "
+             "being executed in each iteration is less than 1/N"),
+    cl::init(5), cl::Hidden);
 
 STATISTIC(NumHoisted,
           "Number of machine instructions hoisted out of loops");
@@ -76,6 +84,7 @@
     const TargetInstrInfo *TII;
     const TargetLoweringBase *TLI;
     const TargetRegisterInfo *TRI;
+    const MachineBlockFrequencyInfo *MBFI;
     const MachineFrameInfo *MFI;
     MachineRegisterInfo *MRI;
     const InstrItineraryData *InstrItins;
@@ -140,6 +149,7 @@
     bool runOnMachineFunction(MachineFunction &MF) override;
 
     void getAnalysisUsage(AnalysisUsage &AU) const override {
+      AU.addRequired<MachineBlockFrequencyInfo>();
       AU.addRequired<MachineLoopInfo>();
       AU.addRequired<MachineDominatorTree>();
       AU.addRequired<AliasAnalysis>();
@@ -312,6 +322,7 @@
 char &llvm::MachineLICMID = MachineLICM::ID;
 INITIALIZE_PASS_BEGIN(MachineLICM, "machinelicm",
                 "Machine Loop Invariant Code Motion", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
 INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
 INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
@@ -340,6 +351,7 @@
   TII = MF.getSubtarget().getInstrInfo();
   TLI = MF.getSubtarget().getTargetLowering();
   TRI = MF.getSubtarget().getRegisterInfo();
+  MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
   MFI = MF.getFrameInfo();
   MRI = &MF.getRegInfo();
   InstrItins = MF.getSubtarget().getInstrItineraryData();
@@ -790,6 +802,15 @@
   if (!Preheader)
     return;
 
+  // We are sinking instructions into the loop only if they aren't 'likely' to
+  // be executed during every iteration.
+  // FIXME: Instead of having a fix threshold, the probability could be a
+  // factor of the a cost model.
+  const BranchProbability ColdProb(1, 5);
+  const BlockFrequency ThresholdFreq =
+      MBFI->getBlockFreq(CurLoop->getHeader()) *
+      BranchProbability(1, SinkThresholdDenominator);
+
   SmallVector<MachineInstr *, 8> Candidates;
   for (MachineBasicBlock::instr_iterator I = Preheader->instr_begin();
        I != Preheader->instr_end(); ++I) {
@@ -808,14 +829,6 @@
     bool CanSink = true;
     MachineBasicBlock *B = nullptr;
     for (MachineInstr &MI : MRI->use_instructions(MO.getReg())) {
-      // FIXME: Come up with a proper cost model that estimates whether sinking
-      // the instruction (and thus possibly executing it on every loop
-      // iteration) is more expensive than a register.
-      // For now assumes that copies are cheap and thus almost always worth it.
-      if (!MI.isCopy()) {
-        CanSink = false;
-        break;
-      }
       if (!B) {
         B = MI.getParent();
         continue;
@@ -828,6 +841,10 @@
     }
     if (!CanSink || !B || B == Preheader)
       continue;
+
+    if (MBFI->getBlockFreq(B) > ThresholdFreq)
+      continue;
+
     B->splice(B->getFirstNonPHI(), Preheader, I);
   }
 }
Index: test/CodeGen/X86/sink-cheap-instructions.ll
===================================================================
--- test/CodeGen/X86/sink-cheap-instructions.ll
+++ test/CodeGen/X86/sink-cheap-instructions.ll
@@ -6,6 +6,8 @@
 
 ; CHECK: Spill
 ; SINK-NOT: Spill
+; SINK: lea{{.*}}20
+; SINK-NEXT: jmp
 
 %struct.A = type { i32, i32, i32, i32, i32, i32 }
 
@@ -20,6 +22,7 @@
 
 .backedge:
   %.0 = phi i8* [ %input, %0 ], [ %7, %.backedge.backedge ]
+  tail call void @_Z6assignPj(i32* %6)
   %7 = getelementptr inbounds i8, i8* %.0, i64 1
   %8 = load i8, i8* %7, align 1
   switch i8 %8, label %.backedge.backedge [
@@ -28,7 +31,6 @@
     i8 20, label %11
     i8 30, label %12
     i8 40, label %13
-    i8 50, label %14
   ]
 
 ; <label>:9
@@ -53,10 +55,6 @@
 ; <label>:13
   tail call void @_Z6assignPj(i32* %5)
   br label %.backedge.backedge
-
-; <label>:14
-  tail call void @_Z6assignPj(i32* %6)
-  br label %.backedge.backedge
 }
 
 declare void @_Z6assignPj(i32*)