Index: lib/CodeGen/MachineLICM.cpp
===================================================================
--- lib/CodeGen/MachineLICM.cpp
+++ lib/CodeGen/MachineLICM.cpp
@@ -1062,6 +1062,11 @@
   if (MI.isImplicitDef())
     return true;
 
+  // Trivially rematerializable instructions should always be hoisted since the
+  // register allocator can just pull them down again when needed.
+  if (TII->isTriviallyReMaterializable(MI, AA))
+    return true;
+
   // Besides removing computation from the loop, hoisting an instruction has
   // these effects:
   //
@@ -1083,11 +1088,6 @@
     return false;
   }
 
-  // Rematerializable instructions should always be hoisted since the register
-  // allocator can just pull them down again when needed.
-  if (TII->isTriviallyReMaterializable(MI, AA))
-    return true;
-
   // FIXME: If there are long latency loop-invariant instructions inside the
   // loop at this point, why didn't the optimizer's LICM hoist them?
   for (unsigned i = 0, e = MI.getDesc().getNumOperands(); i != e; ++i) {
Index: test/CodeGen/X86/loop-search.ll
===================================================================
--- test/CodeGen/X86/loop-search.ll
+++ test/CodeGen/X86/loop-search.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s
 
 ; This test comes from PR27136
@@ -9,34 +9,31 @@
 ; CHECK:       ## BB#0: ## %entry
 ; CHECK-NEXT:    testl %edx, %edx
 ; CHECK-NEXT:    jle LBB0_1
-; CHECK-NEXT:  ## BB#4: ## %for.body.preheader
+; CHECK-NEXT:  ## BB#2: ## %for.body.preheader
 ; CHECK-NEXT:    movslq %edx, %rcx
+; CHECK-NEXT:    xorl %eax, %eax
 ; CHECK-NEXT:    xorl %edx, %edx
 ; CHECK-NEXT:    .p2align 4, 0x90
-; CHECK-NEXT:  LBB0_5: ## %for.body
+; CHECK-NEXT:  LBB0_4: ## %for.body
 ; CHECK-NEXT:    ## =>This Inner Loop Header: Depth=1
-;            ### FIXME: This loop invariant should be hoisted
-; CHECK-NEXT:    movb $1, %al
 ; CHECK-NEXT:    cmpl %edi, (%rsi,%rdx,4)
-; CHECK-NEXT:    je LBB0_6
-; CHECK-NEXT:  ## BB#2: ## %for.cond
-; CHECK-NEXT:    ## in Loop: Header=BB0_5 Depth=1
+; CHECK-NEXT:    je LBB0_5
+; CHECK-NEXT:  ## BB#3: ## %for.cond
+; CHECK-NEXT:    ## in Loop: Header=BB0_4 Depth=1
 ; CHECK-NEXT:    incq %rdx
 ; CHECK-NEXT:    cmpq %rcx, %rdx
-; CHECK-NEXT:    jl LBB0_5
-;            ### FIXME: BB#3 and LBB0_1 should be merged
-; CHECK-NEXT:  ## BB#3:
-; CHECK-NEXT:    xorl %eax, %eax
-; CHECK-NEXT:    ## kill: %AL<def> %AL<kill> %EAX<kill>
-; CHECK-NEXT:    retq
+; CHECK-NEXT:    jl LBB0_4
+; CHECK-NEXT:    jmp LBB0_6
 ; CHECK-NEXT:  LBB0_1:
 ; CHECK-NEXT:    xorl %eax, %eax
-; CHECK-NEXT:    ## kill: %AL<def> %AL<kill> %EAX<kill>
+; CHECK-NEXT:    ## kill: %AL<def> %AL<kill> %RAX<kill>
 ; CHECK-NEXT:    retq
+; CHECK-NEXT:  LBB0_5:
+;            ### This constant loop invariant was moved here from the loop body
+; CHECK-NEXT:    movb $1, %al
 ; CHECK-NEXT:  LBB0_6: ## %cleanup
-; CHECK-NEXT:    ## kill: %AL<def> %AL<kill> %EAX<kill>
+; CHECK-NEXT:    ## kill: %AL<def> %AL<kill> %RAX<kill>
 ; CHECK-NEXT:    retq
-;
 entry:
   %cmp5 = icmp sgt i32 %count, 0
   br i1 %cmp5, label %for.body.preheader, label %cleanup