Index: lib/CodeGen/MachineLICM.cpp
===================================================================
--- lib/CodeGen/MachineLICM.cpp
+++ lib/CodeGen/MachineLICM.cpp
@@ -1062,6 +1062,11 @@
   if (MI.isImplicitDef())
     return true;
 
+  // Rematerializable instructions should always be hoisted since the register
+  // allocator can just pull them down again when needed.
+  if (TII->isTriviallyReMaterializable(MI, AA))
+    return true;
+
   // Besides removing computation from the loop, hoisting an instruction has
   // these effects:
   //
@@ -1083,11 +1088,6 @@
     return false;
   }
 
-  // Rematerializable instructions should always be hoisted since the register
-  // allocator can just pull them down again when needed.
-  if (TII->isTriviallyReMaterializable(MI, AA))
-    return true;
-
   // FIXME: If there are long latency loop-invariant instructions inside the
   // loop at this point, why didn't the optimizer's LICM hoist them?
   for (unsigned i = 0, e = MI.getDesc().getNumOperands(); i != e; ++i) {
Index: test/CodeGen/X86/loop-search.ll
===================================================================
--- test/CodeGen/X86/loop-search.ll
+++ test/CodeGen/X86/loop-search.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s
 
 ; This test comes from PR27136
@@ -9,34 +9,31 @@
 ; CHECK: ## BB#0: ## %entry
 ; CHECK-NEXT: testl %edx, %edx
 ; CHECK-NEXT: jle LBB0_1
-; CHECK-NEXT: ## BB#4: ## %for.body.preheader
+; CHECK-NEXT: ## BB#2: ## %for.body.preheader
 ; CHECK-NEXT: movslq %edx, %rcx
+; CHECK-NEXT: xorl %eax, %eax
 ; CHECK-NEXT: xorl %edx, %edx
 ; CHECK-NEXT: .p2align 4, 0x90
-; CHECK-NEXT: LBB0_5: ## %for.body
+; CHECK-NEXT: LBB0_4: ## %for.body
 ; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1
-; ### FIXME: This loop invariant should be hoisted
-; CHECK-NEXT: movb $1, %al
 ; CHECK-NEXT: cmpl %edi, (%rsi,%rdx,4)
-; CHECK-NEXT: je LBB0_6
-; CHECK-NEXT: ## BB#2: ## %for.cond
-; CHECK-NEXT: ## in Loop: Header=BB0_5 Depth=1
+; CHECK-NEXT: je LBB0_5
+; CHECK-NEXT: ## BB#3: ## %for.cond
+; CHECK-NEXT: ## in Loop: Header=BB0_4 Depth=1
 ; CHECK-NEXT: incq %rdx
 ; CHECK-NEXT: cmpq %rcx, %rdx
-; CHECK-NEXT: jl LBB0_5
-; ### FIXME: BB#3 and LBB0_1 should be merged
-; CHECK-NEXT: ## BB#3:
-; CHECK-NEXT: xorl %eax, %eax
-; CHECK-NEXT: ## kill: %AL %AL %EAX
-; CHECK-NEXT: retq
+; CHECK-NEXT: jl LBB0_4
+; CHECK-NEXT: jmp LBB0_6
 ; CHECK-NEXT: LBB0_1:
 ; CHECK-NEXT: xorl %eax, %eax
-; CHECK-NEXT: ## kill: %AL %AL %EAX
+; CHECK-NEXT: ## kill: %AL %AL %RAX
 ; CHECK-NEXT: retq
+; CHECK-NEXT: LBB0_5:
+; ### This constant loop invariant was moved here from the loop body
+; CHECK-NEXT: movb $1, %al
 ; CHECK-NEXT: LBB0_6: ## %cleanup
-; CHECK-NEXT: ## kill: %AL %AL %EAX
+; CHECK-NEXT: ## kill: %AL %AL %RAX
 ; CHECK-NEXT: retq
-;
 entry:
   %cmp5 = icmp sgt i32 %count, 0
   br i1 %cmp5, label %for.body.preheader, label %cleanup