Index: lib/CodeGen/MachineLICM.cpp =================================================================== --- lib/CodeGen/MachineLICM.cpp +++ lib/CodeGen/MachineLICM.cpp @@ -1062,6 +1062,11 @@ if (MI.isImplicitDef()) return true; + // Rematerializable instructions should always be hoisted since the register + // allocator can just pull them down again when needed. + if (TII->isTriviallyReMaterializable(MI, AA)) + return true; + // Besides removing computation from the loop, hoisting an instruction has // these effects: // @@ -1083,11 +1088,6 @@ return false; } - // Rematerializable instructions should always be hoisted since the register - // allocator can just pull them down again when needed. - if (TII->isTriviallyReMaterializable(MI, AA)) - return true; - // FIXME: If there are long latency loop-invariant instructions inside the // loop at this point, why didn't the optimizer's LICM hoist them? for (unsigned i = 0, e = MI.getDesc().getNumOperands(); i != e; ++i) { Index: test/CodeGen/X86/licm-nested.ll =================================================================== --- test/CodeGen/X86/licm-nested.ll +++ test/CodeGen/X86/licm-nested.ll @@ -1,9 +1,11 @@ ; REQUIRES: asserts -; RUN: llc -mtriple=x86_64-apple-darwin -march=x86-64 < %s -o /dev/null -stats -info-output-file - | grep "hoisted out of loops" | grep 4 +; RUN: llc -mtriple=x86_64-apple-darwin -march=x86-64 < %s -o /dev/null -stats -info-output-file - | FileCheck %s ; MachineLICM should be able to hoist the symbolic addresses out of ; the inner loops. 
+; CHECK: 6{{.*}}hoisted out of loops + @main.flags = internal global [8193 x i8] zeroinitializer, align 16 ; <[8193 x i8]*> [#uses=3] @.str = private constant [11 x i8] c"Count: %d\0A\00" ; <[11 x i8]*> [#uses=1] Index: test/CodeGen/X86/loop-search.ll =================================================================== --- test/CodeGen/X86/loop-search.ll +++ test/CodeGen/X86/loop-search.ll @@ -0,0 +1,65 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s + +; This test comes from PR27136. +; Loop-invariant constants should be hoisted out of the loop. + +define zeroext i1 @search(i32 %needle, i32* nocapture readonly %haystack, i32 %count) { +; CHECK-LABEL: search: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: testl %edx, %edx +; CHECK-NEXT: jle LBB0_1 +; CHECK-NEXT: ## BB#2: ## %for.body.preheader +; CHECK-NEXT: movslq %edx, %rcx +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: xorl %edx, %edx +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: LBB0_4: ## %for.body +; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: cmpl %edi, (%rsi,%rdx,4) +; CHECK-NEXT: je LBB0_5 +; CHECK-NEXT: ## BB#3: ## %for.cond +; CHECK-NEXT: ## in Loop: Header=BB0_4 Depth=1 +; CHECK-NEXT: incq %rdx +; CHECK-NEXT: cmpq %rcx, %rdx +; CHECK-NEXT: jl LBB0_4 +; CHECK-NEXT: jmp LBB0_6 +; CHECK-NEXT: LBB0_1: +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: ## kill: %AL %AL %RAX +; CHECK-NEXT: retq +; CHECK-NEXT: LBB0_5: +; ## The TRUE result value is moved here from %for.body. +; CHECK-NEXT: movb $1, %al +; CHECK-NEXT: LBB0_6: ## %cleanup +; CHECK-NEXT: ## kill: %AL %AL %RAX +; CHECK-NEXT: retq +; +entry: + %cmp5 = icmp sgt i32 %count, 0 + br i1 %cmp5, label %for.body.preheader, label %cleanup + +for.body.preheader: ; preds = %entry + %0 = sext i32 %count to i64 + br label %for.body + +for.cond: ; preds = %for.body + %cmp = icmp slt i64 %indvars.iv.next, %0 + br i1 %cmp, label %for.body, label %cleanup.loopexit + +for.body: ; preds = 
%for.body.preheader, %for.cond + %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.cond ] + %arrayidx = getelementptr inbounds i32, i32* %haystack, i64 %indvars.iv + %1 = load i32, i32* %arrayidx, align 4 + %cmp1 = icmp eq i32 %1, %needle + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + br i1 %cmp1, label %cleanup.loopexit, label %for.cond + +cleanup.loopexit: ; preds = %for.cond, %for.body + %.ph = phi i1 [ false, %for.cond ], [ true, %for.body ] + br label %cleanup + +cleanup: ; preds = %cleanup.loopexit, %entry + %2 = phi i1 [ false, %entry ], [ %.ph, %cleanup.loopexit ] + ret i1 %2 +}