Index: lib/CodeGen/MachineLICM.cpp =================================================================== --- lib/CodeGen/MachineLICM.cpp +++ lib/CodeGen/MachineLICM.cpp @@ -1062,6 +1062,11 @@ if (MI.isImplicitDef()) return true; + // Rematerializable instructions should always be hoisted since the register + // allocator can just pull them down again when needed. + if (TII->isTriviallyReMaterializable(MI, AA)) + return true; + // Besides removing computation from the loop, hoisting an instruction has // these effects: // @@ -1083,11 +1088,6 @@ return false; } - // Rematerializable instructions should always be hoisted since the register - // allocator can just pull them down again when needed. - if (TII->isTriviallyReMaterializable(MI, AA)) - return true; - // FIXME: If there are long latency loop-invariant instructions inside the // loop at this point, why didn't the optimizer's LICM hoist them? for (unsigned i = 0, e = MI.getDesc().getNumOperands(); i != e; ++i) { Index: test/CodeGen/X86/licm-nested.ll =================================================================== --- test/CodeGen/X86/licm-nested.ll +++ test/CodeGen/X86/licm-nested.ll @@ -1,9 +1,11 @@ ; REQUIRES: asserts -; RUN: llc -mtriple=x86_64-apple-darwin -march=x86-64 < %s -o /dev/null -stats -info-output-file - | grep "hoisted out of loops" | grep 4 +; RUN: llc -mtriple=x86_64-apple-darwin -march=x86-64 < %s -o /dev/null -stats -info-output-file - | FileCheck %s ; MachineLICM should be able to hoist the symbolic addresses out of ; the inner loops. 
+; CHECK: 6{{.*}}hoisted out of loops + @main.flags = internal global [8193 x i8] zeroinitializer, align 16 ; <[8193 x i8]*> [#uses=3] @.str = private constant [11 x i8] c"Count: %d\0A\00" ; <[11 x i8]*> [#uses=1] Index: test/CodeGen/X86/loop-search.ll =================================================================== --- test/CodeGen/X86/loop-search.ll +++ test/CodeGen/X86/loop-search.ll @@ -0,0 +1,51 @@ +; The loop-invariant constant should be hoisted out of the loop. +; RUN: llc -mtriple=x86_64-apple-darwin -march=x86-64 %s -filetype=asm -O3 -o - | FileCheck %s + +; Function Attrs: norecurse nounwind readonly uwtable +define zeroext i1 @search(i32 %needle, i32* nocapture readonly %haystack, i32 %count) local_unnamed_addr #0 { +entry: + %cmp5 = icmp sgt i32 %count, 0 + br i1 %cmp5, label %for.body.preheader, label %cleanup + +for.body.preheader: ; preds = %entry + %0 = sext i32 %count to i64 + br label %for.body + +for.cond: ; preds = %for.body + %cmp = icmp slt i64 %indvars.iv.next, %0 + br i1 %cmp, label %for.body, label %cleanup.loopexit + +for.body: ; preds = %for.body.preheader, %for.cond + %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.cond ] + %arrayidx = getelementptr inbounds i32, i32* %haystack, i64 %indvars.iv + %1 = load i32, i32* %arrayidx, align 4, !tbaa !1 + %cmp1 = icmp eq i32 %1, %needle + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + br i1 %cmp1, label %cleanup.loopexit, label %for.cond + +cleanup.loopexit: ; preds = %for.cond, %for.body + %.ph = phi i1 [ false, %for.cond ], [ true, %for.body ] + br label %cleanup + +cleanup: ; preds = %cleanup.loopexit, %entry + %2 = phi i1 [ false, %entry ], [ %.ph, %cleanup.loopexit ] + ret i1 %2 +} + +attributes #0 = { norecurse nounwind readonly uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" 
"no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="btver2" "target-features"="+aes,+avx,+bmi,+cx16,+f16c,+fxsr,+lzcnt,+mmx,+pclmul,+popcnt,+prfchw,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+sse4a,+ssse3,+x87,+xsave,+xsaveopt" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!llvm.ident = !{!0} + +!0 = !{!"clang version 4.0.0 (trunk 281892)"} +!1 = !{!2, !2, i64 0} +!2 = !{!"int", !3, i64 0} +!3 = !{!"omnipotent char", !4, i64 0} +!4 = !{!"Simple C/C++ TBAA"} + +; CHECK: ## %for.body +; CHECK: cmpl %edi, (%rsi,%rdx,4) +; CHECK-NEXT: je [[LABEL:.+]] +; CHECK: [[LABEL]]: +; CHECK: movb $1, %al +; CHECK: ## %cleanup +; CHECK: retq