Index: lib/Transforms/Vectorize/LoopVectorize.cpp =================================================================== --- lib/Transforms/Vectorize/LoopVectorize.cpp +++ lib/Transforms/Vectorize/LoopVectorize.cpp @@ -208,7 +208,7 @@ "The cost of a loop that is considered 'small' by the interleaver.")); static cl::opt LoopVectorizeWithBlockFrequency( - "loop-vectorize-with-block-frequency", cl::init(false), cl::Hidden, + "loop-vectorize-with-block-frequency", cl::init(true), cl::Hidden, cl::desc("Enable the use of the block frequency analysis to access PGO " "heuristics minimizing code growth in cold regions and being more " "aggressive in hot regions.")); @@ -8350,7 +8350,10 @@ unsigned ExpectedTC = SE->getSmallConstantMaxTripCount(L); bool HasExpectedTC = (ExpectedTC > 0); - if (!HasExpectedTC && LoopVectorizeWithBlockFrequency) { + // ExpectedTC is only an upper bound and may be large when the trip count is + // bounded by a variable. Use profile data to decide whether to vectorize. + if ((!HasExpectedTC || ExpectedTC >= TinyTripCountVectorThreshold) + && LoopVectorizeWithBlockFrequency) { auto EstimatedTC = getLoopEstimatedTripCount(L); if (EstimatedTC) { ExpectedTC = *EstimatedTC; Index: test/Transforms/LoopVectorize/tripcount.ll =================================================================== --- test/Transforms/LoopVectorize/tripcount.ll +++ test/Transforms/LoopVectorize/tripcount.ll @@ -57,7 +57,7 @@ } define i32 @foo_low_trip_count3(i1 %cond, i32 %bound) !prof !0 { -; The loop has low invocation count compare to the function invocation count, +; The loop has low invocation count compared to the function invocation count, ; but has a high trip count per invocation. Vectorize it. ; CHECK-LABEL: @foo_low_trip_count3( @@ -84,6 +84,30 @@ ret i32 0 } +define i32 @foo_low_trip_count_icmp_sgt(i32 %bound) { +; Simple loop with an inequality exit test whose branch weights (!prof !1) favor +; the exit, i.e. a low trip count. Should not be vectorized. 
+ +; CHECK-LABEL: @foo_low_trip_count_icmp_sgt( +; CHECK-NOT: <{{[0-9]+}} x i8> + +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %i.08 = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %arrayidx = getelementptr inbounds [32 x i8], [32 x i8]* @tab, i32 0, i32 %i.08 + %0 = load i8, i8* %arrayidx, align 1 + %cmp1 = icmp eq i8 %0, 0 + %. = select i1 %cmp1, i8 2, i8 1 + store i8 %., i8* %arrayidx, align 1 + %inc = add nsw i32 %i.08, 1 + %exitcond = icmp sgt i32 %i.08, %bound + br i1 %exitcond, label %for.end, label %for.body, !prof !1 + +for.end: ; preds = %for.body + ret i32 0 +} !0 = !{!"function_entry_count", i64 100} !1 = !{!"branch_weights", i32 100, i32 0}