Diff 133324

llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 202 Lines • ▼ Show 20 Lines	cl::desc("A flag that overrides the target's expected cost for "
"useful for getting consistent testing."));		"useful for getting consistent testing."));

static cl::opt<unsigned> SmallLoopCost(		static cl::opt<unsigned> SmallLoopCost(
"small-loop-cost", cl::init(20), cl::Hidden,		"small-loop-cost", cl::init(20), cl::Hidden,
cl::desc(		cl::desc(
"The cost of a loop that is considered 'small' by the interleaver."));		"The cost of a loop that is considered 'small' by the interleaver."));

static cl::opt<bool> LoopVectorizeWithBlockFrequency(		static cl::opt<bool> LoopVectorizeWithBlockFrequency(
"loop-vectorize-with-block-frequency", cl::init(false), cl::Hidden,		"loop-vectorize-with-block-frequency", cl::init(true), cl::Hidden,
cl::desc("Enable the use of the block frequency analysis to access PGO "		cl::desc("Enable the use of the block frequency analysis to access PGO "
"heuristics minimizing code growth in cold regions and being more "		"heuristics minimizing code growth in cold regions and being more "
"aggressive in hot regions."));		"aggressive in hot regions."));

// Runtime interleave loops for load/store throughput.		// Runtime interleave loops for load/store throughput.
static cl::opt<bool> EnableLoadStoreRuntimeInterleave(		static cl::opt<bool> EnableLoadStoreRuntimeInterleave(
"enable-loadstore-runtime-interleave", cl::init(true), cl::Hidden,		"enable-loadstore-runtime-interleave", cl::init(true), cl::Hidden,
cl::desc(		cl::desc(
▲ Show 20 Lines • Show All 8,122 Lines • ▼ Show 20 Lines	#endif /* NDEBUG */

// Check the function attributes to find out if this function should be		// Check the function attributes to find out if this function should be
// optimized for size.		// optimized for size.
bool OptForSize =		bool OptForSize =
Hints.getForce() != LoopVectorizeHints::FK_Enabled && F->optForSize();		Hints.getForce() != LoopVectorizeHints::FK_Enabled && F->optForSize();

// Check the loop for a trip count threshold: vectorize loops with a tiny trip		// Check the loop for a trip count threshold: vectorize loops with a tiny trip
// count by optimizing for size, to minimize overheads.		// count by optimizing for size, to minimize overheads.
unsigned ExpectedTC = SE->getSmallConstantMaxTripCount(L);		// Prefer constant trip counts over profile data, over upper bound estimate.
bool HasExpectedTC = (ExpectedTC > 0);		unsigned ExpectedTC = 0;
		bool HasExpectedTC = false;
		if (const SCEVConstant *ConstExits =
		dyn_cast<SCEVConstant>(SE->getBackedgeTakenCount(L))) {
		const APInt &ExitsCount = ConstExits->getAPInt();
		// We are interested in small values for ExpectedTC. Skip over those that
		// can't fit an unsigned.
		if (ExitsCount.ult(std::numeric_limits<unsigned>::max())) {
		ExpectedTC = static_cast<unsigned>(ExitsCount.getZExtValue()) + 1;
		HasExpectedTC = true;
		}
		}
		// ExpectedTC may be large because it's bound by a variable. Check
		// profiling information to validate we should vectorize.
if (!HasExpectedTC && LoopVectorizeWithBlockFrequency) {		if (!HasExpectedTC && LoopVectorizeWithBlockFrequency) {
auto EstimatedTC = getLoopEstimatedTripCount(L);		auto EstimatedTC = getLoopEstimatedTripCount(L);
if (EstimatedTC) {		if (EstimatedTC) {
ExpectedTC = *EstimatedTC;		ExpectedTC = *EstimatedTC;
HasExpectedTC = true;		HasExpectedTC = true;
}		}
}		}
		if (!HasExpectedTC) {
		ExpectedTC = SE->getSmallConstantMaxTripCount(L);
		HasExpectedTC = (ExpectedTC > 0);
		}

if (HasExpectedTC && ExpectedTC < TinyTripCountVectorThreshold) {		if (HasExpectedTC && ExpectedTC < TinyTripCountVectorThreshold) {
DEBUG(dbgs() << "LV: Found a loop with a very small trip count. "		DEBUG(dbgs() << "LV: Found a loop with a very small trip count. "
<< "This loop is worth vectorizing only if no scalar "		<< "This loop is worth vectorizing only if no scalar "
<< "iteration overheads are incurred.");		<< "iteration overheads are incurred.");
if (Hints.getForce() == LoopVectorizeHints::FK_Enabled)		if (Hints.getForce() == LoopVectorizeHints::FK_Enabled)
DEBUG(dbgs() << " But vectorizing was explicitly forced.\n");		DEBUG(dbgs() << " But vectorizing was explicitly forced.\n");
else {		else {
▲ Show 20 Lines • Show All 277 Lines • Show Last 20 Lines

llvm/trunk/test/Transforms/LoopVectorize/tripcount.ll

Show First 20 Lines • Show All 51 Lines • ▼ Show 20 Lines	for.body: ; preds = %for.body, %entry
%exitcond = icmp eq i32 %i.08, %bound		%exitcond = icmp eq i32 %i.08, %bound
br i1 %exitcond, label %for.end, label %for.body, !prof !1		br i1 %exitcond, label %for.end, label %for.body, !prof !1

for.end: ; preds = %for.body		for.end: ; preds = %for.body
ret i32 0		ret i32 0
}		}

define i32 @foo_low_trip_count3(i1 %cond, i32 %bound) !prof !0 {		define i32 @foo_low_trip_count3(i1 %cond, i32 %bound) !prof !0 {
; The loop has low invocation count compared to the function invocation count,		; The loop has low invocation count compared to the function invocation count,
; but has a high trip count per invocation. Vectorize it.		; but has a high trip count per invocation. Vectorize it.

; CHECK-LABEL: @foo_low_trip_count3(		; CHECK-LABEL: @foo_low_trip_count3(
; CHECK: vector.body:		; CHECK: vector.body:

entry:		entry:
br i1 %cond, label %for.preheader, label %for.end, !prof !2		br i1 %cond, label %for.preheader, label %for.end, !prof !2

Show All 10 Lines	for.body: ; preds = %for.body, %entry
%inc = add nsw i32 %i.08, 1		%inc = add nsw i32 %i.08, 1
%exitcond = icmp eq i32 %i.08, %bound		%exitcond = icmp eq i32 %i.08, %bound
br i1 %exitcond, label %for.end, label %for.body, !prof !3		br i1 %exitcond, label %for.end, label %for.body, !prof !3

for.end: ; preds = %for.body		for.end: ; preds = %for.body
ret i32 0		ret i32 0
}		}

		define i32 @foo_low_trip_count_icmp_sgt(i32 %bound) {
		; Simple loop with low tripcount and inequality test for exit.
		; Should not be vectorized.

		; CHECK-LABEL: @foo_low_trip_count_icmp_sgt(
		; CHECK-NOT: <{{[0-9]+}} x i8>

		entry:
		br label %for.body

		for.body: ; preds = %for.body, %entry
		%i.08 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
		%arrayidx = getelementptr inbounds [32 x i8], [32 x i8]* @tab, i32 0, i32 %i.08
		%0 = load i8, i8* %arrayidx, align 1
		%cmp1 = icmp eq i8 %0, 0
		%. = select i1 %cmp1, i8 2, i8 1
		store i8 %., i8* %arrayidx, align 1
		%inc = add nsw i32 %i.08, 1
		%exitcond = icmp sgt i32 %i.08, %bound
		br i1 %exitcond, label %for.end, label %for.body, !prof !1

		for.end: ; preds = %for.body
		ret i32 0
		}

		define i32 @const_low_trip_count() {
		; Simple loop with constant, small trip count and no profiling info.

		; CHECK-LABEL: @const_low_trip_count
		; CHECK-NOT: <{{[0-9]+}} x i8>

		entry:
		br label %for.body

		for.body: ; preds = %for.body, %entry
		%i.08 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
		%arrayidx = getelementptr inbounds [32 x i8], [32 x i8]* @tab, i32 0, i32 %i.08
		%0 = load i8, i8* %arrayidx, align 1
		%cmp1 = icmp eq i8 %0, 0
		%. = select i1 %cmp1, i8 2, i8 1
		store i8 %., i8* %arrayidx, align 1
		%inc = add nsw i32 %i.08, 1
		%exitcond = icmp slt i32 %i.08, 2
		br i1 %exitcond, label %for.body, label %for.end

		for.end: ; preds = %for.body
		ret i32 0
		}

		define i32 @const_large_trip_count() {
		; Simple loop with constant large trip count and no profiling info.

		; CHECK-LABEL: @const_large_trip_count
		; CHECK: <{{[0-9]+}} x i8>

		entry:
		br label %for.body

		for.body: ; preds = %for.body, %entry
		%i.08 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
		%arrayidx = getelementptr inbounds [32 x i8], [32 x i8]* @tab, i32 0, i32 %i.08
		%0 = load i8, i8* %arrayidx, align 1
		%cmp1 = icmp eq i8 %0, 0
		%. = select i1 %cmp1, i8 2, i8 1
		store i8 %., i8* %arrayidx, align 1
		%inc = add nsw i32 %i.08, 1
		%exitcond = icmp slt i32 %i.08, 1000
		br i1 %exitcond, label %for.body, label %for.end

		for.end: ; preds = %for.body
		ret i32 0
		}

		define i32 @const_small_trip_count_step() {
		; Simple loop with static, small trip count and no profiling info.

		; CHECK-LABEL: @const_small_trip_count_step
		; CHECK-NOT: <{{[0-9]+}} x i8>

		entry:
		br label %for.body

		for.body: ; preds = %for.body, %entry
		%i.08 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
		%arrayidx = getelementptr inbounds [32 x i8], [32 x i8]* @tab, i32 0, i32 %i.08
		%0 = load i8, i8* %arrayidx, align 1
		%cmp1 = icmp eq i8 %0, 0
		%. = select i1 %cmp1, i8 2, i8 1
		store i8 %., i8* %arrayidx, align 1
		%inc = add nsw i32 %i.08, 5
		%exitcond = icmp slt i32 %i.08, 10
		br i1 %exitcond, label %for.body, label %for.end

		for.end: ; preds = %for.body
		ret i32 0
		}

		define i32 @const_trip_over_profile() {
		; constant trip count takes precedence over profile data

		; CHECK-LABEL: @const_trip_over_profile
		; CHECK: <{{[0-9]+}} x i8>

		entry:
		br label %for.body

		for.body: ; preds = %for.body, %entry
		%i.08 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
		%arrayidx = getelementptr inbounds [32 x i8], [32 x i8]* @tab, i32 0, i32 %i.08
		%0 = load i8, i8* %arrayidx, align 1
		%cmp1 = icmp eq i8 %0, 0
		%. = select i1 %cmp1, i8 2, i8 1
		store i8 %., i8* %arrayidx, align 1
		%inc = add nsw i32 %i.08, 1
		%exitcond = icmp slt i32 %i.08, 1000
		br i1 %exitcond, label %for.body, label %for.end, !prof !1

		for.end: ; preds = %for.body
		ret i32 0
		}

!0 = !{!"function_entry_count", i64 100}		!0 = !{!"function_entry_count", i64 100}
!1 = !{!"branch_weights", i32 100, i32 0}		!1 = !{!"branch_weights", i32 100, i32 0}
!2 = !{!"branch_weights", i32 10, i32 90}		!2 = !{!"branch_weights", i32 10, i32 90}
!3 = !{!"branch_weights", i32 10, i32 10000}		!3 = !{!"branch_weights", i32 10, i32 10000}

This is an archive of the discontinued LLVM Phabricator instance.

Verify profile data confirms large loop trip counts.
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 133324

llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp

llvm/trunk/test/Transforms/LoopVectorize/tripcount.ll

This is an archive of the discontinued LLVM Phabricator instance.

Verify profile data confirms large loop trip counts.
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 133324

llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp

llvm/trunk/test/Transforms/LoopVectorize/tripcount.ll

Verify profile data confirms large loop trip counts.
ClosedPublic