Diff 511942

llvm/lib/Analysis/ScalarEvolution.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

	Show First 20 Lines • Show All 8,223 Lines • ▼ Show 20 Lines
	unsigned ScalarEvolution::getSmallConstantTripMultiple(const Loop *L,			unsigned ScalarEvolution::getSmallConstantTripMultiple(const Loop *L,
	const SCEV *ExitCount) {			const SCEV *ExitCount) {
	if (ExitCount == getCouldNotCompute())			if (ExitCount == getCouldNotCompute())
	return 1;			return 1;

	// Get the trip count			// Get the trip count
	const SCEV *TCExpr = getTripCountFromExitCount(ExitCount);			const SCEV *TCExpr = getTripCountFromExitCount(ExitCount);

				// If a trip multiple is huge (>=2^32), the trip count is still divisible by
				// the greatest power of 2 divisor less than 2^32.
				auto GetSmallMultiple = [](unsigned TrailingZeros) {
				return 1U << std::min((uint32_t)31, TrailingZeros);
				};

	const SCEVConstant *TC = dyn_cast<SCEVConstant>(TCExpr);			const SCEVConstant *TC = dyn_cast<SCEVConstant>(TCExpr);
	if (!TC)			if (!TC)
	// Attempt to factor more general cases. Returns the greatest power of			// Attempt to factor more general cases. Returns the greatest power of
	// two divisor. If overflow happens, the trip count expression is still			// two divisor.
	// divisible by the greatest power of 2 divisor returned.			return GetSmallMultiple(
	return 1U << std::min((uint32_t)31,
	GetMinTrailingZeros(applyLoopGuards(TCExpr, L)));			GetMinTrailingZeros(applyLoopGuards(TCExpr, L)));

	ConstantInt *Result = TC->getValue();			ConstantInt *Result = TC->getValue();

	// Guard against huge trip counts (this requires checking			// Guard against huge trip multiples (this requires checking for zero to
	// for zero to handle the case where the trip count == -1 and the			// handle the case where the trip count == -1 and the addition wraps).
				efriedma (inline comment, done): If the Result is zero, countTrailingZeros() will return the bitwidth of the integer, I think, which doesn't match the comment. I don't think the wrapping thing can actually happen since D110587 was merged, though, so we shouldn't see a trip count of zero. So maybe update the comment while you're here. (Maybe it's theoretically possible that some combination of folding could end up folding a poison trip count to zero. In that case, though, it wouldn't really matter what multiple we return.)
				caojoshua (author, inline comment, done): You're right, TripCount should never be zero. I added an assertion for this and removed that section of the comment.
	// addition wraps).
	assert(Result && "SCEVConstant expected to have non-null ConstantInt");			assert(Result && "SCEVConstant expected to have non-null ConstantInt");
	if (Result->getValue().getActiveBits() > 32 ||			if (Result->getValue().getActiveBits() > 32 ||
	Result->getValue().getActiveBits() == 0)			Result->getValue().getActiveBits() == 0)
	return 1;			return GetSmallMultiple(Result->getValue().countTrailingZeros());

	return (unsigned)Result->getZExtValue();			return (unsigned)Result->getZExtValue();
	}			}

	/// Returns the largest constant divisor of the trip count of this loop as a			/// Returns the largest constant divisor of the trip count of this loop as a
	/// normal unsigned value, if possible. This means that the actual trip count is			/// normal unsigned value, if possible. This means that the actual trip count is
	/// always a multiple of the returned value (don't forget the trip count could			/// always a multiple of the returned value (don't forget the trip count could
	/// very well be zero as well!).			/// very well be zero as well!).
	▲ Show 20 Lines • Show All 7,155 Lines • Show Last 20 Lines

llvm/test/Analysis/ScalarEvolution/huge-trip-multiple.ll

	; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 2			; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 2
	; RUN: opt < %s -disable-output "-passes=print<scalar-evolution>" 2>&1 | FileCheck %s			; RUN: opt < %s -disable-output "-passes=print<scalar-evolution>" 2>&1 | FileCheck %s

	; Tests loops with huge trip counts. Trip count of >=2**32 are huge. Huge trip counts have a trip multiple of 1.			; Tests loops with huge trip counts. Trip count of >=2^32 are huge. Huge trip counts have a trip multiple
				; of the greatest power of 2 less than 2^32.

	declare void @foo(...)			declare void @foo(...)

	define void @trip_count_4294967295() {			define void @trip_count_4294967295() {
	; CHECK-LABEL: 'trip_count_4294967295'			; CHECK-LABEL: 'trip_count_4294967295'
	; CHECK-NEXT: Classifying expressions for: @trip_count_4294967295			; CHECK-NEXT: Classifying expressions for: @trip_count_4294967295
	; CHECK-NEXT: %i.02 = phi i64 [ 0, %entry ], [ %add, %for.body ]			; CHECK-NEXT: %i.02 = phi i64 [ 0, %entry ], [ %add, %for.body ]
	; CHECK-NEXT: --> {0,+,1}<nuw><nsw><%for.body> U: [0,4294967295) S: [0,4294967295) Exits: 4294967294 LoopDispositions: { %for.body: Computable }			; CHECK-NEXT: --> {0,+,1}<nuw><nsw><%for.body> U: [0,4294967295) S: [0,4294967295) Exits: 4294967294 LoopDispositions: { %for.body: Computable }
	Show All 29 Lines
	; CHECK-NEXT: %add = add nuw nsw i64 %i.02, 1			; CHECK-NEXT: %add = add nuw nsw i64 %i.02, 1
	; CHECK-NEXT: --> {1,+,1}<nuw><nsw><%for.body> U: [1,4294967297) S: [1,4294967297) Exits: 4294967296 LoopDispositions: { %for.body: Computable }			; CHECK-NEXT: --> {1,+,1}<nuw><nsw><%for.body> U: [1,4294967297) S: [1,4294967297) Exits: 4294967296 LoopDispositions: { %for.body: Computable }
	; CHECK-NEXT: Determining loop execution counts for: @trip_count_4294967296			; CHECK-NEXT: Determining loop execution counts for: @trip_count_4294967296
	; CHECK-NEXT: Loop %for.body: backedge-taken count is 4294967295			; CHECK-NEXT: Loop %for.body: backedge-taken count is 4294967295
	; CHECK-NEXT: Loop %for.body: constant max backedge-taken count is 4294967295			; CHECK-NEXT: Loop %for.body: constant max backedge-taken count is 4294967295
	; CHECK-NEXT: Loop %for.body: symbolic max backedge-taken count is 4294967295			; CHECK-NEXT: Loop %for.body: symbolic max backedge-taken count is 4294967295
	; CHECK-NEXT: Loop %for.body: Predicated backedge-taken count is 4294967295			; CHECK-NEXT: Loop %for.body: Predicated backedge-taken count is 4294967295
	; CHECK-NEXT: Predicates:			; CHECK-NEXT: Predicates:
	; CHECK: Loop %for.body: Trip multiple is 1			; CHECK: Loop %for.body: Trip multiple is 2147483648
	;			;
	entry:			entry:
	br label %for.body			br label %for.body

	for.cond.cleanup: ; preds = %for.body			for.cond.cleanup: ; preds = %for.body
	ret void			ret void

	for.body: ; preds = %entry, %for.body			for.body: ; preds = %entry, %for.body
	Show All 12 Lines
	; CHECK-NEXT: %add = add nuw nsw i64 %i.02, 1			; CHECK-NEXT: %add = add nuw nsw i64 %i.02, 1
	; CHECK-NEXT: --> {1,+,1}<nuw><nsw><%for.body> U: [1,8589934593) S: [1,8589934593) Exits: 8589934592 LoopDispositions: { %for.body: Computable }			; CHECK-NEXT: --> {1,+,1}<nuw><nsw><%for.body> U: [1,8589934593) S: [1,8589934593) Exits: 8589934592 LoopDispositions: { %for.body: Computable }
	; CHECK-NEXT: Determining loop execution counts for: @trip_count_8589935692			; CHECK-NEXT: Determining loop execution counts for: @trip_count_8589935692
	; CHECK-NEXT: Loop %for.body: backedge-taken count is 8589934591			; CHECK-NEXT: Loop %for.body: backedge-taken count is 8589934591
	; CHECK-NEXT: Loop %for.body: constant max backedge-taken count is 8589934591			; CHECK-NEXT: Loop %for.body: constant max backedge-taken count is 8589934591
	; CHECK-NEXT: Loop %for.body: symbolic max backedge-taken count is 8589934591			; CHECK-NEXT: Loop %for.body: symbolic max backedge-taken count is 8589934591
	; CHECK-NEXT: Loop %for.body: Predicated backedge-taken count is 8589934591			; CHECK-NEXT: Loop %for.body: Predicated backedge-taken count is 8589934591
	; CHECK-NEXT: Predicates:			; CHECK-NEXT: Predicates:
	; CHECK: Loop %for.body: Trip multiple is 1			; CHECK: Loop %for.body: Trip multiple is 2147483648
	;			;
	entry:			entry:
	br label %for.body			br label %for.body

	for.cond.cleanup: ; preds = %for.body			for.cond.cleanup: ; preds = %for.body
	ret void			ret void

	for.body: ; preds = %entry, %for.body			for.body: ; preds = %entry, %for.body
	Show All 12 Lines
	; CHECK-NEXT: %add = add nuw nsw i64 %i.02, 1			; CHECK-NEXT: %add = add nuw nsw i64 %i.02, 1
	; CHECK-NEXT: --> {1,+,1}<nuw><%for.body> U: [1,-9223372036854775807) S: [1,-9223372036854775807) Exits: -9223372036854775808 LoopDispositions: { %for.body: Computable }			; CHECK-NEXT: --> {1,+,1}<nuw><%for.body> U: [1,-9223372036854775807) S: [1,-9223372036854775807) Exits: -9223372036854775808 LoopDispositions: { %for.body: Computable }
	; CHECK-NEXT: Determining loop execution counts for: @trip_count_9223372036854775808			; CHECK-NEXT: Determining loop execution counts for: @trip_count_9223372036854775808
	; CHECK-NEXT: Loop %for.body: backedge-taken count is 9223372036854775807			; CHECK-NEXT: Loop %for.body: backedge-taken count is 9223372036854775807
	; CHECK-NEXT: Loop %for.body: constant max backedge-taken count is 9223372036854775807			; CHECK-NEXT: Loop %for.body: constant max backedge-taken count is 9223372036854775807
	; CHECK-NEXT: Loop %for.body: symbolic max backedge-taken count is 9223372036854775807			; CHECK-NEXT: Loop %for.body: symbolic max backedge-taken count is 9223372036854775807
	; CHECK-NEXT: Loop %for.body: Predicated backedge-taken count is 9223372036854775807			; CHECK-NEXT: Loop %for.body: Predicated backedge-taken count is 9223372036854775807
	; CHECK-NEXT: Predicates:			; CHECK-NEXT: Predicates:
	; CHECK: Loop %for.body: Trip multiple is 1			; CHECK: Loop %for.body: Trip multiple is 2147483648
	;			;
	entry:			entry:
	br label %for.body			br label %for.body

	for.cond.cleanup: ; preds = %for.body			for.cond.cleanup: ; preds = %for.body
	ret void			ret void

	for.body: ; preds = %entry, %for.body			for.body: ; preds = %entry, %for.body
	Show All 35 Lines

llvm/test/Transforms/LoopUnroll/X86/mmx.ll

	; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2			; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
	; RUN: opt < %s -S -passes=loop-unroll | FileCheck %s			; RUN: opt < %s -S -passes=loop-unroll | FileCheck %s
	target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"			target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
	target triple = "x86_64-unknown-linux-gnu"			target triple = "x86_64-unknown-linux-gnu"

	define x86_mmx @f() #0 {			define x86_mmx @f() #0 {
	; CHECK-LABEL: define x86_mmx @f			; CHECK-LABEL: define x86_mmx @f
	; CHECK-SAME: () #[[ATTR0:[0-9]+]] {			; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
	; CHECK-NEXT: entry:			; CHECK-NEXT: entry:
	; CHECK-NEXT: br i1 false, label [[EXIT_UNR_LCSSA:%.*]], label [[ENTRY_NEW:%.*]]
	; CHECK: entry.new:
	; CHECK-NEXT: br label [[FOR_BODY:%.*]]			; CHECK-NEXT: br label [[FOR_BODY:%.*]]
	; CHECK: for.body:			; CHECK: for.body:
	; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ 1, [[ENTRY_NEW]] ], [ [[ADD_7:%.*]], [[FOR_BODY]] ]			; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ], [ [[ADD_7:%.*]], [[FOR_BODY]] ]
	; CHECK-NEXT: [[NITER:%.*]] = phi i32 [ 0, [[ENTRY_NEW]] ], [ [[NITER_NEXT_7:%.*]], [[FOR_BODY]] ]
	; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw i32 [[PHI]], 1			; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw i32 [[PHI]], 1
	; CHECK-NEXT: [[NITER_NEXT:%.*]] = add nuw nsw i32 [[NITER]], 1
	; CHECK-NEXT: [[ADD_1:%.*]] = add nuw nsw i32 [[ADD]], 1			; CHECK-NEXT: [[ADD_1:%.*]] = add nuw nsw i32 [[ADD]], 1
	; CHECK-NEXT: [[NITER_NEXT_1:%.*]] = add nuw nsw i32 [[NITER_NEXT]], 1
	; CHECK-NEXT: [[ADD_2:%.*]] = add nuw nsw i32 [[ADD_1]], 1			; CHECK-NEXT: [[ADD_2:%.*]] = add nuw nsw i32 [[ADD_1]], 1
	; CHECK-NEXT: [[NITER_NEXT_2:%.*]] = add nuw nsw i32 [[NITER_NEXT_1]], 1
	; CHECK-NEXT: [[ADD_3:%.*]] = add nuw nsw i32 [[ADD_2]], 1			; CHECK-NEXT: [[ADD_3:%.*]] = add nuw nsw i32 [[ADD_2]], 1
	; CHECK-NEXT: [[NITER_NEXT_3:%.*]] = add nuw nsw i32 [[NITER_NEXT_2]], 1
	; CHECK-NEXT: [[ADD_4:%.*]] = add nuw nsw i32 [[ADD_3]], 1			; CHECK-NEXT: [[ADD_4:%.*]] = add nuw nsw i32 [[ADD_3]], 1
	; CHECK-NEXT: [[NITER_NEXT_4:%.*]] = add nuw nsw i32 [[NITER_NEXT_3]], 1
	; CHECK-NEXT: [[ADD_5:%.*]] = add nuw nsw i32 [[ADD_4]], 1			; CHECK-NEXT: [[ADD_5:%.*]] = add nuw nsw i32 [[ADD_4]], 1
	; CHECK-NEXT: [[NITER_NEXT_5:%.*]] = add nuw nsw i32 [[NITER_NEXT_4]], 1
	; CHECK-NEXT: [[ADD_6:%.*]] = add i32 [[ADD_5]], 1			; CHECK-NEXT: [[ADD_6:%.*]] = add i32 [[ADD_5]], 1
	; CHECK-NEXT: [[NITER_NEXT_6:%.*]] = add nuw nsw i32 [[NITER_NEXT_5]], 1
	; CHECK-NEXT: [[ADD_7]] = add nuw nsw i32 [[ADD_6]], 1			; CHECK-NEXT: [[ADD_7]] = add nuw nsw i32 [[ADD_6]], 1
	; CHECK-NEXT: [[NITER_NEXT_7]] = add i32 [[NITER_NEXT_6]], 1			; CHECK-NEXT: [[CMP_7:%.*]] = icmp eq i32 [[ADD_6]], 0
	; CHECK-NEXT: [[NITER_NCMP_7:%.*]] = icmp eq i32 [[NITER_NEXT_7]], 0			; CHECK-NEXT: br i1 [[CMP_7]], label [[EXIT:%.*]], label [[FOR_BODY]]
	; CHECK-NEXT: br i1 [[NITER_NCMP_7]], label [[EXIT_UNR_LCSSA_LOOPEXIT:%.*]], label [[FOR_BODY]]
	; CHECK: exit.unr-lcssa.loopexit:
	; CHECK-NEXT: [[RET_PH_PH:%.*]] = phi x86_mmx [ undef, [[FOR_BODY]] ]
	; CHECK-NEXT: [[PHI_UNR_PH:%.*]] = phi i32 [ [[ADD_7]], [[FOR_BODY]] ]
	; CHECK-NEXT: br label [[EXIT_UNR_LCSSA]]
	; CHECK: exit.unr-lcssa:
	; CHECK-NEXT: [[RET_PH:%.*]] = phi x86_mmx [ undef, [[ENTRY:%.*]] ], [ [[RET_PH_PH]], [[EXIT_UNR_LCSSA_LOOPEXIT]] ]
	; CHECK-NEXT: [[PHI_UNR:%.*]] = phi i32 [ 1, [[ENTRY]] ], [ [[PHI_UNR_PH]], [[EXIT_UNR_LCSSA_LOOPEXIT]] ]
	; CHECK-NEXT: br i1 false, label [[FOR_BODY_EPIL_PREHEADER:%.*]], label [[EXIT:%.*]]
	; CHECK: for.body.epil.preheader:
	; CHECK-NEXT: br label [[FOR_BODY_EPIL:%.*]]
	; CHECK: for.body.epil:
	; CHECK-NEXT: [[PHI_EPIL:%.*]] = phi i32 [ [[PHI_UNR]], [[FOR_BODY_EPIL_PREHEADER]] ], [ [[ADD_EPIL:%.*]], [[FOR_BODY_EPIL]] ]
	; CHECK-NEXT: [[EPIL_ITER:%.*]] = phi i32 [ 0, [[FOR_BODY_EPIL_PREHEADER]] ], [ [[EPIL_ITER_NEXT:%.*]], [[FOR_BODY_EPIL]] ]
	; CHECK-NEXT: [[ADD_EPIL]] = add i32 [[PHI_EPIL]], 1
	; CHECK-NEXT: [[CMP_EPIL:%.*]] = icmp eq i32 [[PHI_EPIL]], 0
	; CHECK-NEXT: [[EPIL_ITER_NEXT]] = add i32 [[EPIL_ITER]], 1
	; CHECK-NEXT: [[EPIL_ITER_CMP:%.*]] = icmp ne i32 [[EPIL_ITER_NEXT]], 0
	; CHECK-NEXT: br i1 [[EPIL_ITER_CMP]], label [[FOR_BODY_EPIL]], label [[EXIT_EPILOG_LCSSA:%.*]], !llvm.loop [[LOOP0:![0-9]+]]
	; CHECK: exit.epilog-lcssa:
	; CHECK-NEXT: [[RET_PH1:%.*]] = phi x86_mmx [ undef, [[FOR_BODY_EPIL]] ]
	; CHECK-NEXT: br label [[EXIT]]
	; CHECK: exit:			; CHECK: exit:
	; CHECK-NEXT: [[RET:%.*]] = phi x86_mmx [ [[RET_PH]], [[EXIT_UNR_LCSSA]] ], [ [[RET_PH1]], [[EXIT_EPILOG_LCSSA]] ]			; CHECK-NEXT: [[RET:%.*]] = phi x86_mmx [ undef, [[FOR_BODY]] ]
	; CHECK-NEXT: ret x86_mmx [[RET]]			; CHECK-NEXT: ret x86_mmx [[RET]]
	;			;
	entry:			entry:
	br label %for.body			br label %for.body

	for.body: ; preds = %for.body, %entry			for.body: ; preds = %for.body, %entry
	%phi = phi i32 [ 1, %entry ], [ %add, %for.body ]			%phi = phi i32 [ 1, %entry ], [ %add, %for.body ]
	%add = add i32 %phi, 1			%add = add i32 %phi, 1
	Show All 9 Lines

This is an archive of the discontinued LLVM Phabricator instance.

[SCEV] Strengthen huge constant trip multiples.
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 511942

llvm/lib/Analysis/ScalarEvolution.cpp

llvm/test/Analysis/ScalarEvolution/huge-trip-multiple.ll

llvm/test/Transforms/LoopUnroll/X86/mmx.ll

This is an archive of the discontinued LLVM Phabricator instance.

[SCEV] Strengthen huge constant trip multiples.
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 511942

llvm/lib/Analysis/ScalarEvolution.cpp

llvm/test/Analysis/ScalarEvolution/huge-trip-multiple.ll

llvm/test/Transforms/LoopUnroll/X86/mmx.ll

[SCEV] Strengthen huge constant trip multiples.
ClosedPublic