This is an archive of the discontinued LLVM Phabricator instance.

Paths

Table of Contents

-
llvm/
-
lib/Analysis/
-
Analysis/
2/2
ValueTracking.cpp
-
test/Transforms/InstSimplify/
-
Transforms/
-
InstSimplify/
-
compare.ll

Differential D89976

[ValueTracking] add range limits for ctpop
ClosedPublic

Authored by spatel on Oct 22 2020, 10:46 AM.

Download Raw Diff

Details

Reviewers

davezarzycki
lebedev.ri
RKSimon
nikic

Commits

rG748ecc6b3260: [ValueTracking] add range limits for ctpop

Summary

As discussed in D89952, instcombine can sometimes find a way to reduce similar patterns, but it is incomplete.
InstSimplify uses the computeConstantRange() ValueTracking analysis via simplifyICmpWithConstant(), so we just need to fill in the max value of ctpop to process any "icmp pred ctpop(X), C" pattern (the min value is initialized to zero automatically).

Diff Detail

Repository: rG LLVM Github Monorepo

Event Timeline

spatel created this revision.Oct 22 2020, 10:46 AM

Herald added a project: Restricted Project. · View Herald TranscriptOct 22 2020, 10:46 AM

Herald added subscribers: hiraditya, mcrosier. · View Herald Transcript

spatel requested review of this revision.Oct 22 2020, 10:46 AM

FYI -- I'm not sure if it'll change the test impact of this patch, but I did commit a bunch of x86 vector popcnt tests to the repository this morning.

In D89976#2348050, @davezarzycki wrote:

FYI -- I'm not sure if it'll change the test impact of this patch, but I did commit a bunch of x86 vector popcnt tests to the repository this morning.

IIUC those were codegen tests -- 8556f38 -- so there won't be any overlap with the change here. I think some targets do run -instsimplify as a pre-codegen IR pass, but x86 is not one of them.
In general, we want to have this kind of analysis in IR to efficiently reduce code as quickly as possible. If there's evidence that these patterns are created later in the pipeline, then we could repeat the transforms in codegen. But we don't want to have that redundancy blindly because everything has a compile-time cost.

Thanks. Unless I missed it, we should also add the upper range for count leading/trailing zeros too.

In D89976#2348230, @davezarzycki wrote:

Thanks. Unless I missed it, we should also add the upper range for count leading/trailing zeros too.

Yes - it should just be adding the case line on top of this and adding tests. But I prefer to do it in steps, so it's easier to debug if we hit any problems.

LGTM

llvm/lib/Analysis/ValueTracking.cpp
6465	Use `Width` instead of `II.getType()->getScalarSizeInBits()`?

This revision is now accepted and ready to land.Oct 22 2020, 2:03 PM

spatel marked an inline comment as done.Oct 23 2020, 5:16 AM

spatel added inline comments.

llvm/lib/Analysis/ValueTracking.cpp
6465	Yes, we already have the bit-width here. I'll also add an assert for 'Lower', so the zero initializer assumption doesn't break.

Closed by commit rG748ecc6b3260: [ValueTracking] add range limits for ctpop (authored by spatel). · Explain WhyOct 23 2020, 5:18 AM

This revision was automatically updated to reflect the committed changes.

spatel marked an inline comment as done.

spatel added a commit: rG748ecc6b3260: [ValueTracking] add range limits for ctpop.

spatel mentioned this in rGc72198079df6: [ValueTracking] add range limits for cttz.Oct 23 2020, 6:04 AM

spatel mentioned this in rG3fb0d6b0d55b: [ValueTracking] add range limits for ctlz.

Revision Contents

Path

Size

llvm/

lib/

Analysis/

ValueTracking.cpp

5 lines

test/

Transforms/

InstSimplify/

compare.ll

32 lines

Diff 300241

llvm/lib/Analysis/ValueTracking.cpp

Show First 20 Lines • Show All 6,454 Lines • ▼ Show 20 Lines	static void setLimitsForBinOp(const BinaryOperator &BO, APInt &Lower,
}		}
}		}

static void setLimitsForIntrinsic(const IntrinsicInst &II, APInt &Lower,		static void setLimitsForIntrinsic(const IntrinsicInst &II, APInt &Lower,
APInt &Upper) {		APInt &Upper) {
unsigned Width = Lower.getBitWidth();		unsigned Width = Lower.getBitWidth();
const APInt *C;		const APInt *C;
switch (II.getIntrinsicID()) {		switch (II.getIntrinsicID()) {
		case Intrinsic::ctpop:
		// Maximum of set bits is the bit width.
		assert(Lower == 0 && "Expected lower bound to be zero");
		nikicUnsubmitted Done Reply Inline Actions Use `Width` instead of `II.getType()->getScalarSizeInBits()`? nikic: Use `Width` instead of `II.getType()->getScalarSizeInBits()`?
		spatelAuthorUnsubmitted Done Reply Inline Actions Yes, we already have the bit-width here. I'll also add an assert for 'Lower', so the zero initializer assumption doesn't break. spatel: Yes, we already have the bit-width here. I'll also add an assert for 'Lower', so the zero…
		Upper = Width + 1;
		break;
case Intrinsic::uadd_sat:		case Intrinsic::uadd_sat:
// uadd.sat(x, C) produces [C, UINT_MAX].		// uadd.sat(x, C) produces [C, UINT_MAX].
if (match(II.getOperand(0), m_APInt(C)) ||		if (match(II.getOperand(0), m_APInt(C)) ||
match(II.getOperand(1), m_APInt(C)))		match(II.getOperand(1), m_APInt(C)))
Lower = *C;		Lower = *C;
break;		break;
case Intrinsic::sadd_sat:		case Intrinsic::sadd_sat:
if (match(II.getOperand(0), m_APInt(C)) ||		if (match(II.getOperand(0), m_APInt(C)) ||
▲ Show 20 Lines • Show All 303 Lines • Show Last 20 Lines

llvm/test/Transforms/InstSimplify/compare.ll

	Show First 20 Lines • Show All 2,067 Lines • ▼ Show 20 Lines
	}			}

	declare i11 @llvm.ctpop.i11(i11)			declare i11 @llvm.ctpop.i11(i11)
	declare i73 @llvm.ctpop.i73(i73)			declare i73 @llvm.ctpop.i73(i73)
	declare <2 x i13> @llvm.ctpop.v2i13(<2 x i13>)			declare <2 x i13> @llvm.ctpop.v2i13(<2 x i13>)

	define i1 @ctpop_sgt_bitwidth(i11 %x) {			define i1 @ctpop_sgt_bitwidth(i11 %x) {
	; CHECK-LABEL: @ctpop_sgt_bitwidth(			; CHECK-LABEL: @ctpop_sgt_bitwidth(
	; CHECK-NEXT: [[POP:%.*]] = call i11 @llvm.ctpop.i11(i11 [[X:%.*]])			; CHECK-NEXT: ret i1 false
	; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i11 [[POP]], 11
	; CHECK-NEXT: ret i1 [[CMP]]
	;			;
	%pop = call i11 @llvm.ctpop.i11(i11 %x)			%pop = call i11 @llvm.ctpop.i11(i11 %x)
	%cmp = icmp sgt i11 %pop, 11			%cmp = icmp sgt i11 %pop, 11
	ret i1 %cmp			ret i1 %cmp
	}			}

	define i1 @ctpop_sle_minus1(i11 %x) {			define i1 @ctpop_sle_minus1(i11 %x) {
	; CHECK-LABEL: @ctpop_sle_minus1(			; CHECK-LABEL: @ctpop_sle_minus1(
	; CHECK-NEXT: [[POP:%.*]] = call i11 @llvm.ctpop.i11(i11 [[X:%.*]])			; CHECK-NEXT: ret i1 false
	; CHECK-NEXT: [[CMP:%.*]] = icmp sle i11 [[POP]], -1
	; CHECK-NEXT: ret i1 [[CMP]]
	;			;
	%pop = call i11 @llvm.ctpop.i11(i11 %x)			%pop = call i11 @llvm.ctpop.i11(i11 %x)
	%cmp = icmp sle i11 %pop, -1			%cmp = icmp sle i11 %pop, -1
	ret i1 %cmp			ret i1 %cmp
	}			}

	define i1 @ctpop_ugt_bitwidth(i73 %x) {			define i1 @ctpop_ugt_bitwidth(i73 %x) {
	; CHECK-LABEL: @ctpop_ugt_bitwidth(			; CHECK-LABEL: @ctpop_ugt_bitwidth(
	; CHECK-NEXT: [[POP:%.*]] = call i73 @llvm.ctpop.i73(i73 [[X:%.*]])			; CHECK-NEXT: ret i1 false
	; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i73 [[POP]], 73
	; CHECK-NEXT: ret i1 [[CMP]]
	;			;
	%pop = call i73 @llvm.ctpop.i73(i73 %x)			%pop = call i73 @llvm.ctpop.i73(i73 %x)
	%cmp = icmp ugt i73 %pop, 73			%cmp = icmp ugt i73 %pop, 73
	ret i1 %cmp			ret i1 %cmp
	}			}

				; Negative test - does not simplify, but instcombine could reduce this.

	define i1 @ctpop_ugt_bitwidth_minus1(i73 %x) {			define i1 @ctpop_ugt_bitwidth_minus1(i73 %x) {
	; CHECK-LABEL: @ctpop_ugt_bitwidth_minus1(			; CHECK-LABEL: @ctpop_ugt_bitwidth_minus1(
	; CHECK-NEXT: [[POP:%.*]] = call i73 @llvm.ctpop.i73(i73 [[X:%.*]])			; CHECK-NEXT: [[POP:%.*]] = call i73 @llvm.ctpop.i73(i73 [[X:%.*]])
	; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i73 [[POP]], 72			; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i73 [[POP]], 72
	; CHECK-NEXT: ret i1 [[CMP]]			; CHECK-NEXT: ret i1 [[CMP]]
	;			;
	%pop = call i73 @llvm.ctpop.i73(i73 %x)			%pop = call i73 @llvm.ctpop.i73(i73 %x)
	%cmp = icmp ugt i73 %pop, 72			%cmp = icmp ugt i73 %pop, 72
	ret i1 %cmp			ret i1 %cmp
	}			}

	define <2 x i1> @ctpop_sgt_bitwidth_splat(<2 x i13> %x) {			define <2 x i1> @ctpop_sgt_bitwidth_splat(<2 x i13> %x) {
	; CHECK-LABEL: @ctpop_sgt_bitwidth_splat(			; CHECK-LABEL: @ctpop_sgt_bitwidth_splat(
	; CHECK-NEXT: [[POP:%.*]] = call <2 x i13> @llvm.ctpop.v2i13(<2 x i13> [[X:%.*]])			; CHECK-NEXT: ret <2 x i1> zeroinitializer
	; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <2 x i13> [[POP]], <i13 13, i13 13>
	; CHECK-NEXT: ret <2 x i1> [[CMP]]
	;			;
	%pop = call <2 x i13> @llvm.ctpop.v2i13(<2 x i13> %x)			%pop = call <2 x i13> @llvm.ctpop.v2i13(<2 x i13> %x)
	%cmp = icmp sgt <2 x i13> %pop, <i13 13, i13 13>			%cmp = icmp sgt <2 x i13> %pop, <i13 13, i13 13>
	ret <2 x i1> %cmp			ret <2 x i1> %cmp
	}			}

	define i1 @ctpop_ult_plus1_bitwidth(i11 %x) {			define i1 @ctpop_ult_plus1_bitwidth(i11 %x) {
	; CHECK-LABEL: @ctpop_ult_plus1_bitwidth(			; CHECK-LABEL: @ctpop_ult_plus1_bitwidth(
	; CHECK-NEXT: [[POP:%.*]] = call i11 @llvm.ctpop.i11(i11 [[X:%.*]])			; CHECK-NEXT: ret i1 true
	; CHECK-NEXT: [[CMP:%.*]] = icmp ult i11 [[POP]], 12
	; CHECK-NEXT: ret i1 [[CMP]]
	;			;
	%pop = call i11 @llvm.ctpop.i11(i11 %x)			%pop = call i11 @llvm.ctpop.i11(i11 %x)
	%cmp = icmp ult i11 %pop, 12			%cmp = icmp ult i11 %pop, 12
	ret i1 %cmp			ret i1 %cmp
	}			}

	define i1 @ctpop_ne_big_bitwidth(i73 %x) {			define i1 @ctpop_ne_big_bitwidth(i73 %x) {
	; CHECK-LABEL: @ctpop_ne_big_bitwidth(			; CHECK-LABEL: @ctpop_ne_big_bitwidth(
	; CHECK-NEXT: [[POP:%.*]] = call i73 @llvm.ctpop.i73(i73 [[X:%.*]])			; CHECK-NEXT: ret i1 true
	; CHECK-NEXT: [[CMP:%.*]] = icmp ne i73 [[POP]], 75
	; CHECK-NEXT: ret i1 [[CMP]]
	;			;
	%pop = call i73 @llvm.ctpop.i73(i73 %x)			%pop = call i73 @llvm.ctpop.i73(i73 %x)
	%cmp = icmp ne i73 %pop, 75			%cmp = icmp ne i73 %pop, 75
	ret i1 %cmp			ret i1 %cmp
	}			}

	define <2 x i1> @ctpop_slt_bitwidth_plus1_splat(<2 x i13> %x) {			define <2 x i1> @ctpop_slt_bitwidth_plus1_splat(<2 x i13> %x) {
	; CHECK-LABEL: @ctpop_slt_bitwidth_plus1_splat(			; CHECK-LABEL: @ctpop_slt_bitwidth_plus1_splat(
	; CHECK-NEXT: [[POP:%.*]] = call <2 x i13> @llvm.ctpop.v2i13(<2 x i13> [[X:%.*]])			; CHECK-NEXT: ret <2 x i1> <i1 true, i1 true>
	; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i13> [[POP]], <i13 14, i13 14>
	; CHECK-NEXT: ret <2 x i1> [[CMP]]
	;			;
	%pop = call <2 x i13> @llvm.ctpop.v2i13(<2 x i13> %x)			%pop = call <2 x i13> @llvm.ctpop.v2i13(<2 x i13> %x)
	%cmp = icmp slt <2 x i13> %pop, <i13 14, i13 14>			%cmp = icmp slt <2 x i13> %pop, <i13 14, i13 14>
	ret <2 x i1> %cmp			ret <2 x i1> %cmp
	}			}

				; Negative test - does not simplify, but instcombine could reduce this.

	define <2 x i1> @ctpop_slt_bitwidth_splat(<2 x i13> %x) {			define <2 x i1> @ctpop_slt_bitwidth_splat(<2 x i13> %x) {
	; CHECK-LABEL: @ctpop_slt_bitwidth_splat(			; CHECK-LABEL: @ctpop_slt_bitwidth_splat(
	; CHECK-NEXT: [[POP:%.*]] = call <2 x i13> @llvm.ctpop.v2i13(<2 x i13> [[X:%.*]])			; CHECK-NEXT: [[POP:%.*]] = call <2 x i13> @llvm.ctpop.v2i13(<2 x i13> [[X:%.*]])
	; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i13> [[POP]], <i13 13, i13 13>			; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i13> [[POP]], <i13 13, i13 13>
	; CHECK-NEXT: ret <2 x i1> [[CMP]]			; CHECK-NEXT: ret <2 x i1> [[CMP]]
	;			;
	%pop = call <2 x i13> @llvm.ctpop.v2i13(<2 x i13> %x)			%pop = call <2 x i13> @llvm.ctpop.v2i13(<2 x i13> %x)
	%cmp = icmp slt <2 x i13> %pop, <i13 13, i13 13>			%cmp = icmp slt <2 x i13> %pop, <i13 13, i13 13>
	ret <2 x i1> %cmp			ret <2 x i1> %cmp
	}			}

	attributes #0 = { null_pointer_is_valid }			attributes #0 = { null_pointer_is_valid }