This is an archive of the discontinued LLVM Phabricator instance.

[X86] Add more patterns for BZHI isel
ClosedPublic

Authored by craig.topper on Apr 27 2017, 2:20 PM.

Download Raw Diff

Details

Reviewers

RKSimon
zvi
spatel

Commits

rGf893d49f0ced: [X86] Add more patterns for BZHI isel
rL302549: [X86] Add more patterns for BZHI isel

Summary

This patch adds more patterns that a reasonable person might write that can be compiled to BZHI.

This adds support for

(~0U >> (32 - b)) & a;

and

a << (32 - b) >> (32 - b);

This was inspired by the code in APInt::clearUnusedBits.

This can pass an index of 32 to the bzhi instruction, which a quick test on Haswell hardware shows will not mask any bits. The description text in the Intel manual says the "index is saturated to OperandSize-1", but the pseudocode in the same manual indicates that no bits will be zeroed in this case.

I think this is still missing cases where the subtract portion is an 8-bit operation.

Diff Detail

Event Timeline

craig.topper created this revision. Apr 27 2017, 2:20 PM

According to the AMD APM v3:

If the value of index is greater than or equal to the operand size, index is set to (op_size-1). In this case, the CF flag is set.

So AMD's description is similar to the incorrect Intel description. Do you have an AMD machine that you can verify this with?

I tested with

printf("%x", _bzhi_u32(0xffffffff, 32));

On Haswell that returned 0xffffffff.

In D32616#741875, @craig.topper wrote:

So AMD's description is similar to the incorrect Intel description. Do you have an AMD machine that you can verify this with?

I tested with

printf("%x", _bzhi_u32(0xffffffff, 32));

On Haswell that returned 0xffffffff.

Excavator returned 0xffffffff as well. The Ryzen is in pieces so I can't test it right now.

Ryzen returns 0xffffffff as well.

LGTM.

This revision is now accepted and ready to land. May 9 2017, 2:49 AM

Closed by commit rL302549: [X86] Add more patterns for BZHI isel (authored by ctopper). · Explain Why · May 9 2017, 9:45 AM

This revision was automatically updated to reflect the committed changes.

Revision Contents

Path

Size

lib/

Target/

X86/

X86InstrInfo.td

32 lines

test/

CodeGen/

X86/

bmi.ll

76 lines

Diff 96997

lib/Target/X86/X86InstrInfo.td

Show First 20 Lines • Show All 2,337 Lines • ▼ Show 20 Lines	let Predicates = [HasBMI2] in {

def : Pat<(and GR64:$src, (add (shl 1, GR8:$lz), -1)),		def : Pat<(and GR64:$src, (add (shl 1, GR8:$lz), -1)),
(BZHI64rr GR64:$src,		(BZHI64rr GR64:$src,
(INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR8:$lz, sub_8bit))>;		(INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR8:$lz, sub_8bit))>;

def : Pat<(and (loadi64 addr:$src), (add (shl 1, GR8:$lz), -1)),		def : Pat<(and (loadi64 addr:$src), (add (shl 1, GR8:$lz), -1)),
(BZHI64rm addr:$src,		(BZHI64rm addr:$src,
(INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR8:$lz, sub_8bit))>;		(INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR8:$lz, sub_8bit))>;

		// x & (-1 >> (32 - y))
		def : Pat<(and GR32:$src, (srl -1, (i8 (trunc (sub 32, GR32:$lz))))),
		(BZHI32rr GR32:$src, GR32:$lz)>;
		def : Pat<(and (loadi32 addr:$src), (srl -1, (i8 (trunc (sub 32, GR32:$lz))))),
		(BZHI32rm addr:$src, GR32:$lz)>;

		// x & (-1 >> (64 - y))
		def : Pat<(and GR64:$src, (srl -1, (i8 (trunc (sub 64, GR32:$lz))))),
		(BZHI64rr GR64:$src,
		(INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$lz, sub_32bit))>;
		def : Pat<(and (loadi64 addr:$src), (srl -1, (i8 (trunc (sub 64, GR32:$lz))))),
		(BZHI64rm addr:$src,
		(INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$lz, sub_32bit))>;

		// x << (32 - y) >> (32 - y)
		def : Pat<(srl (shl GR32:$src, (i8 (trunc (sub 32, GR32:$lz)))),
		(i8 (trunc (sub 32, GR32:$lz)))),
		(BZHI32rr GR32:$src, GR32:$lz)>;
		def : Pat<(srl (shl (loadi32 addr:$src), (i8 (trunc (sub 32, GR32:$lz)))),
		(i8 (trunc (sub 32, GR32:$lz)))),
		(BZHI32rm addr:$src, GR32:$lz)>;

		// x << (64 - y) >> (64 - y)
		def : Pat<(srl (shl GR64:$src, (i8 (trunc (sub 64, GR32:$lz)))),
		(i8 (trunc (sub 64, GR32:$lz)))),
		(BZHI64rr GR64:$src,
		(INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$lz, sub_32bit))>;
		def : Pat<(srl (shl (loadi64 addr:$src), (i8 (trunc (sub 64, GR32:$lz)))),
		(i8 (trunc (sub 64, GR32:$lz)))),
		(BZHI64rm addr:$src,
		(INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$lz, sub_32bit))>;
} // HasBMI2		} // HasBMI2

let Predicates = [HasBMI] in {		let Predicates = [HasBMI] in {
def : Pat<(X86bextr GR32:$src1, GR32:$src2),		def : Pat<(X86bextr GR32:$src1, GR32:$src2),
(BEXTR32rr GR32:$src1, GR32:$src2)>;		(BEXTR32rr GR32:$src1, GR32:$src2)>;
def : Pat<(X86bextr (loadi32 addr:$src1), GR32:$src2),		def : Pat<(X86bextr (loadi32 addr:$src1), GR32:$src2),
(BEXTR32rm addr:$src1, GR32:$src2)>;		(BEXTR32rm addr:$src1, GR32:$src2)>;
def : Pat<(X86bextr GR64:$src1, GR64:$src2),		def : Pat<(X86bextr GR64:$src1, GR64:$src2),
▲ Show 20 Lines • Show All 805 Lines • Show Last 20 Lines

test/CodeGen/X86/bmi.ll

	Show First 20 Lines • Show All 448 Lines • ▼ Show 20 Lines
	entry:			entry:
	%conv = zext i8 %index to i32			%conv = zext i8 %index to i32
	%shl = shl i32 1, %conv			%shl = shl i32 1, %conv
	%sub = add nsw i32 %shl, -1			%sub = add nsw i32 %shl, -1
	%and = and i32 %x, %sub			%and = and i32 %x, %sub
	ret i32 %and			ret i32 %and
	}			}

				define i32 @bzhi32d(i32 %a, i32 %b) {
				; CHECK-LABEL: bzhi32d:
				; CHECK: # BB#0: # %entry
				; CHECK-NEXT: bzhil %esi, %edi, %eax
				; CHECK-NEXT: retq
				entry:
				%sub = sub i32 32, %b
				%shr = lshr i32 -1, %sub
				%and = and i32 %shr, %a
				ret i32 %and
				}

				define i32 @bzhi32e(i32 %a, i32 %b) {
				; CHECK-LABEL: bzhi32e:
				; CHECK: # BB#0: # %entry
				; CHECK-NEXT: bzhil %esi, %edi, %eax
				; CHECK-NEXT: retq
				entry:
				%sub = sub i32 32, %b
				%shl = shl i32 %a, %sub
				%shr = lshr i32 %shl, %sub
				ret i32 %shr
				}

	define i64 @bzhi64b(i64 %x, i8 zeroext %index) {			define i64 @bzhi64b(i64 %x, i8 zeroext %index) {
	; CHECK-LABEL: bzhi64b:			; CHECK-LABEL: bzhi64b:
	; CHECK: # BB#0: # %entry			; CHECK: # BB#0: # %entry
	; CHECK-NEXT: # kill: %ESI<def> %ESI<kill> %RSI<def>			; CHECK-NEXT: # kill: %ESI<def> %ESI<kill> %RSI<def>
	; CHECK-NEXT: bzhiq %rsi, %rdi, %rax			; CHECK-NEXT: bzhiq %rsi, %rdi, %rax
	; CHECK-NEXT: retq			; CHECK-NEXT: retq
	entry:			entry:
	%conv = zext i8 %index to i64			%conv = zext i8 %index to i64
	%shl = shl i64 1, %conv			%shl = shl i64 1, %conv
	%sub = add nsw i64 %shl, -1			%sub = add nsw i64 %shl, -1
	%and = and i64 %x, %sub			%and = and i64 %x, %sub
	ret i64 %and			ret i64 %and
	}			}

				define i64 @bzhi64c(i64 %a, i64 %b) {
				; CHECK-LABEL: bzhi64c:
				; CHECK: # BB#0: # %entry
				; CHECK-NEXT: bzhiq %rsi, %rdi, %rax
				; CHECK-NEXT: retq
				entry:
				%sub = sub i64 64, %b
				%shr = lshr i64 -1, %sub
				%and = and i64 %shr, %a
				ret i64 %and
				}

				define i64 @bzhi64d(i64 %a, i32 %b) {
				; CHECK-LABEL: bzhi64d:
				; CHECK: # BB#0: # %entry
				; CHECK-NEXT: # kill: %ESI<def> %ESI<kill> %RSI<def>
				; CHECK-NEXT: bzhiq %rsi, %rdi, %rax
				; CHECK-NEXT: retq
				entry:
				%sub = sub i32 64, %b
				%sh_prom = zext i32 %sub to i64
				%shr = lshr i64 -1, %sh_prom
				%and = and i64 %shr, %a
				ret i64 %and
				}

				define i64 @bzhi64e(i64 %a, i64 %b) {
				; CHECK-LABEL: bzhi64e:
				; CHECK: # BB#0: # %entry
				; CHECK-NEXT: bzhiq %rsi, %rdi, %rax
				; CHECK-NEXT: retq
				entry:
				%sub = sub i64 64, %b
				%shl = shl i64 %a, %sub
				%shr = lshr i64 %shl, %sub
				ret i64 %shr
				}

				define i64 @bzhi64f(i64 %a, i32 %b) {
				; CHECK-LABEL: bzhi64f:
				; CHECK: # BB#0: # %entry
				; CHECK-NEXT: # kill: %ESI<def> %ESI<kill> %RSI<def>
				; CHECK-NEXT: bzhiq %rsi, %rdi, %rax
				; CHECK-NEXT: retq
				entry:
				%sub = sub i32 64, %b
				%sh_prom = zext i32 %sub to i64
				%shl = shl i64 %a, %sh_prom
				%shr = lshr i64 %shl, %sh_prom
				ret i64 %shr
				}

	define i64 @bzhi64_constant_mask(i64 %x) {			define i64 @bzhi64_constant_mask(i64 %x) {
	; CHECK-LABEL: bzhi64_constant_mask:			; CHECK-LABEL: bzhi64_constant_mask:
	; CHECK: # BB#0: # %entry			; CHECK: # BB#0: # %entry
	; CHECK-NEXT: movb $62, %al			; CHECK-NEXT: movb $62, %al
	; CHECK-NEXT: bzhiq %rax, %rdi, %rax			; CHECK-NEXT: bzhiq %rax, %rdi, %rax
	; CHECK-NEXT: retq			; CHECK-NEXT: retq
	entry:			entry:
	%and = and i64 %x, 4611686018427387903			%and = and i64 %x, 4611686018427387903
	▲ Show 20 Lines • Show All 171 Lines • Show Last 20 Lines