This is an archive of the discontinued LLVM Phabricator instance.

[InstCombine] look through bswaps for equality comparisons
ClosedPublic

Authored by spatel on Jun 28 2017, 10:06 AM.

Download Raw Diff

Details

Reviewers

efriedma
craig.topper
RKSimon

Commits

rGc3d5cf0bb71f: [InstCombine] look through bswap/bitreverse for equality comparisons
rL306980: [InstCombine] look through bswap/bitreverse for equality comparisons

Summary

I noticed this missed optimization in the CGP memcmp() expansion, and then saw that we don't have the fold in InstCombine.

It wasn't immediately clear to me that a vector bswap swaps the bytes of each element in the vector while leaving the elements in place. Should I add a blurb about that in the LangRef?

Diff Detail

Event Timeline

spatel created this revision. · Jun 28 2017, 10:06 AM

Herald added a subscriber: mcrosier. · View Herald Transcript · Jun 28 2017, 10:06 AM

You should be able to do the same for bitreverse as well.

Worth handling the cmp( bswap(x), constant ) -> cmp( x, bswap(constant) ) case as well?

Patch updated:

Added bitreverse transform since that's a close relative.
I made the constant operand matcher a TODO for now since I don't have evidence of it.

Interesting side note: we probably don't want to make this change in the memcmp expansion yet because we produce worse (sometimes substantially worse) code for both PPC and x86 with this transform. I.e., we fail to use a byte-reversing memory instruction, and/or we have more register usage and several extra instructions because of that.

LGTM. I think updating the LangRef to explain that bswap/bitreverse apply within each vector element (and not across them) is a good idea.

This revision is now accepted and ready to land. · Jun 29 2017, 2:27 AM

This was closed with rL306980 (Phab was dead when that commit was made).

Revision Contents

Path

Size

lib/

Transforms/

InstCombine/

InstCombineCompares.cpp

9 lines

test/

Transforms/

InstCombine/

icmp.ll

16 lines

Diff 104496

lib/Transforms/InstCombine/InstCombineCompares.cpp

Show First 20 Lines • Show All 3,431 Lines • ▼ Show 20 Lines	if (ShAmt < ASize) {
APInt CmpV = Cst1->getValue().zext(ASize);		APInt CmpV = Cst1->getValue().zext(ASize);
CmpV <<= ShAmt;		CmpV <<= ShAmt;

Value *Mask = Builder->CreateAnd(A, Builder->getInt(MaskV));		Value *Mask = Builder->CreateAnd(A, Builder->getInt(MaskV));
return new ICmpInst(Pred, Mask, Builder->getInt(CmpV));		return new ICmpInst(Pred, Mask, Builder->getInt(CmpV));
}		}
}		}

		// If both operands are byte-swapped or bit-reversed, just compare the
		// original values.
		// TODO: If one operand is a constant, we can swap/reverse it and eliminate
		// the use of the swapped/reversed variable.
		if ((match(Op0, m_BSwap(m_Value(A))) && match(Op1, m_BSwap(m_Value(B)))) ||
		(match(Op0, m_Intrinsic<Intrinsic::bitreverse>(m_Value(A))) &&
		match(Op1, m_Intrinsic<Intrinsic::bitreverse>(m_Value(B)))))
		return new ICmpInst(Pred, A, B);

return nullptr;		return nullptr;
}		}

/// Handle icmp (cast x to y), (cast/cst). We only handle extending casts so		/// Handle icmp (cast x to y), (cast/cst). We only handle extending casts so
/// far.		/// far.
Instruction *InstCombiner::foldICmpWithCastAndCast(ICmpInst &ICmp) {		Instruction *InstCombiner::foldICmpWithCastAndCast(ICmpInst &ICmp) {
const CastInst *LHSCI = cast<CastInst>(ICmp.getOperand(0));		const CastInst *LHSCI = cast<CastInst>(ICmp.getOperand(0));
Value *LHSCIOp = LHSCI->getOperand(0);		Value *LHSCIOp = LHSCI->getOperand(0);
▲ Show 20 Lines • Show All 1,638 Lines • Show Last 20 Lines

test/Transforms/InstCombine/icmp.ll

Show First 20 Lines • Show All 2,973 Lines • ▼ Show 20 Lines	;
%C = icmp eq <2 x i32> %A, %B		%C = icmp eq <2 x i32> %A, %B
ret <2 x i1> %C		ret <2 x i1> %C
}		}

declare i32 @llvm.bswap.i32(i32)		declare i32 @llvm.bswap.i32(i32)

define i1 @bswap_ne(i32 %x, i32 %y) {		define i1 @bswap_ne(i32 %x, i32 %y) {
; CHECK-LABEL: @bswap_ne(		; CHECK-LABEL: @bswap_ne(
; CHECK-NEXT: [[SWAPX:%.*]] = call i32 @llvm.bswap.i32(i32 %x)		; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 %x, %y
; CHECK-NEXT: [[SWAPY:%.*]] = call i32 @llvm.bswap.i32(i32 %y)
; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[SWAPX]], [[SWAPY]]
; CHECK-NEXT: ret i1 [[CMP]]		; CHECK-NEXT: ret i1 [[CMP]]
;		;
%swapx = call i32 @llvm.bswap.i32(i32 %x)		%swapx = call i32 @llvm.bswap.i32(i32 %x)
%swapy = call i32 @llvm.bswap.i32(i32 %y)		%swapy = call i32 @llvm.bswap.i32(i32 %y)
%cmp = icmp ne i32 %swapx, %swapy		%cmp = icmp ne i32 %swapx, %swapy
ret i1 %cmp		ret i1 %cmp
}		}

declare <8 x i16> @llvm.bswap.v8i16(<8 x i16>)		declare <8 x i16> @llvm.bswap.v8i16(<8 x i16>)

define <8 x i1> @bswap_vec_eq(<8 x i16> %x, <8 x i16> %y) {		define <8 x i1> @bswap_vec_eq(<8 x i16> %x, <8 x i16> %y) {
; CHECK-LABEL: @bswap_vec_eq(		; CHECK-LABEL: @bswap_vec_eq(
; CHECK-NEXT: [[SWAPX:%.*]] = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %x)		; CHECK-NEXT: [[CMP:%.*]] = icmp eq <8 x i16> %x, %y
; CHECK-NEXT: [[SWAPY:%.*]] = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %y)
; CHECK-NEXT: [[CMP:%.*]] = icmp eq <8 x i16> [[SWAPX]], [[SWAPY]]
; CHECK-NEXT: ret <8 x i1> [[CMP]]		; CHECK-NEXT: ret <8 x i1> [[CMP]]
;		;
%swapx = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %x)		%swapx = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %x)
%swapy = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %y)		%swapy = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %y)
%cmp = icmp eq <8 x i16> %swapx, %swapy		%cmp = icmp eq <8 x i16> %swapx, %swapy
ret <8 x i1> %cmp		ret <8 x i1> %cmp
}		}

declare i64 @llvm.bitreverse.i64(i64)		declare i64 @llvm.bitreverse.i64(i64)

define i1 @bitreverse_eq(i64 %x, i64 %y) {		define i1 @bitreverse_eq(i64 %x, i64 %y) {
; CHECK-LABEL: @bitreverse_eq(		; CHECK-LABEL: @bitreverse_eq(
; CHECK-NEXT: [[REVX:%.*]] = call i64 @llvm.bitreverse.i64(i64 %x)		; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 %x, %y
; CHECK-NEXT: [[REVY:%.*]] = call i64 @llvm.bitreverse.i64(i64 %y)
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[REVX]], [[REVY]]
; CHECK-NEXT: ret i1 [[CMP]]		; CHECK-NEXT: ret i1 [[CMP]]
;		;
%revx = call i64 @llvm.bitreverse.i64(i64 %x)		%revx = call i64 @llvm.bitreverse.i64(i64 %x)
%revy = call i64 @llvm.bitreverse.i64(i64 %y)		%revy = call i64 @llvm.bitreverse.i64(i64 %y)
%cmp = icmp eq i64 %revx, %revy		%cmp = icmp eq i64 %revx, %revy
ret i1 %cmp		ret i1 %cmp
}		}

declare <8 x i16> @llvm.bitreverse.v8i16(<8 x i16>)		declare <8 x i16> @llvm.bitreverse.v8i16(<8 x i16>)

define <8 x i1> @bitreverse_vec_ne(<8 x i16> %x, <8 x i16> %y) {		define <8 x i1> @bitreverse_vec_ne(<8 x i16> %x, <8 x i16> %y) {
; CHECK-LABEL: @bitreverse_vec_ne(		; CHECK-LABEL: @bitreverse_vec_ne(
; CHECK-NEXT: [[REVX:%.*]] = call <8 x i16> @llvm.bitreverse.v8i16(<8 x i16> %x)		; CHECK-NEXT: [[CMP:%.*]] = icmp ne <8 x i16> %x, %y
; CHECK-NEXT: [[REVY:%.*]] = call <8 x i16> @llvm.bitreverse.v8i16(<8 x i16> %y)
; CHECK-NEXT: [[CMP:%.*]] = icmp ne <8 x i16> [[REVX]], [[REVY]]
; CHECK-NEXT: ret <8 x i1> [[CMP]]		; CHECK-NEXT: ret <8 x i1> [[CMP]]
;		;
%revx = call <8 x i16> @llvm.bitreverse.v8i16(<8 x i16> %x)		%revx = call <8 x i16> @llvm.bitreverse.v8i16(<8 x i16> %x)
%revy = call <8 x i16> @llvm.bitreverse.v8i16(<8 x i16> %y)		%revy = call <8 x i16> @llvm.bitreverse.v8i16(<8 x i16> %y)
%cmp = icmp ne <8 x i16> %revx, %revy		%cmp = icmp ne <8 x i16> %revx, %revy
ret <8 x i1> %cmp		ret <8 x i1> %cmp
}		}