Diff 557744

llvm/include/llvm/CodeGen/TargetLowering.h

Show First 20 Lines • Show All 815 Lines • ▼ Show 20 Lines	public:
// (fdiv C, (uitofp Pow2))		// (fdiv C, (uitofp Pow2))
// -> (bitcast_to_FP (sub (bitcast_to_INT C), Log2(Pow2) << mantissa))		// -> (bitcast_to_FP (sub (bitcast_to_INT C), Log2(Pow2) << mantissa))
//		//
// This is only queried after we have verified the transform will be bitwise		// This is only queried after we have verified the transform will be bitwise
// equals.		// equals.
//		//
// SDNode *N : The FDiv/FMul node we want to transform.		// SDNode *N : The FDiv/FMul node we want to transform.
// SDValue FPConst: The Float constant operand in `N`.		// SDValue FPConst: The Float constant operand in `N`.
// SDValue IntPow2: The Integer power of 2 operand in `N`.		// SDValue IntPow2: The Integer power of 2 operand in `N`.
		RKSimonUnsubmitted Not Done Reply Inline Actions This function name seems very general and yet the transform seems very specific. RKSimon: This function name seems very general and yet the transform seems very specific.
		goldstein.w.nAuthorUnsubmitted Done Reply Inline Actions Changed to "PiecesOfOperand" hopefully thats clearer. Otherwise, what would you suggest. "OperandPieces" is mean to refer to the fact that this for comparisons where we are comparing a part of X with another part of X (i.e (X & 255) == (X >> 8)` is comparing that the top 8bits (one piece) equals the low 8bits (another piece). goldstein.w.n: Changed to "PiecesOfOperand" hopefully thats clearer. Otherwise, what would you suggest.
virtual bool optimizeFMulOrFDivAsShiftAddBitcast(SDNode *N, SDValue FPConst,		virtual bool optimizeFMulOrFDivAsShiftAddBitcast(SDNode *N, SDValue FPConst,
		RKSimonUnsubmitted Not Done Reply Inline Actions Maybe EVT /VT/ style would be better than all the (void) ? RKSimon: Maybe EVT /VT/ style would be better than all the (void) ?
		goldstein.w.nAuthorUnsubmitted Done Reply Inline Actions What is EVT /VT/ style? goldstein.w.n: What is EVT /VT/ style?
SDValue IntPow2) const {		SDValue IntPow2) const {
// Default to avoiding fdiv which is often very expensive.		// Default to avoiding fdiv which is often very expensive.
return N->getOpcode() == ISD::FDIV;		return N->getOpcode() == ISD::FDIV;
}		}

		// Given:
		// (icmp eq/ne (and X, C0), (shift X, C1))
		RKSimonUnsubmitted Done Reply Inline Actions Remove the (void)s RKSimon: Remove the (void)s
		// or
		// (icmp eq/ne X, (rotate X, CPow2))

		// If C0 is a mask or shifted mask and the shift amount (C1) isolates the
		// remaining bits (i.e. something like `(x64 & UINT32_MAX) == (x64 >> 32)`),
		// does the target prefer the shift to be shift-right, shift-left, or rotate?
		// Note: It's only valid to convert the rotate version to the shift version iff
		// the shift amount (`C1`) is a power of 2 (including 0).
		// If ShiftOpc (the current opcode) is returned, do nothing.
		// Parameters:
		//   VT                 - value type the target is being asked about.
		//   ShiftOpc           - opcode of the shift/rotate currently in the compare.
		//   MayTransformRotate - whether switching between the rotate form and the
		//                        shift+and form is allowed for this amount.
		//   ShiftOrRotateAmt   - the constant shift/rotate amount.
		//   AndMask            - the constant AND mask of the shift+and form, if any.
		// Returns the opcode the target would rather see.
		virtual unsigned preferedOpcodeForCmpEqPiecesOfOperand(
		EVT VT, unsigned ShiftOpc, bool MayTransformRotate,
		const APInt &ShiftOrRotateAmt,
		const std::optional<APInt> &AndMask) const {
		// Default: express no preference by handing back the incoming opcode.
		return ShiftOpc;
		}

/// These two forms are equivalent:		/// These two forms are equivalent:
/// sub %y, (xor %x, -1)		/// sub %y, (xor %x, -1)
/// add (add %x, 1), %y		/// add (add %x, 1), %y
/// The variant with two add's is IR-canonical.		/// The variant with two add's is IR-canonical.
/// Some targets may prefer one to the other.		/// Some targets may prefer one to the other.
virtual bool preferIncOfAddToSubOfNot(EVT VT) const {		virtual bool preferIncOfAddToSubOfNot(EVT VT) const {
// By default, let's assume that everyone prefers the form with two add's.		// By default, let's assume that everyone prefers the form with two add's.
return true;		return true;
▲ Show 20 Lines • Show All 4,575 Lines • Show Last 20 Lines

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 12,437 Lines • ▼ Show 20 Lines	SDValue DAGCombiner::visitSETCC(SDNode *N) {
// setcc is very commonly used as an argument to brcond. This pattern		// setcc is very commonly used as an argument to brcond. This pattern
// also lends itself to numerous combines and, as a result, it is desired		// also lends itself to numerous combines and, as a result, it is desired
// we keep the argument to a brcond as a setcc as much as possible.		// we keep the argument to a brcond as a setcc as much as possible.
bool PreferSetCC =		bool PreferSetCC =
N->hasOneUse() && N->use_begin()->getOpcode() == ISD::BRCOND;		N->hasOneUse() && N->use_begin()->getOpcode() == ISD::BRCOND;

ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();		ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
EVT VT = N->getValueType(0);		EVT VT = N->getValueType(0);
		SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);

SDValue Combined = SimplifySetCC(VT, N->getOperand(0), N->getOperand(1), Cond,		SDValue Combined = SimplifySetCC(VT, N0, N1, Cond, SDLoc(N), !PreferSetCC);
		RKSimonUnsubmitted Done Reply Inline Actions SimplifySetCC(VT, N0, N1, Cond, .... RKSimon: SimplifySetCC(VT, N0, N1, Cond, ....
SDLoc(N), !PreferSetCC);

if (!Combined)
return SDValue();

		if (Combined) {
// If we prefer to have a setcc, and we don't, we'll try our best to		// If we prefer to have a setcc, and we don't, we'll try our best to
// recreate one using rebuildSetCC.		// recreate one using rebuildSetCC.
if (PreferSetCC && Combined.getOpcode() != ISD::SETCC) {		if (PreferSetCC && Combined.getOpcode() != ISD::SETCC) {
SDValue NewSetCC = rebuildSetCC(Combined);		SDValue NewSetCC = rebuildSetCC(Combined);

// We don't have anything interesting to combine to.		// We don't have anything interesting to combine to.
if (NewSetCC.getNode() == N)		if (NewSetCC.getNode() == N)
return SDValue();		return SDValue();

if (NewSetCC)		if (NewSetCC)
return NewSetCC;		return NewSetCC;
}		}

return Combined;		return Combined;
}		}

		// Optimize
		// 1) (icmp eq/ne (and X, C0), (shift X, C1))
		// or
		// 2) (icmp eq/ne X, (rotate X, C1))
		// If C0 is a mask or shifted mask and the shift amt (C1) isolates the
		// remaining bits (i.e something like `(x64 & UINT32_MAX) == (x64 >> 32)`)
		// Then:
		// If C1 is a power of 2, then the rotate and shift+and versions are
		// equivalent, so we can interchange them depending on target preference.
		// Otherwise, if we have the shift+and version, we can interchange srl/shl,
		// which in turn affects the constant C0. We can use this to get better
		// constants, again determined by target preference.
		if (Cond == ISD::SETNE || Cond == ISD::SETEQ) {
		auto IsAndWithShift = [](SDValue A, SDValue B) {
		return A.getOpcode() == ISD::AND &&
		(B.getOpcode() == ISD::SRL || B.getOpcode() == ISD::SHL) &&
		A.getOperand(0) == B.getOperand(0);
		};
		auto IsRotateWithOp = [](SDValue A, SDValue B) {
		return (B.getOpcode() == ISD::ROTL || B.getOpcode() == ISD::ROTR) &&
		B.getOperand(0) == A;
		};
		SDValue AndOrOp = SDValue(), ShiftOrRotate = SDValue();
		bool IsRotate = false;

		// Find either shift+and or rotate pattern.
		if (IsAndWithShift(N0, N1)) {
		AndOrOp = N0;
		ShiftOrRotate = N1;
		} else if (IsAndWithShift(N1, N0)) {
		AndOrOp = N1;
		ShiftOrRotate = N0;
		} else if (IsRotateWithOp(N0, N1)) {
		IsRotate = true;
		AndOrOp = N0;
		ShiftOrRotate = N1;
		} else if (IsRotateWithOp(N1, N0)) {
		IsRotate = true;
		AndOrOp = N1;
		ShiftOrRotate = N0;
		}

		if (AndOrOp && ShiftOrRotate && ShiftOrRotate.hasOneUse() &&
		(IsRotate || AndOrOp.hasOneUse())) {
		EVT OpVT = N0.getValueType();
		// Get constant shift/rotate amount and possibly mask (if its shift+and
		// variant).
		auto GetAPIntValue = [](SDValue Op) -> std::optional<APInt> {
		ConstantSDNode *CNode = isConstOrConstSplat(Op, /*AllowUndefs*/ false,
		/*AllowTrunc*/ false);
		if (CNode == nullptr)
		return std::nullopt;
		return CNode->getAPIntValue();
		};
		std::optional<APInt> AndCMask =
		IsRotate ? std::nullopt : GetAPIntValue(AndOrOp.getOperand(1));
		std::optional<APInt> ShiftCAmt =
		GetAPIntValue(ShiftOrRotate.getOperand(1));
		unsigned NumBits = OpVT.getScalarSizeInBits();

		// We found constants.
		if (ShiftCAmt && (IsRotate || AndCMask) && ShiftCAmt->ult(NumBits)) {
		unsigned ShiftOpc = ShiftOrRotate.getOpcode();
		// Check that the constants meet the constraints.
		bool CanTransform =
		IsRotate ||
		(*ShiftCAmt == (~*AndCMask).popcount() && ShiftOpc == ISD::SHL
		? (~*AndCMask).isMask()
		: AndCMask->isMask());
		RKSimonUnsubmitted Not Done Reply Inline Actions (~*AndCMask).popcount() - is this the same as !AndCMask->isAllOnes() ? RKSimon: (~*AndCMask).popcount() - is this the same as !AndCMask->isAllOnes() ?
		goldstein.w.nAuthorUnsubmitted Done Reply Inline Actions No, we are checking for something like: `(icmp eq (and X, 0x1f), (srl, X, 3)`. Not all ones. goldstein.w.n: No, we are checking for something like: `(icmp eq (and X, 0x1f), (srl, X, 3)`. Not all ones.
		RKSimonUnsubmitted Not Done Reply Inline Actions Test ShiftOpc == ISD::SHL before the popcount as its cheaper? RKSimon: Test ShiftOpc == ISD::SHL before the popcount as its cheaper?

		// See if target prefers another shift/rotate opcode.
		unsigned NewShiftOpc = TLI.preferedOpcodeForCmpEqPiecesOfOperand(
		OpVT, ShiftOpc, ShiftCAmt->isPowerOf2(), *ShiftCAmt, AndCMask);
		// Transform is valid and we have a new preference.
		if (CanTransform && NewShiftOpc != ShiftOpc) {
		SDLoc DL(N);
		SDValue NewShiftOrRotate =
		DAG.getNode(NewShiftOpc, DL, OpVT, ShiftOrRotate.getOperand(0),
		ShiftOrRotate.getOperand(1));
		SDValue NewAndOrOp = SDValue();

		if (NewShiftOpc == ISD::SHL || NewShiftOpc == ISD::SRL) {
		APInt NewMask =
		NewShiftOpc == ISD::SHL
		? APInt::getHighBitsSet(NumBits,
		NumBits - ShiftCAmt->getZExtValue())
		: APInt::getLowBitsSet(NumBits,
		NumBits - ShiftCAmt->getZExtValue());
		NewAndOrOp =
		DAG.getNode(ISD::AND, DL, OpVT, ShiftOrRotate.getOperand(0),
		DAG.getConstant(NewMask, DL, OpVT));
		} else {
		NewAndOrOp = ShiftOrRotate.getOperand(0);
		}

		return DAG.getSetCC(DL, VT, NewAndOrOp, NewShiftOrRotate, Cond);
		}
		}
		}
		}
		return SDValue();
		RKSimonUnsubmitted Done Reply Inline Actions Isn't this the same as Cond at line#12405? RKSimon: Isn't this the same as Cond at line#12405?
		}

SDValue DAGCombiner::visitSETCCCARRY(SDNode *N) {		SDValue DAGCombiner::visitSETCCCARRY(SDNode *N) {
SDValue LHS = N->getOperand(0);		SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);		SDValue RHS = N->getOperand(1);
SDValue Carry = N->getOperand(2);		SDValue Carry = N->getOperand(2);
SDValue Cond = N->getOperand(3);		SDValue Cond = N->getOperand(3);

// If Carry is false, fold to a regular SETCC.		// If Carry is false, fold to a regular SETCC.
if (isNullConstant(Carry))		if (isNullConstant(Carry))
▲ Show 20 Lines • Show All 15,616 Lines • Show Last 20 Lines

llvm/lib/Target/X86/X86ISelLowering.h

Show First 20 Lines • Show All 1,132 Lines • ▼ Show 20 Lines	public:

bool hasBitTest(SDValue X, SDValue Y) const override;		bool hasBitTest(SDValue X, SDValue Y) const override;

bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(		bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,		SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
unsigned OldShiftOpcode, unsigned NewShiftOpcode,		unsigned OldShiftOpcode, unsigned NewShiftOpcode,
SelectionDAG &DAG) const override;		SelectionDAG &DAG) const override;

		unsigned preferedOpcodeForCmpEqPiecesOfOperand(
		EVT VT, unsigned ShiftOpc, bool MayTransformRotate,
		const APInt &ShiftOrRotateAmt,
		const std::optional<APInt> &AndMask) const override;

bool preferScalarizeSplat(SDNode *N) const override;		bool preferScalarizeSplat(SDNode *N) const override;

bool shouldFoldConstantShiftPairToMask(const SDNode *N,		bool shouldFoldConstantShiftPairToMask(const SDNode *N,
CombineLevel Level) const override;		CombineLevel Level) const override;

bool shouldFoldMaskToVariableShiftPair(SDValue Y) const override;		bool shouldFoldMaskToVariableShiftPair(SDValue Y) const override;

bool		bool
▲ Show 20 Lines • Show All 742 Lines • Show Last 20 Lines

llvm/lib/Target/X86/X86ISelLowering.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 3,251 Lines • ▼ Show 20 Lines	if (DAG.isSplatValue(Y, /*AllowUndefs=*/true))
return true;		return true;
// If we have AVX2 with it's powerful shift operations, then it's also good.		// If we have AVX2 with it's powerful shift operations, then it's also good.
if (Subtarget.hasAVX2())		if (Subtarget.hasAVX2())
return true;		return true;
// Pre-AVX2 vector codegen for this pattern is best for variant with 'shl'.		// Pre-AVX2 vector codegen for this pattern is best for variant with 'shl'.
return NewShiftOpcode == ISD::SHL;		return NewShiftOpcode == ISD::SHL;
}		}

		unsigned X86TargetLowering::preferedOpcodeForCmpEqPiecesOfOperand(
		EVT VT, unsigned ShiftOpc, bool MayTransformRotate,
		const APInt &ShiftOrRotateAmt, const std::optional<APInt> &AndMask) const {
		if (!VT.isInteger())
		return ShiftOpc;

		bool PreferRotate = false;
		if (VT.isVector()) {
		// For vectors, if we have rotate instruction support, then it's definitely
		// best. Otherwise it's not clear what is best, so just don't make changes.
		PreferRotate = Subtarget.hasAVX512() && (VT.getScalarType() == MVT::i32 ||
		VT.getScalarType() == MVT::i64);
		} else {
		// For scalar, if we have BMI2, prefer rotate (for rorx). Otherwise prefer
		// rotate unless we have a zext mask+shr.
		PreferRotate = Subtarget.hasBMI2();
		if (!PreferRotate) {
		unsigned MaskBits =
		VT.getScalarSizeInBits() - ShiftOrRotateAmt.getZExtValue();
		PreferRotate = (MaskBits != 8) && (MaskBits != 16) && (MaskBits != 32);
		}
		}

		if (ShiftOpc == ISD::SHL || ShiftOpc == ISD::SRL) {
		assert(AndMask.has_value() && "Null andmask when querying about shift+and");
		RKSimonUnsubmitted Done Reply Inline Actions AndMask.has_value() ? RKSimon: AndMask.has_value() ?

		if (PreferRotate && MayTransformRotate)
		return ISD::ROTL;

		// If vector we don't really get much benefit swapping around constants.
		// Maybe we could check if the DAG has the flipped node already in the
		// future.
		if (VT.isVector())
		return ShiftOpc;

		// See if it is beneficial to swap the shift type.
		if (ShiftOpc == ISD::SHL) {
		// If the current setup has imm64 mask, then inverse will have
		// at least imm32 mask (or be zext i32 -> i64).
		if (VT == MVT::i64)
		return AndMask->getSignificantBits() > 32 ? ISD::SRL : ShiftOpc;

		// We can only benefit if req at least 7-bit for the mask. We
		RKSimonUnsubmitted Done Reply Inline Actions (style) remove the else - the if case always returns: // If the current setup has imm64 mask, then inverse will have // at least imm32 mask (or be zext i32 -> i64). if (VT == MVT::i64) return AndMask->getSignificantBits() > 32 ? ISD::SRL : ShiftOpc; // We can only benefit if req at least 7-bit for the mask. We // don't want to replace shl of 1,2,3 as they can be implemented // with lea/add. return ShiftOrRotateAmt.uge(7) ? ISD::SRL : ShiftOpc; RKSimon: (style) remove the else - the if case always returns: ``` // If the current setup has imm64…
		// don't want to replace shl of 1,2,3 as they can be implemented
		// with lea/add.
		return ShiftOrRotateAmt.uge(7) ? ISD::SRL : ShiftOpc;
		}

		if (VT == MVT::i64)
		// Keep exactly 32-bit imm64, this is zext i32 -> i64 which is
		// extremely efficient.
		return AndMask->getSignificantBits() > 33 ? ISD::SHL : ShiftOpc;

		// Keep small shifts as shl so we can generate add/lea.
		return ShiftOrRotateAmt.ult(7) ? ISD::SHL : ShiftOpc;
		}

		// We prefer rotate for vectors or if we won't get a zext mask with SRL
		// (PreferRotate will be set in the latter case).
		if (PreferRotate || VT.isVector())
		return ShiftOpc;

		// Non-vector type and we have a zext mask with SRL.
		RKSimonUnsubmitted Done Reply Inline Actions Please can you rephrase this? Its rather confusing. RKSimon: Please can you rephrase this? Its rather confusing.
		return ISD::SRL;
		}

bool X86TargetLowering::preferScalarizeSplat(SDNode *N) const {		bool X86TargetLowering::preferScalarizeSplat(SDNode *N) const {
return N->getOpcode() != ISD::FP_EXTEND;		return N->getOpcode() != ISD::FP_EXTEND;
}		}

bool X86TargetLowering::shouldFoldConstantShiftPairToMask(		bool X86TargetLowering::shouldFoldConstantShiftPairToMask(
const SDNode *N, CombineLevel Level) const {		const SDNode *N, CombineLevel Level) const {
assert(((N->getOpcode() == ISD::SHL &&		assert(((N->getOpcode() == ISD::SHL &&
N->getOperand(0).getOpcode() == ISD::SRL) ||		N->getOperand(0).getOpcode() == ISD::SRL) ||
▲ Show 20 Lines • Show All 54,176 Lines • Show Last 20 Lines

llvm/test/CodeGen/X86/cmp-shiftX-maskX.ll

Show All 14 Lines
declare i16 @llvm.fshr.i16(i16, i16, i16)		declare i16 @llvm.fshr.i16(i16, i16, i16)
declare i8 @llvm.fshl.i8(i8, i8, i8)		declare i8 @llvm.fshl.i8(i8, i8, i8)
declare i8 @llvm.fshr.i8(i8, i8, i8)		declare i8 @llvm.fshr.i8(i8, i8, i8)

define i1 @shr_to_shl_eq_i8_s2(i8 %x) {		define i1 @shr_to_shl_eq_i8_s2(i8 %x) {
; CHECK-LABEL: shr_to_shl_eq_i8_s2:		; CHECK-LABEL: shr_to_shl_eq_i8_s2:
; CHECK: # %bb.0:		; CHECK: # %bb.0:
; CHECK-NEXT: movl %edi, %eax		; CHECK-NEXT: movl %edi, %eax
; CHECK-NEXT: andb $63, %al		; CHECK-NEXT: rolb $2, %al
; CHECK-NEXT: shrb $2, %dil		; CHECK-NEXT: cmpb %al, %dil
; CHECK-NEXT: cmpb %dil, %al
; CHECK-NEXT: sete %al		; CHECK-NEXT: sete %al
; CHECK-NEXT: retq		; CHECK-NEXT: retq
%and = and i8 %x, 63		%and = and i8 %x, 63
%shr = lshr i8 %x, 2		%shr = lshr i8 %x, 2
%r = icmp eq i8 %and, %shr		%r = icmp eq i8 %and, %shr
ret i1 %r		ret i1 %r
}		}

define i1 @shl_to_shr_ne_i8_s7(i8 %x) {		define i1 @shl_to_shr_ne_i8_s7(i8 %x) {
; CHECK-LABEL: shl_to_shr_ne_i8_s7:		; CHECK-LABEL: shl_to_shr_ne_i8_s7:
; CHECK: # %bb.0:		; CHECK: # %bb.0:
; CHECK-NEXT: movl %edi, %eax		; CHECK-NEXT: movl %edi, %eax
; CHECK-NEXT: shlb $7, %al		; CHECK-NEXT: shrb $7, %al
; CHECK-NEXT: andb $-128, %dil		; CHECK-NEXT: andb $1, %dil
; CHECK-NEXT: cmpb %dil, %al		; CHECK-NEXT: cmpb %al, %dil
; CHECK-NEXT: setne %al		; CHECK-NEXT: setne %al
; CHECK-NEXT: retq		; CHECK-NEXT: retq
%shl = shl i8 %x, 7		%shl = shl i8 %x, 7
%and = and i8 %x, 128		%and = and i8 %x, 128
%r = icmp ne i8 %shl, %and		%r = icmp ne i8 %shl, %and
ret i1 %r		ret i1 %r
}		}

Show All 9 Lines	; CHECK-NEXT: retq
%r = icmp ne i8 %ror, %x		%r = icmp ne i8 %ror, %x
ret i1 %r		ret i1 %r
}		}

define i1 @shr_to_shl_eq_i8_s1(i8 %x) {		define i1 @shr_to_shl_eq_i8_s1(i8 %x) {
; CHECK-LABEL: shr_to_shl_eq_i8_s1:		; CHECK-LABEL: shr_to_shl_eq_i8_s1:
; CHECK: # %bb.0:		; CHECK: # %bb.0:
; CHECK-NEXT: movl %edi, %eax		; CHECK-NEXT: movl %edi, %eax
; CHECK-NEXT: andb $127, %al		; CHECK-NEXT: rolb %al
; CHECK-NEXT: shrb %dil		; CHECK-NEXT: cmpb %al, %dil
; CHECK-NEXT: cmpb %dil, %al
; CHECK-NEXT: sete %al		; CHECK-NEXT: sete %al
; CHECK-NEXT: retq		; CHECK-NEXT: retq
%and = and i8 %x, 127		%and = and i8 %x, 127
%shr = lshr i8 %x, 1		%shr = lshr i8 %x, 1
%r = icmp eq i8 %and, %shr		%r = icmp eq i8 %and, %shr
ret i1 %r		ret i1 %r
}		}

define i1 @shr_to_shl_eq_i32_s3(i32 %x) {		define i1 @shr_to_shl_eq_i32_s3(i32 %x) {
; CHECK-LABEL: shr_to_shl_eq_i32_s3:		; CHECK-LABEL: shr_to_shl_eq_i32_s3:
; CHECK: # %bb.0:		; CHECK: # %bb.0:
; CHECK-NEXT: movl %edi, %eax		; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
; CHECK-NEXT: andl $536870911, %eax # imm = 0x1FFFFFFF		; CHECK-NEXT: leal (,%rdi,8), %eax
; CHECK-NEXT: shrl $3, %edi		; CHECK-NEXT: andl $-8, %edi
; CHECK-NEXT: cmpl %edi, %eax		; CHECK-NEXT: cmpl %eax, %edi
; CHECK-NEXT: sete %al		; CHECK-NEXT: sete %al
; CHECK-NEXT: retq		; CHECK-NEXT: retq
%and = and i32 %x, 536870911		%and = and i32 %x, 536870911
%shr = lshr i32 %x, 3		%shr = lshr i32 %x, 3
%r = icmp eq i32 %and, %shr		%r = icmp eq i32 %and, %shr
ret i1 %r		ret i1 %r
}		}

define i1 @shl_to_shr_eq_i32_s3_fail(i32 %x) {		define i1 @shl_to_shr_eq_i32_s3_fail(i32 %x) {
; CHECK-LABEL: shl_to_shr_eq_i32_s3_fail:		; CHECK-LABEL: shl_to_shr_eq_i32_s3_fail:
; CHECK: # %bb.0:		; CHECK: # %bb.0:
; CHECK-NEXT: movl %edi, %eax		; CHECK-NEXT: movl %edi, %eax
; CHECK-NEXT: andl $536870911, %eax # imm = 0x1FFFFFFF		; CHECK-NEXT: andl $536870911, %eax # imm = 0x1FFFFFFF
; CHECK-NEXT: shll $3, %edi		; CHECK-NEXT: shll $3, %edi
; CHECK-NEXT: cmpl %edi, %eax		; CHECK-NEXT: cmpl %edi, %eax
; CHECK-NEXT: sete %al		; CHECK-NEXT: sete %al
; CHECK-NEXT: retq		; CHECK-NEXT: retq
%and = and i32 %x, 536870911		%and = and i32 %x, 536870911
%shr = shl i32 %x, 3		%shr = shl i32 %x, 3
%r = icmp eq i32 %and, %shr		%r = icmp eq i32 %and, %shr
ret i1 %r		ret i1 %r
}		}

define i1 @shl_to_shr_ne_i32_s16(i32 %x) {		define i1 @shl_to_shr_ne_i32_s16(i32 %x) {
; CHECK-LABEL: shl_to_shr_ne_i32_s16:		; CHECK-NOBMI-LABEL: shl_to_shr_ne_i32_s16:
; CHECK: # %bb.0:		; CHECK-NOBMI: # %bb.0:
; CHECK-NEXT: movl %edi, %eax		; CHECK-NOBMI-NEXT: movzwl %di, %eax
; CHECK-NEXT: shll $16, %eax		; CHECK-NOBMI-NEXT: shrl $16, %edi
; CHECK-NEXT: andl $-65536, %edi # imm = 0xFFFF0000		; CHECK-NOBMI-NEXT: cmpl %edi, %eax
; CHECK-NEXT: cmpl %edi, %eax		; CHECK-NOBMI-NEXT: setne %al
; CHECK-NEXT: setne %al		; CHECK-NOBMI-NEXT: retq
; CHECK-NEXT: retq		;
		; CHECK-BMI2-LABEL: shl_to_shr_ne_i32_s16:
		; CHECK-BMI2: # %bb.0:
		; CHECK-BMI2-NEXT: rorxl $16, %edi, %eax
		; CHECK-BMI2-NEXT: cmpl %eax, %edi
		; CHECK-BMI2-NEXT: setne %al
		; CHECK-BMI2-NEXT: retq
%shl = shl i32 %x, 16		%shl = shl i32 %x, 16
%and = and i32 %x, 4294901760		%and = and i32 %x, 4294901760
%r = icmp ne i32 %shl, %and		%r = icmp ne i32 %shl, %and
ret i1 %r		ret i1 %r
}		}

define i1 @shl_to_shr_ne_i32_s16_fail(i32 %x) {		define i1 @shl_to_shr_ne_i32_s16_fail(i32 %x) {
; CHECK-LABEL: shl_to_shr_ne_i32_s16_fail:		; CHECK-LABEL: shl_to_shr_ne_i32_s16_fail:
; CHECK: # %bb.0:		; CHECK: # %bb.0:
; CHECK-NEXT: movl %edi, %eax		; CHECK-NEXT: movl %edi, %eax
; CHECK-NEXT: shll $16, %eax		; CHECK-NEXT: shll $16, %eax
; CHECK-NEXT: andl $2147450880, %edi # imm = 0x7FFF8000		; CHECK-NEXT: andl $2147450880, %edi # imm = 0x7FFF8000
; CHECK-NEXT: cmpl %edi, %eax		; CHECK-NEXT: cmpl %edi, %eax
; CHECK-NEXT: setne %al		; CHECK-NEXT: setne %al
; CHECK-NEXT: retq		; CHECK-NEXT: retq
%shl = shl i32 %x, 16		%shl = shl i32 %x, 16
%and = and i32 %x, 2147450880		%and = and i32 %x, 2147450880
%r = icmp ne i32 %shl, %and		%r = icmp ne i32 %shl, %and
ret i1 %r		ret i1 %r
}		}

define i1 @shr_to_shl_eq_i16_s1(i16 %x) {		define i1 @shr_to_shl_eq_i16_s1(i16 %x) {
; CHECK-LABEL: shr_to_shl_eq_i16_s1:		; CHECK-LABEL: shr_to_shl_eq_i16_s1:
; CHECK: # %bb.0:		; CHECK: # %bb.0:
; CHECK-NEXT: movzwl %di, %eax		; CHECK-NEXT: movl %edi, %eax
; CHECK-NEXT: andl $32767, %edi # imm = 0x7FFF		; CHECK-NEXT: rolw %ax
; CHECK-NEXT: shrl %eax
; CHECK-NEXT: cmpw %ax, %di		; CHECK-NEXT: cmpw %ax, %di
; CHECK-NEXT: sete %al		; CHECK-NEXT: sete %al
; CHECK-NEXT: retq		; CHECK-NEXT: retq
%and = and i16 %x, 32767		%and = and i16 %x, 32767
%shr = lshr i16 %x, 1		%shr = lshr i16 %x, 1
%r = icmp eq i16 %and, %shr		%r = icmp eq i16 %and, %shr
ret i1 %r		ret i1 %r
}		}
Show All 11 Lines	; CHECK-NEXT: retq
%shr = lshr i16 %x, 1		%shr = lshr i16 %x, 1
%r = icmp eq i16 %and, %shr		%r = icmp eq i16 %and, %shr
ret i1 %r		ret i1 %r
}		}

define i1 @shl_to_shr_eq_i64_s44(i64 %x) {		define i1 @shl_to_shr_eq_i64_s44(i64 %x) {
; CHECK-LABEL: shl_to_shr_eq_i64_s44:		; CHECK-LABEL: shl_to_shr_eq_i64_s44:
; CHECK: # %bb.0:		; CHECK: # %bb.0:
; CHECK-NEXT: movabsq $-17592186044416, %rax # imm = 0xFFFFF00000000000		; CHECK-NEXT: movq %rdi, %rax
; CHECK-NEXT: andq %rdi, %rax		; CHECK-NEXT: shrq $44, %rax
; CHECK-NEXT: shlq $44, %rdi		; CHECK-NEXT: andl $1048575, %edi # imm = 0xFFFFF
; CHECK-NEXT: cmpq %rax, %rdi		; CHECK-NEXT: cmpq %rax, %rdi
; CHECK-NEXT: sete %al		; CHECK-NEXT: sete %al
; CHECK-NEXT: retq		; CHECK-NEXT: retq
%shl = shl i64 %x, 44		%shl = shl i64 %x, 44
%and = and i64 %x, 18446726481523507200		%and = and i64 %x, 18446726481523507200
%r = icmp eq i64 %shl, %and		%r = icmp eq i64 %shl, %and
ret i1 %r		ret i1 %r
}		}

define i1 @shr_to_shl_ne_i64_s32(i64 %x) {		define i1 @shr_to_shl_ne_i64_s32(i64 %x) {
; CHECK-LABEL: shr_to_shl_ne_i64_s32:		; CHECK-NOBMI-LABEL: shr_to_shl_ne_i64_s32:
; CHECK: # %bb.0:		; CHECK-NOBMI: # %bb.0:
; CHECK-NEXT: movl %edi, %eax		; CHECK-NOBMI-NEXT: movl %edi, %eax
; CHECK-NEXT: shrq $32, %rdi		; CHECK-NOBMI-NEXT: shrq $32, %rdi
; CHECK-NEXT: cmpq %rdi, %rax		; CHECK-NOBMI-NEXT: cmpq %rdi, %rax
; CHECK-NEXT: setne %al		; CHECK-NOBMI-NEXT: setne %al
; CHECK-NEXT: retq		; CHECK-NOBMI-NEXT: retq
		;
		; CHECK-BMI2-LABEL: shr_to_shl_ne_i64_s32:
		; CHECK-BMI2: # %bb.0:
		; CHECK-BMI2-NEXT: rorxq $32, %rdi, %rax
		; CHECK-BMI2-NEXT: cmpq %rax, %rdi
		; CHECK-BMI2-NEXT: setne %al
		; CHECK-BMI2-NEXT: retq
%and = and i64 %x, 4294967295		%and = and i64 %x, 4294967295
%shr = lshr i64 %x, 32		%shr = lshr i64 %x, 32
%r = icmp ne i64 %and, %shr		%r = icmp ne i64 %and, %shr
ret i1 %r		ret i1 %r
}		}

define i1 @rorl_to_shl_eq_i64_s16(i64 %x) {		define i1 @rorl_to_shl_eq_i64_s16(i64 %x) {
; CHECK-NOBMI-LABEL: rorl_to_shl_eq_i64_s16:		; CHECK-NOBMI-LABEL: rorl_to_shl_eq_i64_s16:
Show All 27 Lines	; CHECK-NEXT: retq
%shr = ashr i64 %x, 32		%shr = ashr i64 %x, 32
%r = icmp ne i64 %and, %shr		%r = icmp ne i64 %and, %shr
ret i1 %r		ret i1 %r
}		}

define i1 @shl_to_shr_eq_i64_s63(i64 %x) {		define i1 @shl_to_shr_eq_i64_s63(i64 %x) {
; CHECK-LABEL: shl_to_shr_eq_i64_s63:		; CHECK-LABEL: shl_to_shr_eq_i64_s63:
; CHECK: # %bb.0:		; CHECK: # %bb.0:
; CHECK-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000		; CHECK-NEXT: movq %rdi, %rax
; CHECK-NEXT: andq %rdi, %rax		; CHECK-NEXT: shrq $63, %rax
; CHECK-NEXT: shlq $63, %rdi		; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: cmpq %rax, %rdi		; CHECK-NEXT: cmpq %rax, %rdi
; CHECK-NEXT: sete %al		; CHECK-NEXT: sete %al
; CHECK-NEXT: retq		; CHECK-NEXT: retq
%shl = shl i64 %x, 63		%shl = shl i64 %x, 63
%and = and i64 %x, 9223372036854775808		%and = and i64 %x, 9223372036854775808
%r = icmp eq i64 %shl, %and		%r = icmp eq i64 %shl, %and
ret i1 %r		ret i1 %r
}		}
Show All 9 Lines
; CHECK-NEXT: retq		; CHECK-NEXT: retq
%shl = shl i64 %x, 63		%shl = shl i64 %x, 63
%and = and i64 %x, 9223372036854775808		%and = and i64 %x, 9223372036854775808
%r = icmp ugt i64 %shl, %and		%r = icmp ugt i64 %shl, %and
ret i1 %r		ret i1 %r
}		}

define i1 @shr_to_shl_eq_i64_s7(i64 %x) {		define i1 @shr_to_shl_eq_i64_s7(i64 %x) {
; CHECK-NOBMI-LABEL: shr_to_shl_eq_i64_s7:		; CHECK-LABEL: shr_to_shl_eq_i64_s7:
; CHECK-NOBMI: # %bb.0:		; CHECK: # %bb.0:
; CHECK-NOBMI-NEXT: movabsq $144115188075855871, %rax # imm = 0x1FFFFFFFFFFFFFF		; CHECK-NEXT: movq %rdi, %rax
; CHECK-NOBMI-NEXT: andq %rdi, %rax		; CHECK-NEXT: shlq $7, %rax
; CHECK-NOBMI-NEXT: shrq $7, %rdi		; CHECK-NEXT: andq $-128, %rdi
; CHECK-NOBMI-NEXT: cmpq %rdi, %rax		; CHECK-NEXT: cmpq %rax, %rdi
; CHECK-NOBMI-NEXT: sete %al		; CHECK-NEXT: sete %al
; CHECK-NOBMI-NEXT: retq		; CHECK-NEXT: retq
;
; CHECK-BMI2-LABEL: shr_to_shl_eq_i64_s7:
; CHECK-BMI2: # %bb.0:
; CHECK-BMI2-NEXT: movb $57, %al
; CHECK-BMI2-NEXT: bzhiq %rax, %rdi, %rax
; CHECK-BMI2-NEXT: shrq $7, %rdi
; CHECK-BMI2-NEXT: cmpq %rdi, %rax
; CHECK-BMI2-NEXT: sete %al
; CHECK-BMI2-NEXT: retq
%and = and i64 %x, 144115188075855871		%and = and i64 %x, 144115188075855871
%shr = lshr i64 %x, 7		%shr = lshr i64 %x, 7
%r = icmp eq i64 %and, %shr		%r = icmp eq i64 %and, %shr
ret i1 %r		ret i1 %r
}		}

define i1 @shl_to_shr_ne_i32_s24(i32 %x) {		define i1 @shl_to_shr_ne_i32_s24(i32 %x) {
; CHECK-LABEL: shl_to_shr_ne_i32_s24:		; CHECK-LABEL: shl_to_shr_ne_i32_s24:
; CHECK: # %bb.0:		; CHECK: # %bb.0:
; CHECK-NEXT: movl %edi, %eax		; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: shll $24, %eax		; CHECK-NEXT: shrl $24, %edi
; CHECK-NEXT: andl $-16777216, %edi # imm = 0xFF000000
; CHECK-NEXT: cmpl %edi, %eax		; CHECK-NEXT: cmpl %edi, %eax
; CHECK-NEXT: setne %al		; CHECK-NEXT: setne %al
; CHECK-NEXT: retq		; CHECK-NEXT: retq
%shl = shl i32 %x, 24		%shl = shl i32 %x, 24
%and = and i32 %x, 4278190080		%and = and i32 %x, 4278190080
%r = icmp ne i32 %shl, %and		%r = icmp ne i32 %shl, %and
ret i1 %r		ret i1 %r
}		}
Show All 9 Lines
; CHECK-NEXT: retq		; CHECK-NEXT: retq
%shl = lshr i32 %x, 24		%shl = lshr i32 %x, 24
%and = and i32 %x, 4278190080		%and = and i32 %x, 4278190080
%r = icmp ne i32 %shl, %and		%r = icmp ne i32 %shl, %and
ret i1 %r		ret i1 %r
}		}

define i1 @shr_to_shl_ne_i32_s8(i32 %x) {		define i1 @shr_to_shl_ne_i32_s8(i32 %x) {
; CHECK-LABEL: shr_to_shl_ne_i32_s8:		; CHECK-NOBMI-LABEL: shr_to_shl_ne_i32_s8:
; CHECK: # %bb.0:		; CHECK-NOBMI: # %bb.0:
; CHECK-NEXT: movl %edi, %eax		; CHECK-NOBMI-NEXT: movl %edi, %eax
; CHECK-NEXT: andl $16777215, %eax # imm = 0xFFFFFF		; CHECK-NOBMI-NEXT: roll $8, %eax
; CHECK-NEXT: shrl $8, %edi		; CHECK-NOBMI-NEXT: cmpl %eax, %edi
; CHECK-NEXT: cmpl %edi, %eax		; CHECK-NOBMI-NEXT: setne %al
; CHECK-NEXT: setne %al		; CHECK-NOBMI-NEXT: retq
; CHECK-NEXT: retq		;
		; CHECK-BMI2-LABEL: shr_to_shl_ne_i32_s8:
		; CHECK-BMI2: # %bb.0:
		; CHECK-BMI2-NEXT: rorxl $24, %edi, %eax
		; CHECK-BMI2-NEXT: cmpl %eax, %edi
		; CHECK-BMI2-NEXT: setne %al
		; CHECK-BMI2-NEXT: retq
%and = and i32 %x, 16777215		%and = and i32 %x, 16777215
%shr = lshr i32 %x, 8		%shr = lshr i32 %x, 8
%r = icmp ne i32 %and, %shr		%r = icmp ne i32 %and, %shr
ret i1 %r		ret i1 %r
}		}

define <4 x i1> @shr_to_ror_eq_4xi32_s4(<4 x i32> %x) {		define <4 x i1> @shr_to_ror_eq_4xi32_s4(<4 x i32> %x) {
; CHECK-NOBMI-LABEL: shr_to_ror_eq_4xi32_s4:		; CHECK-NOBMI-LABEL: shr_to_ror_eq_4xi32_s4:
Show All 23 Lines
; CHECK-AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0		; CHECK-AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpcmpeqd %xmm0, %xmm1, %xmm0		; CHECK-AVX2-NEXT: vpcmpeqd %xmm0, %xmm1, %xmm0
; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1		; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; CHECK-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0		; CHECK-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT: retq		; CHECK-AVX2-NEXT: retq
;		;
; CHECK-AVX512-LABEL: shr_to_ror_eq_4xi32_s4:		; CHECK-AVX512-LABEL: shr_to_ror_eq_4xi32_s4:
; CHECK-AVX512: # %bb.0:		; CHECK-AVX512: # %bb.0:
; CHECK-AVX512-NEXT: vpsrld $4, %xmm0, %xmm1		; CHECK-AVX512-NEXT: vprold $4, %xmm0, %xmm1
; CHECK-AVX512-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0		; CHECK-AVX512-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX512-NEXT: vpcmpeqd %xmm0, %xmm1, %xmm0
; CHECK-AVX512-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0		; CHECK-AVX512-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
; CHECK-AVX512-NEXT: retq		; CHECK-AVX512-NEXT: retq
%shr = lshr <4 x i32> %x, <i32 4, i32 4, i32 4, i32 4>		%shr = lshr <4 x i32> %x, <i32 4, i32 4, i32 4, i32 4>
%and = and <4 x i32> %x, <i32 268435455, i32 268435455, i32 268435455, i32 268435455>		%and = and <4 x i32> %x, <i32 268435455, i32 268435455, i32 268435455, i32 268435455>
%r = icmp ne <4 x i32> %shr, %and		%r = icmp ne <4 x i32> %shr, %and
ret <4 x i1> %r		ret <4 x i1> %r
}		}

Show All 24 Lines
; CHECK-AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0		; CHECK-AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpcmpeqd %xmm0, %xmm1, %xmm0		; CHECK-AVX2-NEXT: vpcmpeqd %xmm0, %xmm1, %xmm0
; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1		; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; CHECK-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0		; CHECK-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT: retq		; CHECK-AVX2-NEXT: retq
;		;
; CHECK-AVX512-LABEL: shl_to_ror_eq_4xi32_s8:		; CHECK-AVX512-LABEL: shl_to_ror_eq_4xi32_s8:
; CHECK-AVX512: # %bb.0:		; CHECK-AVX512: # %bb.0:
; CHECK-AVX512-NEXT: vpslld $8, %xmm0, %xmm1		; CHECK-AVX512-NEXT: vprold $8, %xmm0, %xmm1
; CHECK-AVX512-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0		; CHECK-AVX512-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX512-NEXT: vpcmpeqd %xmm0, %xmm1, %xmm0
; CHECK-AVX512-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0		; CHECK-AVX512-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
; CHECK-AVX512-NEXT: retq		; CHECK-AVX512-NEXT: retq
%shr = shl <4 x i32> %x, <i32 8, i32 8, i32 8, i32 8>		%shr = shl <4 x i32> %x, <i32 8, i32 8, i32 8, i32 8>
%and = and <4 x i32> %x, <i32 4294967040, i32 4294967040, i32 4294967040, i32 4294967040>		%and = and <4 x i32> %x, <i32 4294967040, i32 4294967040, i32 4294967040, i32 4294967040>
%r = icmp ne <4 x i32> %shr, %and		%r = icmp ne <4 x i32> %shr, %and
ret <4 x i1> %r		ret <4 x i1> %r
}		}

▲ Show 20 Lines • Show All 160 Lines • Show Last 20 Lines

This is an archive of the discontinued LLVM Phabricator instance.

[DAGCombiner] Transform `(icmp eq/ne (and X,C0),(shift X,C1))` to use rotate or to getter constants.
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 557744

llvm/include/llvm/CodeGen/TargetLowering.h

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

llvm/lib/Target/X86/X86ISelLowering.h

llvm/lib/Target/X86/X86ISelLowering.cpp

llvm/test/CodeGen/X86/cmp-shiftX-maskX.ll

This is an archive of the discontinued LLVM Phabricator instance.

[DAGCombiner] Transform `(icmp eq/ne (and X,C0),(shift X,C1))` to use rotate or to getter constants.ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 557744

llvm/include/llvm/CodeGen/TargetLowering.h

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

llvm/lib/Target/X86/X86ISelLowering.h

llvm/lib/Target/X86/X86ISelLowering.cpp

llvm/test/CodeGen/X86/cmp-shiftX-maskX.ll

[DAGCombiner] Transform `(icmp eq/ne (and X,C0),(shift X,C1))` to use rotate or to getter constants.
ClosedPublic