Diff 167788

lib/Target/X86/X86ISelDAGToDAG.cpp

Show First 20 Lines • Show All 451 Lines • ▼ Show 20 Lines	bool useNonTemporalLoad(LoadSDNode *N) const {
case 32:		case 32:
return Subtarget->hasAVX2();		return Subtarget->hasAVX2();
case 64:		case 64:
return Subtarget->hasAVX512();		return Subtarget->hasAVX512();
}		}
}		}

bool foldLoadStoreIntoMemOperand(SDNode *Node);		bool foldLoadStoreIntoMemOperand(SDNode *Node);
bool matchBEXTRFromAnd(SDNode *Node);
bool shrinkAndImmediate(SDNode *N);		bool shrinkAndImmediate(SDNode *N);
bool isMaskZeroExtended(SDNode *N) const;		bool isMaskZeroExtended(SDNode *N) const;
bool tryShiftAmountMod(SDNode *N);		bool tryShiftAmountMod(SDNode *N);

MachineSDNode *emitPCMPISTR(unsigned ROpc, unsigned MOpc, bool MayFoldLoad,		MachineSDNode *emitPCMPISTR(unsigned ROpc, unsigned MOpc, bool MayFoldLoad,
const SDLoc &dl, MVT VT, SDNode *Node);		const SDLoc &dl, MVT VT, SDNode *Node);
MachineSDNode *emitPCMPESTR(unsigned ROpc, unsigned MOpc, bool MayFoldLoad,		MachineSDNode *emitPCMPESTR(unsigned ROpc, unsigned MOpc, bool MayFoldLoad,
const SDLoc &dl, MVT VT, SDNode *Node,		const SDLoc &dl, MVT VT, SDNode *Node,
▲ Show 20 Lines • Show All 2,108 Lines • ▼ Show 20 Lines	bool X86DAGToDAGISel::foldLoadStoreIntoMemOperand(SDNode *Node) {
// Update Load Chain uses as well.		// Update Load Chain uses as well.
ReplaceUses(SDValue(LoadNode, 1), SDValue(Result, 1));		ReplaceUses(SDValue(LoadNode, 1), SDValue(Result, 1));
ReplaceUses(SDValue(StoreNode, 0), SDValue(Result, 1));		ReplaceUses(SDValue(StoreNode, 0), SDValue(Result, 1));
ReplaceUses(SDValue(StoredVal.getNode(), 1), SDValue(Result, 0));		ReplaceUses(SDValue(StoredVal.getNode(), 1), SDValue(Result, 0));
CurDAG->RemoveDeadNode(Node);		CurDAG->RemoveDeadNode(Node);
return true;		return true;
}		}

// Try to select (X >> C1) & C2, with C2 a mask, as a single BEXTR/BEXTRI.
// Returns true when Node has been replaced and the replacement selected;
// returns false, leaving Node untouched, when the pattern does not apply.
bool X86DAGToDAGISel::matchBEXTRFromAnd(SDNode *Node) {
  MVT NVT = Node->getSimpleValueType(0);
  SDLoc dl(Node);

  SDValue ShiftedVal = Node->getOperand(0);
  SDValue AndRHS = Node->getOperand(1);

  // With TBM the control can be an immediate. With only BMI the control has
  // to be moved into a register first, which is only worth it when the
  // subtarget reports a well-implemented BEXTR.
  // TODO: Maybe load folding, greater than 32-bit masks, or a guarantee of
  // LICM hoisting the move immediate would make it worthwhile with a less
  // optimal BEXTR?
  const bool HasUsableBEXTR =
      Subtarget->hasTBM() ||
      (Subtarget->hasBMI() && Subtarget->hasFastBEXTR());
  if (!HasUsableBEXTR)
    return false;

  // The AND's first operand has to be a right shift that nothing else uses.
  const unsigned ShiftOpc = ShiftedVal->getOpcode();
  const bool IsRightShift = ShiftOpc == ISD::SRL || ShiftOpc == ISD::SRA;
  if (!IsRightShift || !ShiftedVal->hasOneUse())
    return false;

  // Only 32-bit and 64-bit results are handled.
  if (!(NVT == MVT::i32 || NVT == MVT::i64))
    return false;

  // Both the AND's RHS and the shift amount have to be constants.
  auto *AndCst = dyn_cast<ConstantSDNode>(AndRHS);
  auto *AmtCst = dyn_cast<ConstantSDNode>(ShiftedVal->getOperand(1));
  if (!(AndCst && AmtCst))
    return false;

  // The AND constant has to be a mask as recognised by isMask_64.
  const uint64_t MaskBits = AndCst->getZExtValue();
  if (!isMask_64(MaskBits))
    return false;

  const uint64_t StartBit = AmtCst->getZExtValue();
  const uint64_t FieldLen = countPopulation(MaskBits);

  // An 8-bit field starting at bit 8 can be handled by extracting AH, so
  // leave that case alone.
  // TODO: If we are able to fold a load, BEXTR might still be better than AH.
  if (StartBit == 8 && FieldLen == 8)
    return false;

  // The extracted field must consist only of bits of the original value,
  // never of bits that were shifted in.
  if (StartBit + FieldLen > NVT.getSizeInBits())
    return false;

  // Build the BEXTR node; its control constant is the start bit OR'ed with
  // the field length shifted left by 8. Then run it through selection.
  SDValue Control = CurDAG->getConstant(StartBit | (FieldLen << 8), dl, NVT);
  SDValue Extract = CurDAG->getNode(X86ISD::BEXTR, dl, NVT,
                                    ShiftedVal->getOperand(0), Control);
  ReplaceNode(Node, Extract.getNode());
  SelectCode(Extract.getNode());
  return true;
}

// Emit a PCMPISTR(I/M) instruction.		// Emit a PCMPISTR(I/M) instruction.
MachineSDNode *X86DAGToDAGISel::emitPCMPISTR(unsigned ROpc, unsigned MOpc,		MachineSDNode *X86DAGToDAGISel::emitPCMPISTR(unsigned ROpc, unsigned MOpc,
bool MayFoldLoad, const SDLoc &dl,		bool MayFoldLoad, const SDLoc &dl,
MVT VT, SDNode *Node) {		MVT VT, SDNode *Node) {
SDValue N0 = Node->getOperand(0);		SDValue N0 = Node->getOperand(0);
SDValue N1 = Node->getOperand(1);		SDValue N1 = Node->getOperand(1);
SDValue Imm = Node->getOperand(2);		SDValue Imm = Node->getOperand(2);
const ConstantInt *Val = cast<ConstantSDNode>(Imm)->getConstantIntValue();		const ConstantInt *Val = cast<ConstantSDNode>(Imm)->getConstantIntValue();
▲ Show 20 Lines • Show All 291 Lines • ▼ Show 20 Lines	void X86DAGToDAGISel::Select(SDNode *Node) {
case ISD::SRL:		case ISD::SRL:
case ISD::SRA:		case ISD::SRA:
case ISD::SHL:		case ISD::SHL:
if (tryShiftAmountMod(Node))		if (tryShiftAmountMod(Node))
return;		return;
break;		break;

case ISD::AND:		case ISD::AND:
if (matchBEXTRFromAnd(Node))
return;
if (AndImmShrink && shrinkAndImmediate(Node))		if (AndImmShrink && shrinkAndImmediate(Node))
return;		return;

LLVM_FALLTHROUGH;		LLVM_FALLTHROUGH;
case ISD::OR:		case ISD::OR:
case ISD::XOR: {		case ISD::XOR: {

// For operations of the form (x << C1) op C2, check if we can use a smaller		// For operations of the form (x << C1) op C2, check if we can use a smaller
▲ Show 20 Lines • Show All 601 Lines • Show Last 20 Lines

lib/Target/X86/X86ISelLowering.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 35,218 Lines • ▼ Show 20 Lines	if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(MemOp)) {
return DAG.getNode(ISD::AND, dl, VT, Inp, LShr);		return DAG.getNode(ISD::AND, dl, VT, Inp, LShr);
}		}
}		}
}		}
}		}
return SDValue();		return SDValue();
}		}

		// Returns true when a BEXTR-based extraction should be formed for a value
		// of type VT on the given subtarget.
		static bool hasBEXTR(const X86Subtarget &Subtarget, EVT VT) {
		  // With TBM the control can be an immediate. With only BMI the control
		  // has to be moved into a register, which makes this more costly, so
		  // that is accepted only when BEXTR is implemented well.
		  // TODO: Maybe load folding, greater than 32-bit masks, or a guarantee of
		  // LICM hoisting the move immediate would make it worthwhile with a less
		  // optimal BEXTR?
		  const bool Profitable =
		      Subtarget.hasTBM() ||
		      (Subtarget.hasBMI() && Subtarget.hasFastBEXTR());
		  if (!Profitable)
		    return false;

		  // i32 is always accepted; i64 only on a 64-bit subtarget.
		  if (VT == MVT::i32)
		    return true;
		  return VT == MVT::i64 && Subtarget.is64Bit();
		}

		// Look for the pattern:
		// (X l>> C1) & C2 or (X l>> C1) & (C2 << C3)
		// Where C2 is all-ones in lowbits, and C3 might be zero,
		// and transform it into:
		// ((X l>> (C1 + C3)) & C2) << C3
		// Which is combined into:
		// (bextr X, ((popcnt(C2) << 8) | (C1 + C3))) << C3
		RKSimonUnsubmitted Done Reply Inline Actions Most of this could be replaced with: return (VT == MVT::i32 \|\| (VT == MVT::i64 && Subtarget.is64Bit())); RKSimon: Most of this could be replaced with: ``` return (VT == MVT::i32 \|\| (VT == MVT::i64 && Subtarget.
		// The last `<< C3` shift only exists if C3 is not zero.
		// Node must be an ISD::AND. Returns the replacement value (a BEXTR node,
		// wrapped in a SHL when the mask had low zero bits), or an empty SDValue
		// when the pattern does not apply.
		static SDValue combineShiftAndIntoBEXTR(SDNode *Node, SelectionDAG &DAG,
		                                        const X86Subtarget &Subtarget) {
		  assert(Node->getOpcode() == ISD::AND && "Expected an ISD::AND node");
		RKSimonUnsubmitted Not Done Reply Inline Actions asserts should have a message RKSimon: asserts should have a message

		  EVT NVT = Node->getValueType(0);

		  // Check if subtarget has BEXTR instruction for the node's type
		craig.topperUnsubmitted Done Reply Inline Actions Remove the call to dump. craig.topper: Remove the call to dump.
		  if (!hasBEXTR(Subtarget, NVT))
		    return SDValue();

		  SDLoc dl(Node);

		  SDValue N0 = Node->getOperand(0);
		  SDValue N1 = Node->getOperand(1);

		RKSimonUnsubmitted Done Reply Inline Actions You could just use the EVT value all the way through if you changed hasBEXTR to take an EVT instead of MVT RKSimon: You could just use the EVT value all the way through if you changed hasBEXTR to take an EVT…
		  // Must have a shift right.
		  if (N0->getOpcode() != ISD::SRL && N0->getOpcode() != ISD::SRA)
		    return SDValue();

		  // Shift can't have additional users.
		  if (!N0->hasOneUse())
		    return SDValue();

		  // Shift amount and RHS of and must be constant.
		  ConstantSDNode *MaskCst = dyn_cast<ConstantSDNode>(N1);
		  ConstantSDNode *ShiftCst = dyn_cast<ConstantSDNode>(N0->getOperand(1));
		  if (!MaskCst || !ShiftCst)
		    return SDValue();

		  // And RHS must be a (potentially shifted) mask.
		  uint64_t Mask = MaskCst->getZExtValue();
		  if (!isShiftedMask_64(Mask))
		    return SDValue();

		  // MaskSize is the number of set bits in the mask; ZeroPaddingSize is the
		  // number of zero bits below them.
		  uint64_t MaskSize = countPopulation(Mask);
		  uint64_t ZeroPaddingSize = countTrailingZeros(Mask);

		  // The extraction starts at the original shift amount plus the mask's
		  // zero padding.
		  uint64_t Shift = ZeroPaddingSize + ShiftCst->getZExtValue();

		  // Don't interfere with something that can be handled by extracting AH.
		  // TODO: If we are able to fold a load, BEXTR might still be better than AH.
		  if (Shift == 8 && MaskSize == 8 && ZeroPaddingSize == 0)
		    return SDValue();

		  // Make sure we are only using bits that were in the original value, not
		  // shifted in.
		  if (Shift + MaskSize > NVT.getSizeInBits())
		    return SDValue();

		  // Create a BEXTR node. The control constant is the start bit OR'ed with
		  // the field length shifted left by 8.
		  SDValue C = DAG.getConstant(Shift | (MaskSize << 8), dl, NVT);
		  SDValue New = DAG.getNode(X86ISD::BEXTR, dl, NVT, N0->getOperand(0), C);

		  // If the mask had some zero low bits, we need to re-introduce them.
		  if (ZeroPaddingSize > 0) {
		    SDValue Cp = DAG.getConstant(ZeroPaddingSize, dl, MVT::i8);
		    New = DAG.getNode(ISD::SHL, dl, NVT, New, Cp);
		  }

		  return New;
		}

// Look for (and (ctpop X), 1) which is the IR form of __builtin_parity.		// Look for (and (ctpop X), 1) which is the IR form of __builtin_parity.
		craig.topperUnsubmitted Done Reply Inline Actions Remove call to dump. craig.topper: Remove call to dump.
// Turn it into series of XORs and a setnp.		// Turn it into series of XORs and a setnp.
static SDValue combineParity(SDNode *N, SelectionDAG &DAG,		static SDValue combineParity(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {		const X86Subtarget &Subtarget) {
EVT VT = N->getValueType(0);		EVT VT = N->getValueType(0);

// We only support 64-bit and 32-bit. 64-bit requires special handling		// We only support 64-bit and 32-bit. 64-bit requires special handling
// unless the 64-bit popcnt instruction is legal.		// unless the 64-bit popcnt instruction is legal.
if (VT != MVT::i32 && VT != MVT::i64)		if (VT != MVT::i32 && VT != MVT::i64)
▲ Show 20 Lines • Show All 81 Lines • ▼ Show 20 Lines	if (DAG.MaskedValueIsZero(N->getOperand(1), HiMask) \|\|
DAG.getNode(ISD::AND, dl, MVT::i32, LHS, RHS));		DAG.getNode(ISD::AND, dl, MVT::i32, LHS, RHS));
}		}
}		}

// This must be done before legalization has expanded the ctpop.		// This must be done before legalization has expanded the ctpop.
if (SDValue V = combineParity(N, DAG, Subtarget))		if (SDValue V = combineParity(N, DAG, Subtarget))
return V;		return V;

		if (SDValue R = combineShiftAndIntoBEXTR(N, DAG, Subtarget))
		craig.topperUnsubmitted Done Reply Inline Actions This should probably be below the LegalizeOps check. We should give ample opportunity for AND based DAG combines to optimize this. craig.topper: This should probably be below the LegalizeOps check. We should give ample opportunity for AND…
		lebedev.riAuthorUnsubmitted Not Done Reply Inline Actions I'm not sure about the test for this. Also, should it be right after the `isBeforeLegalizeOps()`, or somewhere at the end of the block? lebedev.ri: I'm not sure about the test for this. Also, should it be right after the `isBeforeLegalizeOps…
		return R;

if (DCI.isBeforeLegalizeOps())		if (DCI.isBeforeLegalizeOps())
return SDValue();		return SDValue();

if (SDValue R = combineCompareEqual(N, DAG, DCI, Subtarget))		if (SDValue R = combineCompareEqual(N, DAG, DCI, Subtarget))
return R;		return R;

if (SDValue FPLogic = convertIntLogicToFPLogic(N, DAG, Subtarget))		if (SDValue FPLogic = convertIntLogicToFPLogic(N, DAG, Subtarget))
return FPLogic;		return FPLogic;
▲ Show 20 Lines • Show All 6,204 Lines • Show Last 20 Lines

test/CodeGen/X86/bmi-x86_64.ll

	Show First 20 Lines • Show All 96 Lines • ▼ Show 20 Lines
	; BEXTR-FAST-NEXT: bextrq %rax, %rdi, %rax			; BEXTR-FAST-NEXT: bextrq %rax, %rdi, %rax
	; BEXTR-FAST-NEXT: retq			; BEXTR-FAST-NEXT: retq
	entry:			entry:
	%shr = lshr i64 %a, 2			%shr = lshr i64 %a, 2
	%and = and i64 %shr, 8589934591			%and = and i64 %shr, 8589934591
	ret i64 %and			ret i64 %and
	}			}

	define i64 @non_bextr64(i64 %x) {			define i64 @non_bextr64(i64 %x) {
	; CHECK-LABEL: non_bextr64:			; BEXTR-SLOW-LABEL: non_bextr64:
				RKSimonUnsubmitted Not Done Reply Inline Actions Why is this call non_bextr64? RKSimon: Why is this call non_bextr64?
				lebedev.riAuthorUnsubmitted Not Done Reply Inline Actions That is how you named it when adding in rL232580 / https://github.com/llvm-mirror/llvm/commit/cbaefea0c0c1792390375b20c31b7c1fe8d0d2c7 Should i rename it? lebedev.ri: That is how you named it when adding in rL232580 / https://github.com/llvm…
				lebedev.riAuthorUnsubmitted Not Done Reply Inline Actions Whoops, typo, s/you//. Still, should i rename it? lebedev.ri: Whoops, typo, s/you//. Still, should i rename it?
	; CHECK: # %bb.0: # %entry			; BEXTR-SLOW: # %bb.0: # %entry
	; CHECK-NEXT: shrq $2, %rdi			; BEXTR-SLOW-NEXT: shrq $2, %rdi
	; CHECK-NEXT: movabsq $8589934590, %rax # imm = 0x1FFFFFFFE			; BEXTR-SLOW-NEXT: movabsq $8589934590, %rax # imm = 0x1FFFFFFFE
	; CHECK-NEXT: andq %rdi, %rax			; BEXTR-SLOW-NEXT: andq %rdi, %rax
	; CHECK-NEXT: retq			; BEXTR-SLOW-NEXT: retq
				;
				; BEXTR-FAST-LABEL: non_bextr64:
				; BEXTR-FAST: # %bb.0: # %entry
				; BEXTR-FAST-NEXT: movl $8195, %eax # imm = 0x2003
				; BEXTR-FAST-NEXT: bextrq %rax, %rdi, %rax
				; BEXTR-FAST-NEXT: addq %rax, %rax
				; BEXTR-FAST-NEXT: retq
	entry:			entry:
	%shr = lshr i64 %x, 2			%shr = lshr i64 %x, 2
	%and = and i64 %shr, 8589934590			%and = and i64 %shr, 8589934590
	ret i64 %and			ret i64 %and
	}			}

test/CodeGen/X86/extract-bits.ll

	Show First 20 Lines • Show All 5,609 Lines • ▼ Show 20 Lines
	}			}

	; ---------------------------------------------------------------------------- ;			; ---------------------------------------------------------------------------- ;
	; Constant			; Constant
	; ---------------------------------------------------------------------------- ;			; ---------------------------------------------------------------------------- ;

	; https://bugs.llvm.org/show_bug.cgi?id=38938			; https://bugs.llvm.org/show_bug.cgi?id=38938
	define void @pr38938(i32* %a0, i64* %a1) {			define void @pr38938(i32* %a0, i64* %a1) {
	; X86-LABEL: pr38938:			; X86-NOBMI-LABEL: pr38938:
	; X86: # %bb.0:			; X86-NOBMI: # %bb.0:
	; X86-NEXT: movl {{[0-9]+}}(%esp), %eax			; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax
	; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx			; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %ecx
	; X86-NEXT: movl (%ecx), %ecx			; X86-NOBMI-NEXT: movl (%ecx), %ecx
	; X86-NEXT: shrl $19, %ecx			; X86-NOBMI-NEXT: shrl $19, %ecx
	; X86-NEXT: andl $4092, %ecx # imm = 0xFFC			; X86-NOBMI-NEXT: andl $4092, %ecx # imm = 0xFFC
	; X86-NEXT: incl (%eax,%ecx)			; X86-NOBMI-NEXT: incl (%eax,%ecx)
	; X86-NEXT: retl			; X86-NOBMI-NEXT: retl
	;			;
	; X64-LABEL: pr38938:			; X86-BMI1NOTBM-LABEL: pr38938:
	; X64: # %bb.0:			; X86-BMI1NOTBM: # %bb.0:
	; X64-NEXT: movq (%rsi), %rax			; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax
	; X64-NEXT: shrq $19, %rax			; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %ecx
	; X64-NEXT: andl $4092, %eax # imm = 0xFFC			; X86-BMI1NOTBM-NEXT: movl $2581, %edx # imm = 0xA15
	; X64-NEXT: incl (%rdi,%rax)			; X86-BMI1NOTBM-NEXT: bextrl %edx, (%ecx), %ecx
	; X64-NEXT: retq			; X86-BMI1NOTBM-NEXT: incl (%eax,%ecx,4)
				; X86-BMI1NOTBM-NEXT: retl
				;
				; X86-BMI1TBM-LABEL: pr38938:
				; X86-BMI1TBM: # %bb.0:
				; X86-BMI1TBM-NEXT: movl {{[0-9]+}}(%esp), %eax
				; X86-BMI1TBM-NEXT: movl {{[0-9]+}}(%esp), %ecx
				; X86-BMI1TBM-NEXT: bextrl $2581, (%ecx), %ecx # imm = 0xA15
				; X86-BMI1TBM-NEXT: incl (%eax,%ecx,4)
				; X86-BMI1TBM-NEXT: retl
				;
				; X86-BMI1NOTBMBMI2-LABEL: pr38938:
				; X86-BMI1NOTBMBMI2: # %bb.0:
				; X86-BMI1NOTBMBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
				; X86-BMI1NOTBMBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
				; X86-BMI1NOTBMBMI2-NEXT: movl $2581, %edx # imm = 0xA15
				; X86-BMI1NOTBMBMI2-NEXT: bextrl %edx, (%ecx), %ecx
				; X86-BMI1NOTBMBMI2-NEXT: incl (%eax,%ecx,4)
				; X86-BMI1NOTBMBMI2-NEXT: retl
				;
				; X64-NOBMI-LABEL: pr38938:
				; X64-NOBMI: # %bb.0:
				; X64-NOBMI-NEXT: movq (%rsi), %rax
				; X64-NOBMI-NEXT: shrq $19, %rax
				; X64-NOBMI-NEXT: andl $4092, %eax # imm = 0xFFC
				; X64-NOBMI-NEXT: incl (%rdi,%rax)
				; X64-NOBMI-NEXT: retq
				;
				; X64-BMI1NOTBM-LABEL: pr38938:
				; X64-BMI1NOTBM: # %bb.0:
				; X64-BMI1NOTBM-NEXT: movl $2581, %eax # imm = 0xA15
				; X64-BMI1NOTBM-NEXT: bextrq %rax, (%rsi), %rax
				; X64-BMI1NOTBM-NEXT: incl (%rdi,%rax,4)
				; X64-BMI1NOTBM-NEXT: retq
				;
				; X64-BMI1TBM-LABEL: pr38938:
				; X64-BMI1TBM: # %bb.0:
				; X64-BMI1TBM-NEXT: bextrq $2581, (%rsi), %rax # imm = 0xA15
				; X64-BMI1TBM-NEXT: incl (%rdi,%rax,4)
				; X64-BMI1TBM-NEXT: retq
				;
				; X64-BMI1NOTBMBMI2-LABEL: pr38938:
				; X64-BMI1NOTBMBMI2: # %bb.0:
				; X64-BMI1NOTBMBMI2-NEXT: movl $2581, %eax # imm = 0xA15
				; X64-BMI1NOTBMBMI2-NEXT: bextrq %rax, (%rsi), %rax
				; X64-BMI1NOTBMBMI2-NEXT: incl (%rdi,%rax,4)
				; X64-BMI1NOTBMBMI2-NEXT: retq
	%tmp = load i64, i64* %a1, align 8			%tmp = load i64, i64* %a1, align 8
	%tmp1 = lshr i64 %tmp, 21			%tmp1 = lshr i64 %tmp, 21
	%tmp2 = and i64 %tmp1, 1023			%tmp2 = and i64 %tmp1, 1023
	%tmp3 = getelementptr inbounds i32, i32* %a0, i64 %tmp2			%tmp3 = getelementptr inbounds i32, i32* %a0, i64 %tmp2
	%tmp4 = load i32, i32* %tmp3, align 4			%tmp4 = load i32, i32* %tmp3, align 4
	%tmp5 = add nsw i32 %tmp4, 1			%tmp5 = add nsw i32 %tmp4, 1
	store i32 %tmp5, i32* %tmp3, align 4			store i32 %tmp5, i32* %tmp3, align 4
	ret void			ret void
	▲ Show 20 Lines • Show All 50 Lines • ▼ Show 20 Lines
	; X64-BMI1NOTBMBMI2-NEXT: retq			; X64-BMI1NOTBMBMI2-NEXT: retq
	%tmp0 = lshr i32 %arg, 19			%tmp0 = lshr i32 %arg, 19
	%tmp1 = and i32 %tmp0, 1023			%tmp1 = and i32 %tmp0, 1023
	ret i32 %tmp1			ret i32 %tmp1
	}			}

	; Should be still fine, but the mask is shifted			; Should be still fine, but the mask is shifted
	define i32 @c1_i32(i32 %arg) {			define i32 @c1_i32(i32 %arg) {
	; X86-LABEL: c1_i32:			; X86-NOBMI-LABEL: c1_i32:
	; X86: # %bb.0:			; X86-NOBMI: # %bb.0:
	; X86-NEXT: movl {{[0-9]+}}(%esp), %eax			; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax
	; X86-NEXT: shrl $19, %eax			; X86-NOBMI-NEXT: shrl $19, %eax
	; X86-NEXT: andl $4092, %eax # imm = 0xFFC			; X86-NOBMI-NEXT: andl $4092, %eax # imm = 0xFFC
	; X86-NEXT: retl			; X86-NOBMI-NEXT: retl
	;			;
	; X64-LABEL: c1_i32:			; X86-BMI1NOTBM-LABEL: c1_i32:
	; X64: # %bb.0:			; X86-BMI1NOTBM: # %bb.0:
	; X64-NEXT: movl %edi, %eax			; X86-BMI1NOTBM-NEXT: movl $2581, %eax # imm = 0xA15
	; X64-NEXT: shrl $19, %eax			; X86-BMI1NOTBM-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax
	; X64-NEXT: andl $4092, %eax # imm = 0xFFC			; X86-BMI1NOTBM-NEXT: shll $2, %eax
	; X64-NEXT: retq			; X86-BMI1NOTBM-NEXT: retl
				;
				; X86-BMI1TBM-LABEL: c1_i32:
				; X86-BMI1TBM: # %bb.0:
				; X86-BMI1TBM-NEXT: bextrl $2581, {{[0-9]+}}(%esp), %eax # imm = 0xA15
				; X86-BMI1TBM-NEXT: shll $2, %eax
				; X86-BMI1TBM-NEXT: retl
				;
				; X86-BMI1NOTBMBMI2-LABEL: c1_i32:
				; X86-BMI1NOTBMBMI2: # %bb.0:
				; X86-BMI1NOTBMBMI2-NEXT: movl $2581, %eax # imm = 0xA15
				; X86-BMI1NOTBMBMI2-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax
				; X86-BMI1NOTBMBMI2-NEXT: shll $2, %eax
				; X86-BMI1NOTBMBMI2-NEXT: retl
				;
				; X64-NOBMI-LABEL: c1_i32:
				; X64-NOBMI: # %bb.0:
				; X64-NOBMI-NEXT: movl %edi, %eax
				; X64-NOBMI-NEXT: shrl $19, %eax
				; X64-NOBMI-NEXT: andl $4092, %eax # imm = 0xFFC
				; X64-NOBMI-NEXT: retq
				;
				; X64-BMI1NOTBM-LABEL: c1_i32:
				; X64-BMI1NOTBM: # %bb.0:
				; X64-BMI1NOTBM-NEXT: movl $2581, %eax # imm = 0xA15
				; X64-BMI1NOTBM-NEXT: bextrl %eax, %edi, %eax
				; X64-BMI1NOTBM-NEXT: shll $2, %eax
				; X64-BMI1NOTBM-NEXT: retq
				;
				; X64-BMI1TBM-LABEL: c1_i32:
				; X64-BMI1TBM: # %bb.0:
				; X64-BMI1TBM-NEXT: bextrl $2581, %edi, %eax # imm = 0xA15
				; X64-BMI1TBM-NEXT: shll $2, %eax
				; X64-BMI1TBM-NEXT: retq
				;
				; X64-BMI1NOTBMBMI2-LABEL: c1_i32:
				; X64-BMI1NOTBMBMI2: # %bb.0:
				; X64-BMI1NOTBMBMI2-NEXT: movl $2581, %eax # imm = 0xA15
				; X64-BMI1NOTBMBMI2-NEXT: bextrl %eax, %edi, %eax
				craig.topperUnsubmitted Not Done Reply Inline Actions This is not an improvement. We traded a shift right plus an and for a move immediate, a 2 uop bextr, and a shift left. So we went from 2 uops to 4. At least on Haswell. craig.topper: This is not an improvement. We traded a shift right plus an and for a move immediate, a 2 uop…
				lebedev.riAuthorUnsubmitted Not Done Reply Inline Actions Aha. So the D52293 has the same problem, obviously. Is adding `FeatureSlowBEXTR` the way forward? lebedev.ri: Aha. So the D52293 has the same problem, obviously. Is adding `FeatureSlowBEXTR` the way…
				craig.topperUnsubmitted Not Done Reply Inline Actions It's still an increase in instruction even on AMD in the BMI1 case. We still went from 2 uops to 3 uops. We'd only be ok with BEXTRI TBM instruction. In the case from PR38938 we were able to fold the shl into an addressing calculation which made it beneficial. craig.topper: It's still an increase in instruction even on AMD in the BMI1 case. We still went from 2 uops…
				; X64-BMI1NOTBMBMI2-NEXT: shll $2, %eax
				; X64-BMI1NOTBMBMI2-NEXT: retq
	%tmp0 = lshr i32 %arg, 19			%tmp0 = lshr i32 %arg, 19
	%tmp1 = and i32 %tmp0, 4092			%tmp1 = and i32 %tmp0, 4092
	ret i32 %tmp1			ret i32 %tmp1
	}			}

	; Should be still fine, but the result is shifted left afterwards			; Should be still fine, but the result is shifted left afterwards
	define i32 @c2_i32(i32 %arg) {			define i32 @c2_i32(i32 %arg) {
	; X86-LABEL: c2_i32:			; X86-NOBMI-LABEL: c2_i32:
	; X86: # %bb.0:			; X86-NOBMI: # %bb.0:
	; X86-NEXT: movl {{[0-9]+}}(%esp), %eax			; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax
	; X86-NEXT: shrl $17, %eax			; X86-NOBMI-NEXT: shrl $17, %eax
	; X86-NEXT: andl $4092, %eax # imm = 0xFFC			; X86-NOBMI-NEXT: andl $4092, %eax # imm = 0xFFC
	; X86-NEXT: retl			; X86-NOBMI-NEXT: retl
	;			;
	; X64-LABEL: c2_i32:			; X86-BMI1NOTBM-LABEL: c2_i32:
	; X64: # %bb.0:			; X86-BMI1NOTBM: # %bb.0:
	; X64-NEXT: movl %edi, %eax			; X86-BMI1NOTBM-NEXT: movl $2579, %eax # imm = 0xA13
	; X64-NEXT: shrl $17, %eax			; X86-BMI1NOTBM-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax
	; X64-NEXT: andl $4092, %eax # imm = 0xFFC			; X86-BMI1NOTBM-NEXT: shll $2, %eax
	; X64-NEXT: retq			; X86-BMI1NOTBM-NEXT: retl
				;
				; X86-BMI1TBM-LABEL: c2_i32:
				; X86-BMI1TBM: # %bb.0:
				; X86-BMI1TBM-NEXT: bextrl $2579, {{[0-9]+}}(%esp), %eax # imm = 0xA13
				; X86-BMI1TBM-NEXT: shll $2, %eax
				; X86-BMI1TBM-NEXT: retl
				;
				; X86-BMI1NOTBMBMI2-LABEL: c2_i32:
				; X86-BMI1NOTBMBMI2: # %bb.0:
				; X86-BMI1NOTBMBMI2-NEXT: movl $2579, %eax # imm = 0xA13
				; X86-BMI1NOTBMBMI2-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax
				; X86-BMI1NOTBMBMI2-NEXT: shll $2, %eax
				; X86-BMI1NOTBMBMI2-NEXT: retl
				;
				; X64-NOBMI-LABEL: c2_i32:
				; X64-NOBMI: # %bb.0:
				; X64-NOBMI-NEXT: movl %edi, %eax
				; X64-NOBMI-NEXT: shrl $17, %eax
				; X64-NOBMI-NEXT: andl $4092, %eax # imm = 0xFFC
				; X64-NOBMI-NEXT: retq
				;
				; X64-BMI1NOTBM-LABEL: c2_i32:
				; X64-BMI1NOTBM: # %bb.0:
				; X64-BMI1NOTBM-NEXT: movl $2579, %eax # imm = 0xA13
				; X64-BMI1NOTBM-NEXT: bextrl %eax, %edi, %eax
				; X64-BMI1NOTBM-NEXT: shll $2, %eax
				; X64-BMI1NOTBM-NEXT: retq
				;
				; X64-BMI1TBM-LABEL: c2_i32:
				; X64-BMI1TBM: # %bb.0:
				; X64-BMI1TBM-NEXT: bextrl $2579, %edi, %eax # imm = 0xA13
				; X64-BMI1TBM-NEXT: shll $2, %eax
				; X64-BMI1TBM-NEXT: retq
				;
				; X64-BMI1NOTBMBMI2-LABEL: c2_i32:
				; X64-BMI1NOTBMBMI2: # %bb.0:
				; X64-BMI1NOTBMBMI2-NEXT: movl $2579, %eax # imm = 0xA13
				; X64-BMI1NOTBMBMI2-NEXT: bextrl %eax, %edi, %eax
				; X64-BMI1NOTBMBMI2-NEXT: shll $2, %eax
				; X64-BMI1NOTBMBMI2-NEXT: retq
	%tmp0 = lshr i32 %arg, 19			%tmp0 = lshr i32 %arg, 19
	%tmp1 = and i32 %tmp0, 1023			%tmp1 = and i32 %tmp0, 1023
	%tmp2 = shl i32 %tmp1, 2			%tmp2 = shl i32 %tmp1, 2
	ret i32 %tmp2			ret i32 %tmp2
	}			}

	; The mask covers newly shifted-in bit			; The mask covers newly shifted-in bit
	define i32 @c4_i32_bad(i32 %arg) {			define i32 @c4_i32_bad(i32 %arg) {
	; X86-LABEL: c4_i32_bad:			; X86-NOBMI-LABEL: c4_i32_bad:
	; X86: # %bb.0:			; X86-NOBMI: # %bb.0:
	; X86-NEXT: movl {{[0-9]+}}(%esp), %eax			; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax
	; X86-NEXT: shrl $19, %eax			; X86-NOBMI-NEXT: shrl $19, %eax
	; X86-NEXT: andl $-2, %eax			; X86-NOBMI-NEXT: andl $-2, %eax
	; X86-NEXT: retl			; X86-NOBMI-NEXT: retl
	;			;
	; X64-LABEL: c4_i32_bad:			; X86-BMI1NOTBM-LABEL: c4_i32_bad:
	; X64: # %bb.0:			; X86-BMI1NOTBM: # %bb.0:
	; X64-NEXT: movl %edi, %eax			; X86-BMI1NOTBM-NEXT: movl $3092, %eax # imm = 0xC14
	; X64-NEXT: shrl $19, %eax			; X86-BMI1NOTBM-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax
	; X64-NEXT: andl $-2, %eax			; X86-BMI1NOTBM-NEXT: addl %eax, %eax
	; X64-NEXT: retq			; X86-BMI1NOTBM-NEXT: retl
				;
				; X86-BMI1TBM-LABEL: c4_i32_bad:
				; X86-BMI1TBM: # %bb.0:
				; X86-BMI1TBM-NEXT: bextrl $3092, {{[0-9]+}}(%esp), %eax # imm = 0xC14
				; X86-BMI1TBM-NEXT: addl %eax, %eax
				; X86-BMI1TBM-NEXT: retl
				;
				; X86-BMI1NOTBMBMI2-LABEL: c4_i32_bad:
				; X86-BMI1NOTBMBMI2: # %bb.0:
				; X86-BMI1NOTBMBMI2-NEXT: movl $3092, %eax # imm = 0xC14
				; X86-BMI1NOTBMBMI2-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax
				; X86-BMI1NOTBMBMI2-NEXT: addl %eax, %eax
				; X86-BMI1NOTBMBMI2-NEXT: retl
				;
				; X64-NOBMI-LABEL: c4_i32_bad:
				; X64-NOBMI: # %bb.0:
				; X64-NOBMI-NEXT: movl %edi, %eax
				; X64-NOBMI-NEXT: shrl $19, %eax
				; X64-NOBMI-NEXT: andl $-2, %eax
				; X64-NOBMI-NEXT: retq
				;
				; X64-BMI1NOTBM-LABEL: c4_i32_bad:
				; X64-BMI1NOTBM: # %bb.0:
				; X64-BMI1NOTBM-NEXT: movl $3092, %eax # imm = 0xC14
				; X64-BMI1NOTBM-NEXT: bextrl %eax, %edi, %eax
				; X64-BMI1NOTBM-NEXT: addl %eax, %eax
				; X64-BMI1NOTBM-NEXT: retq
				;
				; X64-BMI1TBM-LABEL: c4_i32_bad:
				; X64-BMI1TBM: # %bb.0:
				; X64-BMI1TBM-NEXT: bextrl $3092, %edi, %eax # imm = 0xC14
				; X64-BMI1TBM-NEXT: addl %eax, %eax
				; X64-BMI1TBM-NEXT: retq
				;
				; X64-BMI1NOTBMBMI2-LABEL: c4_i32_bad:
				; X64-BMI1NOTBMBMI2: # %bb.0:
				; X64-BMI1NOTBMBMI2-NEXT: movl $3092, %eax # imm = 0xC14
				; X64-BMI1NOTBMBMI2-NEXT: bextrl %eax, %edi, %eax
				; X64-BMI1NOTBMBMI2-NEXT: addl %eax, %eax
				; X64-BMI1NOTBMBMI2-NEXT: retq
	%tmp0 = lshr i32 %arg, 19			%tmp0 = lshr i32 %arg, 19
	%tmp1 = and i32 %tmp0, 16382			%tmp1 = and i32 %tmp0, 16382
	ret i32 %tmp1			ret i32 %tmp1
	}			}

	; i64			; i64

	; The most canonical variant			; The most canonical variant
	▲ Show 20 Lines • Show All 51 Lines • ▼ Show 20 Lines
	; X64-BMI1NOTBMBMI2-NEXT: retq			; X64-BMI1NOTBMBMI2-NEXT: retq
	%tmp0 = lshr i64 %arg, 51			%tmp0 = lshr i64 %arg, 51
	%tmp1 = and i64 %tmp0, 1023			%tmp1 = and i64 %tmp0, 1023
	ret i64 %tmp1			ret i64 %tmp1
	}			}

	; Should be still fine, but the mask is shifted			; Should be still fine, but the mask is shifted
	define i64 @c1_i64(i64 %arg) {			define i64 @c1_i64(i64 %arg) {
	; X86-LABEL: c1_i64:			; X86-NOBMI-LABEL: c1_i64:
	; X86: # %bb.0:			; X86-NOBMI: # %bb.0:
	; X86-NEXT: movl {{[0-9]+}}(%esp), %eax			; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax
	; X86-NEXT: shrl $19, %eax			; X86-NOBMI-NEXT: shrl $19, %eax
	; X86-NEXT: andl $4092, %eax # imm = 0xFFC			; X86-NOBMI-NEXT: andl $4092, %eax # imm = 0xFFC
	; X86-NEXT: xorl %edx, %edx			; X86-NOBMI-NEXT: xorl %edx, %edx
	; X86-NEXT: retl			; X86-NOBMI-NEXT: retl
	;			;
	; X64-LABEL: c1_i64:			; X86-BMI1NOTBM-LABEL: c1_i64:
	; X64: # %bb.0:			; X86-BMI1NOTBM: # %bb.0:
	; X64-NEXT: movq %rdi, %rax			; X86-BMI1NOTBM-NEXT: movl $2581, %eax # imm = 0xA15
	; X64-NEXT: shrq $51, %rax			; X86-BMI1NOTBM-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax
	; X64-NEXT: andl $4092, %eax # imm = 0xFFC			; X86-BMI1NOTBM-NEXT: shll $2, %eax
	; X64-NEXT: retq			; X86-BMI1NOTBM-NEXT: xorl %edx, %edx
				; X86-BMI1NOTBM-NEXT: retl
				;
				; X86-BMI1TBM-LABEL: c1_i64:
				; X86-BMI1TBM: # %bb.0:
				; X86-BMI1TBM-NEXT: bextrl $2581, {{[0-9]+}}(%esp), %eax # imm = 0xA15
				; X86-BMI1TBM-NEXT: shll $2, %eax
				; X86-BMI1TBM-NEXT: xorl %edx, %edx
				; X86-BMI1TBM-NEXT: retl
				;
				; X86-BMI1NOTBMBMI2-LABEL: c1_i64:
				; X86-BMI1NOTBMBMI2: # %bb.0:
				; X86-BMI1NOTBMBMI2-NEXT: movl $2581, %eax # imm = 0xA15
				; X86-BMI1NOTBMBMI2-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax
				; X86-BMI1NOTBMBMI2-NEXT: shll $2, %eax
				; X86-BMI1NOTBMBMI2-NEXT: xorl %edx, %edx
				; X86-BMI1NOTBMBMI2-NEXT: retl
				;
				; X64-NOBMI-LABEL: c1_i64:
				; X64-NOBMI: # %bb.0:
				; X64-NOBMI-NEXT: movq %rdi, %rax
				; X64-NOBMI-NEXT: shrq $51, %rax
				; X64-NOBMI-NEXT: andl $4092, %eax # imm = 0xFFC
				; X64-NOBMI-NEXT: retq
				;
				; X64-BMI1NOTBM-LABEL: c1_i64:
				; X64-BMI1NOTBM: # %bb.0:
				; X64-BMI1NOTBM-NEXT: movl $2613, %eax # imm = 0xA35
				; X64-BMI1NOTBM-NEXT: bextrq %rax, %rdi, %rax
				; X64-BMI1NOTBM-NEXT: shlq $2, %rax
				; X64-BMI1NOTBM-NEXT: retq
				;
				; X64-BMI1TBM-LABEL: c1_i64:
				; X64-BMI1TBM: # %bb.0:
				; X64-BMI1TBM-NEXT: bextrq $2613, %rdi, %rax # imm = 0xA35
				; X64-BMI1TBM-NEXT: shlq $2, %rax
				; X64-BMI1TBM-NEXT: retq
				;
				; X64-BMI1NOTBMBMI2-LABEL: c1_i64:
				; X64-BMI1NOTBMBMI2: # %bb.0:
				; X64-BMI1NOTBMBMI2-NEXT: movl $2613, %eax # imm = 0xA35
				; X64-BMI1NOTBMBMI2-NEXT: bextrq %rax, %rdi, %rax
				; X64-BMI1NOTBMBMI2-NEXT: shlq $2, %rax
				; X64-BMI1NOTBMBMI2-NEXT: retq
	%tmp0 = lshr i64 %arg, 51			%tmp0 = lshr i64 %arg, 51
	%tmp1 = and i64 %tmp0, 4092			%tmp1 = and i64 %tmp0, 4092
	ret i64 %tmp1			ret i64 %tmp1
	}			}

	; Should be still fine, but the result is shifted left afterwards			; Should be still fine, but the result is shifted left afterwards
	define i64 @c2_i64(i64 %arg) {			define i64 @c2_i64(i64 %arg) {
	; X86-LABEL: c2_i64:			; X86-NOBMI-LABEL: c2_i64:
	; X86: # %bb.0:			; X86-NOBMI: # %bb.0:
	; X86-NEXT: movl {{[0-9]+}}(%esp), %eax			; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax
	; X86-NEXT: shrl $17, %eax			; X86-NOBMI-NEXT: shrl $17, %eax
	; X86-NEXT: andl $4092, %eax # imm = 0xFFC			; X86-NOBMI-NEXT: andl $4092, %eax # imm = 0xFFC
	; X86-NEXT: xorl %edx, %edx			; X86-NOBMI-NEXT: xorl %edx, %edx
	; X86-NEXT: retl			; X86-NOBMI-NEXT: retl
	;			;
	; X64-LABEL: c2_i64:			; X86-BMI1NOTBM-LABEL: c2_i64:
	; X64: # %bb.0:			; X86-BMI1NOTBM: # %bb.0:
	; X64-NEXT: movq %rdi, %rax			; X86-BMI1NOTBM-NEXT: movl $2579, %eax # imm = 0xA13
	; X64-NEXT: shrq $49, %rax			; X86-BMI1NOTBM-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax
	; X64-NEXT: andl $4092, %eax # imm = 0xFFC			; X86-BMI1NOTBM-NEXT: shll $2, %eax
	; X64-NEXT: retq			; X86-BMI1NOTBM-NEXT: xorl %edx, %edx
				; X86-BMI1NOTBM-NEXT: retl
				;
				; X86-BMI1TBM-LABEL: c2_i64:
				; X86-BMI1TBM: # %bb.0:
				; X86-BMI1TBM-NEXT: bextrl $2579, {{[0-9]+}}(%esp), %eax # imm = 0xA13
				; X86-BMI1TBM-NEXT: shll $2, %eax
				; X86-BMI1TBM-NEXT: xorl %edx, %edx
				; X86-BMI1TBM-NEXT: retl
				;
				; X86-BMI1NOTBMBMI2-LABEL: c2_i64:
				; X86-BMI1NOTBMBMI2: # %bb.0:
				; X86-BMI1NOTBMBMI2-NEXT: movl $2579, %eax # imm = 0xA13
				; X86-BMI1NOTBMBMI2-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax
				; X86-BMI1NOTBMBMI2-NEXT: shll $2, %eax
				; X86-BMI1NOTBMBMI2-NEXT: xorl %edx, %edx
				; X86-BMI1NOTBMBMI2-NEXT: retl
				;
				; X64-NOBMI-LABEL: c2_i64:
				; X64-NOBMI: # %bb.0:
				; X64-NOBMI-NEXT: movq %rdi, %rax
				; X64-NOBMI-NEXT: shrq $49, %rax
				; X64-NOBMI-NEXT: andl $4092, %eax # imm = 0xFFC
				; X64-NOBMI-NEXT: retq
				;
				; X64-BMI1NOTBM-LABEL: c2_i64:
				; X64-BMI1NOTBM: # %bb.0:
				; X64-BMI1NOTBM-NEXT: movl $2611, %eax # imm = 0xA33
				; X64-BMI1NOTBM-NEXT: bextrq %rax, %rdi, %rax
				; X64-BMI1NOTBM-NEXT: shlq $2, %rax
				; X64-BMI1NOTBM-NEXT: retq
				;
				; X64-BMI1TBM-LABEL: c2_i64:
				; X64-BMI1TBM: # %bb.0:
				; X64-BMI1TBM-NEXT: bextrq $2611, %rdi, %rax # imm = 0xA33
				; X64-BMI1TBM-NEXT: shlq $2, %rax
				; X64-BMI1TBM-NEXT: retq
				;
				; X64-BMI1NOTBMBMI2-LABEL: c2_i64:
				; X64-BMI1NOTBMBMI2: # %bb.0:
				; X64-BMI1NOTBMBMI2-NEXT: movl $2611, %eax # imm = 0xA33
				; X64-BMI1NOTBMBMI2-NEXT: bextrq %rax, %rdi, %rax
				; X64-BMI1NOTBMBMI2-NEXT: shlq $2, %rax
				; X64-BMI1NOTBMBMI2-NEXT: retq
	%tmp0 = lshr i64 %arg, 51			%tmp0 = lshr i64 %arg, 51
	%tmp1 = and i64 %tmp0, 1023			%tmp1 = and i64 %tmp0, 1023
	%tmp2 = shl i64 %tmp1, 2			%tmp2 = shl i64 %tmp1, 2
	ret i64 %tmp2			ret i64 %tmp2
	}			}

	; The mask covers newly shifted-in bit			; The mask covers newly shifted-in bit
	define i64 @c4_i64_bad(i64 %arg) {			define i64 @c4_i64_bad(i64 %arg) {
	; X86-LABEL: c4_i64_bad:			; X86-NOBMI-LABEL: c4_i64_bad:
	; X86: # %bb.0:			; X86-NOBMI: # %bb.0:
	; X86-NEXT: movl {{[0-9]+}}(%esp), %eax			; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax
	; X86-NEXT: shrl $19, %eax			; X86-NOBMI-NEXT: shrl $19, %eax
	; X86-NEXT: andl $-2, %eax			; X86-NOBMI-NEXT: andl $-2, %eax
	; X86-NEXT: xorl %edx, %edx			; X86-NOBMI-NEXT: xorl %edx, %edx
	; X86-NEXT: retl			; X86-NOBMI-NEXT: retl
	;			;
	; X64-LABEL: c4_i64_bad:			; X86-BMI1NOTBM-LABEL: c4_i64_bad:
	; X64: # %bb.0:			; X86-BMI1NOTBM: # %bb.0:
	; X64-NEXT: movq %rdi, %rax			; X86-BMI1NOTBM-NEXT: movl $3092, %eax # imm = 0xC14
	; X64-NEXT: shrq $51, %rax			; X86-BMI1NOTBM-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax
	; X64-NEXT: andl $-2, %eax			; X86-BMI1NOTBM-NEXT: addl %eax, %eax
	; X64-NEXT: retq			; X86-BMI1NOTBM-NEXT: xorl %edx, %edx
				; X86-BMI1NOTBM-NEXT: retl
				;
				; X86-BMI1TBM-LABEL: c4_i64_bad:
				; X86-BMI1TBM: # %bb.0:
				; X86-BMI1TBM-NEXT: bextrl $3092, {{[0-9]+}}(%esp), %eax # imm = 0xC14
				; X86-BMI1TBM-NEXT: addl %eax, %eax
				; X86-BMI1TBM-NEXT: xorl %edx, %edx
				; X86-BMI1TBM-NEXT: retl
				;
				; X86-BMI1NOTBMBMI2-LABEL: c4_i64_bad:
				; X86-BMI1NOTBMBMI2: # %bb.0:
				; X86-BMI1NOTBMBMI2-NEXT: movl $3092, %eax # imm = 0xC14
				; X86-BMI1NOTBMBMI2-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax
				; X86-BMI1NOTBMBMI2-NEXT: addl %eax, %eax
				; X86-BMI1NOTBMBMI2-NEXT: xorl %edx, %edx
				; X86-BMI1NOTBMBMI2-NEXT: retl
				;
				; X64-NOBMI-LABEL: c4_i64_bad:
				; X64-NOBMI: # %bb.0:
				; X64-NOBMI-NEXT: movq %rdi, %rax
				; X64-NOBMI-NEXT: shrq $51, %rax
				; X64-NOBMI-NEXT: andl $-2, %eax
				; X64-NOBMI-NEXT: retq
				;
				; X64-BMI1NOTBM-LABEL: c4_i64_bad:
				; X64-BMI1NOTBM: # %bb.0:
				; X64-BMI1NOTBM-NEXT: movl $3124, %eax # imm = 0xC34
				; X64-BMI1NOTBM-NEXT: bextrq %rax, %rdi, %rax
				; X64-BMI1NOTBM-NEXT: addq %rax, %rax
				; X64-BMI1NOTBM-NEXT: retq
				;
				; X64-BMI1TBM-LABEL: c4_i64_bad:
				; X64-BMI1TBM: # %bb.0:
				; X64-BMI1TBM-NEXT: bextrq $3124, %rdi, %rax # imm = 0xC34
				; X64-BMI1TBM-NEXT: addq %rax, %rax
				; X64-BMI1TBM-NEXT: retq
				;
				; X64-BMI1NOTBMBMI2-LABEL: c4_i64_bad:
				; X64-BMI1NOTBMBMI2: # %bb.0:
				; X64-BMI1NOTBMBMI2-NEXT: movl $3124, %eax # imm = 0xC34
				; X64-BMI1NOTBMBMI2-NEXT: bextrq %rax, %rdi, %rax
				; X64-BMI1NOTBMBMI2-NEXT: addq %rax, %rax
				; X64-BMI1NOTBMBMI2-NEXT: retq
	%tmp0 = lshr i64 %arg, 51			%tmp0 = lshr i64 %arg, 51
	%tmp1 = and i64 %tmp0, 16382			%tmp1 = and i64 %tmp0, 16382
	ret i64 %tmp1			ret i64 %tmp1
	}			}

test/CodeGen/X86/tbm_patterns.ll

Show First 20 Lines • Show All 43 Lines • ▼ Show 20 Lines
; CHECK-NEXT: retq		; CHECK-NEXT: retq
%t0 = lshr i32 %a, 4		%t0 = lshr i32 %a, 4
%t1 = and i32 %t0, 4095		%t1 = and i32 %t0, 4095
%t2 = icmp eq i32 %t1, 0		%t2 = icmp eq i32 %t1, 0
%t3 = select i1 %t2, i32 %b, i32 %t1		%t3 = select i1 %t2, i32 %b, i32 %t1
ret i32 %t3		ret i32 %t3
}		}

define i32 @test_x86_tbm_bextri_u32_z2(i32 %a, i32 %b, i32 %c) nounwind {		define i32 @test_x86_tbm_bextri_u32_z2(i32 %a, i32 %b, i32 %c) nounwind {
; CHECK-LABEL: test_x86_tbm_bextri_u32_z2:		; CHECK-LABEL: test_x86_tbm_bextri_u32_z2:
; CHECK: # %bb.0:		; CHECK: # %bb.0:
; CHECK-NEXT: movl %esi, %eax		; CHECK-NEXT: movl %esi, %eax
; CHECK-NEXT: shrl $4, %edi		; CHECK-NEXT: bextrl $3076, %edi, %ecx # imm = 0xC04
; CHECK-NEXT: testl $4095, %edi # imm = 0xFFF
; CHECK-NEXT: cmovnel %edx, %eax		; CHECK-NEXT: cmovnel %edx, %eax
; CHECK-NEXT: retq		; CHECK-NEXT: retq
%t0 = lshr i32 %a, 4		%t0 = lshr i32 %a, 4
%t1 = and i32 %t0, 4095		%t1 = and i32 %t0, 4095
%t2 = icmp eq i32 %t1, 0		%t2 = icmp eq i32 %t1, 0
%t3 = select i1 %t2, i32 %b, i32 %c		%t3 = select i1 %t2, i32 %b, i32 %c
ret i32 %t3		ret i32 %t3
}		}
		lebedev.ri (author, inline comment, marked Done): This seems to be a miscompile, `BEXTR` does not touch EFLAGS?
		lebedev.ri (author, inline comment, not marked Done): Aha, it is not a miscompile, I just can't read: `BEXTR` does update EFLAGS (ZF is set from the result), so the following `cmovne` is valid. http://www.felixcloutier.com/x86/BEXTR.html#flags-affected

define i64 @test_x86_tbm_bextri_u64(i64 %a) nounwind {		define i64 @test_x86_tbm_bextri_u64(i64 %a) nounwind {
; CHECK-LABEL: test_x86_tbm_bextri_u64:		; CHECK-LABEL: test_x86_tbm_bextri_u64:
; CHECK: # %bb.0:		; CHECK: # %bb.0:
; CHECK-NEXT: bextrl $3076, %edi, %eax # imm = 0xC04		; CHECK-NEXT: bextrl $3076, %edi, %eax # imm = 0xC04
; CHECK-NEXT: retq		; CHECK-NEXT: retq
%t0 = lshr i64 %a, 4		%t0 = lshr i64 %a, 4
%t1 = and i64 %t0, 4095		%t1 = and i64 %t0, 4095
Show All 35 Lines	; CHECK-NEXT: retq
%t3 = select i1 %t2, i64 %b, i64 %t1		%t3 = select i1 %t2, i64 %b, i64 %t1
ret i64 %t3		ret i64 %t3
}		}

define i64 @test_x86_tbm_bextri_u64_z2(i64 %a, i64 %b, i64 %c) nounwind {		define i64 @test_x86_tbm_bextri_u64_z2(i64 %a, i64 %b, i64 %c) nounwind {
; CHECK-LABEL: test_x86_tbm_bextri_u64_z2:		; CHECK-LABEL: test_x86_tbm_bextri_u64_z2:
; CHECK: # %bb.0:		; CHECK: # %bb.0:
; CHECK-NEXT: movq %rsi, %rax		; CHECK-NEXT: movq %rsi, %rax
; CHECK-NEXT: shrl $4, %edi		; CHECK-NEXT: bextrl $3076, %edi, %ecx # imm = 0xC04
; CHECK-NEXT: testl $4095, %edi # imm = 0xFFF
; CHECK-NEXT: cmovneq %rdx, %rax		; CHECK-NEXT: cmovneq %rdx, %rax
; CHECK-NEXT: retq		; CHECK-NEXT: retq
%t0 = lshr i64 %a, 4		%t0 = lshr i64 %a, 4
%t1 = and i64 %t0, 4095		%t1 = and i64 %t0, 4095
%t2 = icmp eq i64 %t1, 0		%t2 = icmp eq i64 %t1, 0
%t3 = select i1 %t2, i64 %b, i64 %c		%t3 = select i1 %t2, i64 %b, i64 %c
ret i64 %t3		ret i64 %t3
}		}
▲ Show 20 Lines • Show All 796 Lines • Show Last 20 Lines

This is an archive of the discontinued LLVM Phabricator instance.

[X86] Move X86DAGToDAGISel::matchBEXTRFromAnd() into X86ISelLowering
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 167788

lib/Target/X86/X86ISelDAGToDAG.cpp

lib/Target/X86/X86ISelLowering.cpp

test/CodeGen/X86/bmi-x86_64.ll

test/CodeGen/X86/extract-bits.ll

test/CodeGen/X86/tbm_patterns.ll

This is an archive of the discontinued LLVM Phabricator instance.

[X86] Move X86DAGToDAGISel::matchBEXTRFromAnd() into X86ISelLowering
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 167788

lib/Target/X86/X86ISelDAGToDAG.cpp

lib/Target/X86/X86ISelLowering.cpp

test/CodeGen/X86/bmi-x86_64.ll

test/CodeGen/X86/extract-bits.ll

test/CodeGen/X86/tbm_patterns.ll

[X86] Move X86DAGToDAGISel::matchBEXTRFromAnd() into X86ISelLowering
ClosedPublic