This is an archive of the discontinued LLVM Phabricator instance.

[DAGCombiner][x86] add transform/hook to load a scalar directly for use in a vector binop
Needs Review · Public

Authored by spatel on Aug 31 2018, 12:38 PM.

Details

Summary

This is the reversal of the proposed IR canonicalization in D50992 (insert + vector op --> scalar op + insert).

I've enabled x86 in a minimal way because this looks like a close call for recent Intel CPUs. They have fast (1 uop / 1-cycle latency) transfers from GPR to *MM registers according to Agner Fog / llvm-mca. And the current AVX2 code is likely too broadcast-happy, as seen in the test diffs.

So this doesn't exercise the recent broadcast improvements from D51125 / D51186 yet.

Enabling more opcodes/types (the test file covers all 18 IR-equivalent binops) looks tricky. For example, we should have caught the 'and' with i32 test, but v4i32 is promoted to v2i64. And pre-SSE4.1, we don't have pmulld, so we can't uniformly allow isLegalOrCustom().
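
To make the shape of the fold concrete, here is a minimal sketch in DAGCombiner style. The function name and exact guards are invented for illustration; the actual patch routes profitability through a target hook, and a real implementation would also need to verify that the loaded scalar type matches the vector element type and that we're running before type legalization:

static SDValue combineInsertOfScalarBinOp(SDNode *N, SelectionDAG &DAG,
                                          const TargetLowering &TLI) {
  // Match: insert_vector_elt undef, (binop (load X), Y), 0
  SDValue Vec = N->getOperand(0);
  SDValue Scl = N->getOperand(1);
  SDValue Idx = N->getOperand(2);
  EVT VT = N->getValueType(0);
  if (!Vec.isUndef() || !isNullConstant(Idx) || !Scl.hasOneUse() ||
      Scl.getNumOperands() != 2)
    return SDValue();

  // The scalar opcode must be supported as a vector op on VT, and the
  // first operand must be a plain load that we can use directly.
  unsigned Opc = Scl.getOpcode();
  if (!TLI.isOperationLegalOrCustom(Opc, VT) ||
      !ISD::isNormalLoad(Scl.getOperand(0).getNode()))
    return SDValue();

  // Build: binop (scalar_to_vector (load X)), (splat Y)
  // Splatting Y is safe here because only lane 0 of the original insert
  // was defined; the other lanes were undef.
  SDLoc DL(N);
  SDValue Ld = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Scl.getOperand(0));
  SDValue Ops = DAG.getSplatBuildVector(VT, DL, Scl.getOperand(1));
  return DAG.getNode(Opc, DL, VT, Ld, Ops);
}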

Event Timeline

spatel created this revision. Aug 31 2018, 12:38 PM
spatel edited the summary of this revision. Sep 2 2018, 6:46 AM

This looks reasonable to me; some nits inline.

include/llvm/CodeGen/TargetLowering.h
535 ↗(On Diff #163575)

You probably want to add some explanatory comment here.

lib/CodeGen/SelectionDAG/DAGCombiner.cpp
15106 ↗(On Diff #163575)

Do we need to check that BO.getOpcode() is actually valid for VecVT?
Or are we ok with scalarization?

test/CodeGen/X86/load-scalar-as-vector.ll
23 ↗(On Diff #163575)

Why is there a vpbroadcastd here, compared to the AVX1 version?

spatel marked 2 inline comments as done. Sep 12 2018, 3:27 PM
spatel added inline comments.
include/llvm/CodeGen/TargetLowering.h
535 ↗(On Diff #163575)

Oops - yes, I forgot that.

lib/CodeGen/SelectionDAG/DAGCombiner.cpp
15106 ↗(On Diff #163575)

Scalarization would almost certainly defeat the point of this transform, but I think it's better to have all of those checks in one place - in the target hook.
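
For illustration, the hook could look something like the declaration below in TargetLowering.h. The name, signature, and default here are guesses for this sketch, not the patch's actual declaration:

/// Return true if it is profitable to convert a scalar load feeding a
/// scalar binop with opcode \p Opcode into a vector load feeding the
/// equivalent vector binop of type \p VecVT. A target should only return
/// true when the vector op is legal or custom for \p VecVT, because
/// letting it be scalarized or split later would defeat the transform.
virtual bool shouldLoadScalarAsVectorBinop(unsigned Opcode, EVT VecVT) const {
  // Off by default; targets opt in per opcode and type.
  return false;
}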

test/CodeGen/X86/load-scalar-as-vector.ll
23 ↗(On Diff #163575)

For AVX2 targets, we assume that a load+broadcast costs no more than a full vector load, but it saves data space, so that's the default. I suspect that we need to revisit that per-CPU and see if that's actually true.

spatel marked 2 inline comments as done. Sep 12 2018, 3:31 PM
spatel added inline comments.
test/CodeGen/X86/load-scalar-as-vector.ll
23 ↗(On Diff #163575)

Oops - in this case, we loaded a scalar though, so there must be something going wrong with our undef knowledge.

spatel updated this revision to Diff 165173. Sep 12 2018, 3:49 PM

Patch updated:
Added documentation comment for the new target hook.

My main concern is the growth in the constant pool due to vector constants.

> My main concern is the growth in the constant pool due to vector constants.

I agree that it's a trade-off, but I don't think we can make an informed decision about it at this level. We can fine-tune the hook in this patch if we see regressions, but we're still just guessing at the eventual global outcome.
So without that kind of knowledge, we have to look at the micro-motivation.

Here's llvm-mca output using scalar math on btver2 (Jaguar):

[0,0]     DeeeeeER  .    ..   movl	(%rdi), %eax
[0,1]     D=====eER .    ..   addl	$42, %eax
[0,2]     .D=====eeeeeeeeER   movd	%eax, %xmm0  <--- move to vector register has 8-cycle latency?

vs. vector op:

[0,0]     DeeeeeER  .   movd	(%rdi), %xmm0
[0,1]     D==eeeeeeER   paddd	32(%rip), %xmm0

So doing the transform seems like the right choice here?

andreadb added inline comments. Sep 21 2018, 10:20 AM
lib/CodeGen/SelectionDAG/DAGCombiner.cpp
15103–15105 ↗(On Diff #165173)

I wonder if this should be disabled for optsize/minsize.
On x86, this is going to increase .text, and it also potentially introduces a new constant in the pool.

mov (%rdi), %eax
add $0x2a, %eax
vmovd %eax, %xmm0
retq

is 10 bytes.

vmovd (%rdi), %xmm0
vpaddd 0x0(%rip), %xmm0, %xmm0  ## 4 bytes PC relative relocation
retq

is 13 bytes.

Plus, 16B in the constant pool (if we didn't see that constant before)...

spatel marked an inline comment as done. Sep 21 2018, 11:07 AM
spatel added inline comments.
lib/CodeGen/SelectionDAG/DAGCombiner.cpp
15103–15105 ↗(On Diff #165173)

Yes, that seems like a good constraint for x86. I'm not sure that other targets would want the same limitation though, so I think we should make that another option in the target hook itself rather than here in generic code.
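
With the size constraint folded in, the x86 override might look roughly like the sketch below. The hook name follows the earlier sketch, with the signature extended to carry an optimize-for-size flag; none of this is the patch's literal code:

bool X86TargetLowering::shouldLoadScalarAsVectorBinop(unsigned Opcode,
                                                      EVT VecVT,
                                                      bool OptForSize) const {
  // The vector form costs extra code bytes and possibly a 16-byte constant
  // pool entry, so keep the scalar form when optimizing for size.
  if (OptForSize)
    return false;

  // Don't fire if the vector op would be scalarized or split later.
  if (!isOperationLegalOrCustom(Opcode, VecVT))
    return false;

  // Start minimal: 128-bit vectors only, where GPR->XMM transfer is cheap
  // on recent cores.
  return VecVT.is128BitVector();
}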

spatel updated this revision to Diff 166519. Sep 21 2018, 11:11 AM
spatel marked an inline comment as done.

Patch updated:
Disallow the transform on x86 if we're optimizing for size. Test added at rL342755.

RKSimon added inline comments. Sep 29 2018, 3:50 AM
test/CodeGen/X86/load-scalar-as-vector.ll
5 ↗(On Diff #166519)

Please can you rebase, since I added AVX512 tests at rL342773, and possibly add some tests with 256/512-bit vectors as well?

spatel added inline comments. Oct 1 2018, 9:32 AM
test/CodeGen/X86/load-scalar-as-vector.ll
5 ↗(On Diff #166519)

Sure - there's no difference for AVX512 with the current set of tests on this patch, but there should be after rL343491.

spatel updated this revision to Diff 167752. Oct 1 2018, 10:02 AM

Patch updated:
No code changes, but I added 256/512-bit versions of the 128-bit tests that are affected by this patch.

The new results show that we are only transforming in the cases where we have legal ops for the type. I.e., this transform does not fire after we split the big vectors into the smaller legal vector type supported by the target. That's because we don't have an insert_vector_elt at that stage; it's a scalar_to_vector post-legalization.
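
A sketch of what that follow-up match might look like; the helper name is invented here, and it simply abstracts over the two node forms so the rest of the combine can stay shared:

static SDValue getScalarInsertedIntoLaneZero(SDNode *N) {
  // Post-legalization form: the whole vector is built from one scalar.
  if (N->getOpcode() == ISD::SCALAR_TO_VECTOR)
    return N->getOperand(0);
  // Pre-legalization form: an insert into lane 0 of an undef vector.
  if (N->getOpcode() == ISD::INSERT_VECTOR_ELT &&
      N->getOperand(0).isUndef() && isNullConstant(N->getOperand(2)))
    return N->getOperand(1);
  return SDValue();
}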

So we can view the wider cases as conservatively OK here, but we should handle the pattern with scalar_to_vector as a follow-up. Or we might say that we don't want this transform to ever fire for bigger vector types? It's harder to justify this transform for the longer vectors if those instructions are executed as multi-uop sequences and/or have effects that we can't model (throttling the clock rate). OTOH, the transform is currently limited to insertion into element 0, so using wide vector ops in these tests isn't necessary in the first place.

For an insert into lane zero, it's never profitable to generate ymm/zmm instructions; we can just generate an xmm instruction and implicitly extend the result.

> For an insert into lane zero, it's never profitable to generate ymm/zmm instructions; we can just generate an xmm instruction and implicitly extend the result.

Right - that's what I was suggesting at the end of my last reply. So do we want to put this patch on hold to improve the undef/splat behaviors seen here first, or proceed here and file bugs for those problems?

What's the status here?

spatel updated this revision to Diff 206063. Jun 21 2019, 1:30 PM

Patch rebased - at first glance, we haven't made much progress on the issues raised months ago despite dozens of narrowing/shrinking/demanded bits/demanded elts patches.

Herald added a project: Restricted Project. Jun 21 2019, 1:30 PM
Herald added a subscriber: hiraditya.
lebedev.ri requested changes to this revision. Jul 10 2019, 3:30 PM

(just removing from review queue)

This revision now requires changes to proceed. Jul 10 2019, 3:30 PM
spatel updated this revision to Diff 269880. Jun 10 2020, 9:41 AM

Rebased to apply cleanly to current master, but the test diffs remain unchanged. We are still creating vector ops that are too wide.

This seems easier to implement in vector-combine now that we have it. We will need to make sure that we are not fighting the recently added scalarization transforms.

lebedev.ri resigned from this revision. Jan 12 2023, 5:15 PM

This review seems to be stuck/dead; consider abandoning it if no longer relevant.

Herald added a project: Restricted Project. Jan 12 2023, 5:15 PM