Diff 143895

llvm/include/llvm/IR/IntrinsicsX86.td

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 405 Lines • ▼ Show 20 Lines	def int_x86_sse2_pmulhu_w : GCCBuiltin<"__builtin_ia32_pmulhuw128">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,		Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
llvm_v8i16_ty], [IntrNoMem, Commutative]>;		llvm_v8i16_ty], [IntrNoMem, Commutative]>;
def int_x86_sse2_pmulh_w : GCCBuiltin<"__builtin_ia32_pmulhw128">,		def int_x86_sse2_pmulh_w : GCCBuiltin<"__builtin_ia32_pmulhw128">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,		Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
llvm_v8i16_ty], [IntrNoMem, Commutative]>;		llvm_v8i16_ty], [IntrNoMem, Commutative]>;
def int_x86_sse2_pmadd_wd : GCCBuiltin<"__builtin_ia32_pmaddwd128">,		def int_x86_sse2_pmadd_wd : GCCBuiltin<"__builtin_ia32_pmaddwd128">,
Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty,		Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty,
llvm_v8i16_ty], [IntrNoMem, Commutative]>;		llvm_v8i16_ty], [IntrNoMem, Commutative]>;
def int_x86_sse2_psad_bw : GCCBuiltin<"__builtin_ia32_psadbw128">,
Intrinsic<[llvm_v2i64_ty], [llvm_v16i8_ty,
llvm_v16i8_ty], [IntrNoMem, Commutative]>;
}		}

// Integer shift ops.		// Integer shift ops.
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".		let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_sse2_psll_w : GCCBuiltin<"__builtin_ia32_psllw128">,		def int_x86_sse2_psll_w : GCCBuiltin<"__builtin_ia32_psllw128">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,		Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
llvm_v8i16_ty], [IntrNoMem]>;		llvm_v8i16_ty], [IntrNoMem]>;
def int_x86_sse2_psll_d : GCCBuiltin<"__builtin_ia32_pslld128">,		def int_x86_sse2_psll_d : GCCBuiltin<"__builtin_ia32_pslld128">,
▲ Show 20 Lines • Show All 1,230 Lines • ▼ Show 20 Lines	def int_x86_avx2_pmulhu_w : GCCBuiltin<"__builtin_ia32_pmulhuw256">,
Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,		Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
llvm_v16i16_ty], [IntrNoMem, Commutative]>;		llvm_v16i16_ty], [IntrNoMem, Commutative]>;
def int_x86_avx2_pmulh_w : GCCBuiltin<"__builtin_ia32_pmulhw256">,		def int_x86_avx2_pmulh_w : GCCBuiltin<"__builtin_ia32_pmulhw256">,
Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,		Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
llvm_v16i16_ty], [IntrNoMem, Commutative]>;		llvm_v16i16_ty], [IntrNoMem, Commutative]>;
def int_x86_avx2_pmadd_wd : GCCBuiltin<"__builtin_ia32_pmaddwd256">,		def int_x86_avx2_pmadd_wd : GCCBuiltin<"__builtin_ia32_pmaddwd256">,
Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty,		Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty,
llvm_v16i16_ty], [IntrNoMem, Commutative]>;		llvm_v16i16_ty], [IntrNoMem, Commutative]>;
def int_x86_avx2_psad_bw : GCCBuiltin<"__builtin_ia32_psadbw256">,
Intrinsic<[llvm_v4i64_ty], [llvm_v32i8_ty,
llvm_v32i8_ty], [IntrNoMem, Commutative]>;
}		}

// Integer shift ops.		// Integer shift ops.
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".		let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_avx2_psll_w : GCCBuiltin<"__builtin_ia32_psllw256">,		def int_x86_avx2_psll_w : GCCBuiltin<"__builtin_ia32_psllw256">,
Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,		Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
llvm_v8i16_ty], [IntrNoMem]>;		llvm_v8i16_ty], [IntrNoMem]>;
def int_x86_avx2_psll_d : GCCBuiltin<"__builtin_ia32_pslld256">,		def int_x86_avx2_psll_d : GCCBuiltin<"__builtin_ia32_pslld256">,
▲ Show 20 Lines • Show All 3,010 Lines • ▼ Show 20 Lines	let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_avx512_rsqrt28_ss : GCCBuiltin<"__builtin_ia32_rsqrt28ss_round_mask">,		def int_x86_avx512_rsqrt28_ss : GCCBuiltin<"__builtin_ia32_rsqrt28ss_round_mask">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty,		Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty,
llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty],		llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty],
[IntrNoMem]>;		[IntrNoMem]>;
def int_x86_avx512_rsqrt28_sd : GCCBuiltin<"__builtin_ia32_rsqrt28sd_round_mask">,		def int_x86_avx512_rsqrt28_sd : GCCBuiltin<"__builtin_ia32_rsqrt28sd_round_mask">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,		Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty],		llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty],
[IntrNoMem]>;		[IntrNoMem]>;
def int_x86_avx512_psad_bw_512 : GCCBuiltin<"__builtin_ia32_psadbw512">,
Intrinsic<[llvm_v8i64_ty], [llvm_v64i8_ty, llvm_v64i8_ty],
[IntrNoMem, Commutative]>;
}		}
// Integer arithmetic ops		// Integer arithmetic ops
let TargetPrefix = "x86" in {		let TargetPrefix = "x86" in {
def int_x86_avx512_mask_padds_b_128 : // FIXME: remove this intrinsic		def int_x86_avx512_mask_padds_b_128 : // FIXME: remove this intrinsic
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty,		Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty,
llvm_v16i8_ty, llvm_i16_ty], [IntrNoMem]>;		llvm_v16i8_ty, llvm_i16_ty], [IntrNoMem]>;
def int_x86_avx512_mask_padds_b_256 : // FIXME: remove this intrinsic		def int_x86_avx512_mask_padds_b_256 : // FIXME: remove this intrinsic
Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty,		Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty,
▲ Show 20 Lines • Show All 1,702 Lines • Show Last 20 Lines

llvm/lib/IR/AutoUpgrade.cpp

Show First 20 Lines • Show All 292 Lines • ▼ Show 20 Lines	if (Name=="ssse3.pabs.b.128" \|\| // Added in 6.0
Name.startswith("avx512.mask.move.s") \|\| // Added in 4.0		Name.startswith("avx512.mask.move.s") \|\| // Added in 4.0
Name.startswith("avx512.cvtmask2") \|\| // Added in 5.0		Name.startswith("avx512.cvtmask2") \|\| // Added in 5.0
(Name.startswith("xop.vpcom") && // Added in 3.2		(Name.startswith("xop.vpcom") && // Added in 3.2
F->arg_size() == 2) \|\|		F->arg_size() == 2) \|\|
Name.startswith("avx512.ptestm") \|\| //Added in 6.0		Name.startswith("avx512.ptestm") \|\| //Added in 6.0
Name.startswith("avx512.ptestnm") \|\| //Added in 6.0		Name.startswith("avx512.ptestnm") \|\| //Added in 6.0
Name.startswith("sse2.pavg") \|\| // Added in 6.0		Name.startswith("sse2.pavg") \|\| // Added in 6.0
Name.startswith("avx2.pavg") \|\| // Added in 6.0		Name.startswith("avx2.pavg") \|\| // Added in 6.0
Name.startswith("avx512.mask.pavg")) // Added in 6.0		Name.startswith("avx512.mask.pavg") \|\| // Added in 6.0
		Name == "sse2.psad.bw" \|\| // Added in 7.0
		Name == "avx2.psad.bw" \|\| // Added in 7.0
		Name == "avx512.psad.bw.512") // Added in 7.0
return true;		return true;

return false;		return false;
}		}

static bool UpgradeX86IntrinsicFunction(Function *F, StringRef Name,		static bool UpgradeX86IntrinsicFunction(Function *F, StringRef Name,
Function *&NewFn) {		Function *&NewFn) {
// Only handle intrinsics that start with "x86.".		// Only handle intrinsics that start with "x86.".
▲ Show 20 Lines • Show All 867 Lines • ▼ Show 20 Lines	void llvm::UpgradeInlineAsmString(std::string *AsmStr) {
if (AsmStr->find("mov\tfp") == 0 &&		if (AsmStr->find("mov\tfp") == 0 &&
AsmStr->find("objc_retainAutoreleaseReturnValue") != std::string::npos &&		AsmStr->find("objc_retainAutoreleaseReturnValue") != std::string::npos &&
(Pos = AsmStr->find("# marker")) != std::string::npos) {		(Pos = AsmStr->find("# marker")) != std::string::npos) {
AsmStr->replace(Pos, 1, ";");		AsmStr->replace(Pos, 1, ";");
}		}
return;		return;
}		}

		// Upgrades a call to one of the removed PSADBW intrinsics
		// (llvm.x86.sse2.psad.bw, llvm.x86.avx2.psad.bw, llvm.x86.avx512.psad.bw.512)
		// into generic IR.
		//
		// The replacement computes the per-byte unsigned absolute difference
		//   AD[k] = (A[k] > B[k]) ? A[k] - B[k] : B[k] - A[k]
		// and then, for every 64-bit result lane j, sums the eight bytes
		// AD[8*j + 0] .. AD[8*j + 7] after zero-extending them to i64.
		//
		// \param Builder  IRBuilder used to emit the replacement instructions.
		// \param CI       The call being upgraded; its two operands are the byte
		//                 vectors and its type is the vector-of-i64 result.
		// \returns the value that replaces the result of the call.
		static Value *UpgradeX86SAD(IRBuilder<> &Builder, CallInst &CI) {
		  // The operands arrive already bitcast to byte vectors.
		  Value *A = CI.getArgOperand(0);
		  Value *B = CI.getArgOperand(1);
		  // N is the number of qwords (64-bit lanes) in the result.
		  unsigned N = cast<VectorType>(CI.getType())->getBitWidth() / 64;

		  Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_UGT, A, B);
		  // Emit the two subtractions as separate statements: when they are nested
		  // as call arguments their evaluation order is unspecified, which makes
		  // the order of the emitted instructions depend on the host compiler.
		  Value *SubAB = Builder.CreateSub(A, B);
		  Value *SubBA = Builder.CreateSub(B, A);
		  Value *AD = Builder.CreateSelect(Cmp, SubAB, SubBA);

		  Type *VTy = VectorType::get(Builder.getInt64Ty(), N);
		  SmallVector<uint32_t, 8> ShuffleMask(N);
		  Value *Res = nullptr;
		  // For each byte position i within a qword, gather byte i of every qword
		  // (mask <i, i + 8, i + 16, ...>), widen it to i64 and accumulate.
		  for (unsigned i = 0; i < 8; ++i) {
		    for (unsigned j = 0; j < N; ++j)
		      ShuffleMask[j] = i + j * 8;
		    Value *Ext = Builder.CreateZExt(
		        Builder.CreateShuffleVector(AD, AD, ShuffleMask), VTy);
		    Res = Res ? Builder.CreateAdd(Res, Ext) : Ext;
		  }
		  return Res;
		}

/// Upgrade a call to an old intrinsic. All argument and return casting must be		/// Upgrade a call to an old intrinsic. All argument and return casting must be
/// provided to seamlessly integrate with existing context.		/// provided to seamlessly integrate with existing context.
void llvm::UpgradeIntrinsicCall(CallInst CI, Function NewFn) {		void llvm::UpgradeIntrinsicCall(CallInst CI, Function NewFn) {
Function *F = CI->getCalledFunction();		Function *F = CI->getCalledFunction();
LLVMContext &C = CI->getContext();		LLVMContext &C = CI->getContext();
IRBuilder<> Builder(C);		IRBuilder<> Builder(C);
Builder.SetInsertPoint(CI->getParent(), CI->getIterator());		Builder.SetInsertPoint(CI->getParent(), CI->getIterator());

▲ Show 20 Lines • Show All 1,144 Lines • ▼ Show 20 Lines
Rep = Builder.CreateTrunc(ShiftR, A->getType());		Rep = Builder.CreateTrunc(ShiftR, A->getType());
if (CI->getNumArgOperands() > 2) {		if (CI->getNumArgOperands() > 2) {
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,		Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));		CI->getArgOperand(2));
}		}
} else if (IsX86 && Name.startswith("avx512.mask.") &&		} else if (IsX86 && Name.startswith("avx512.mask.") &&
upgradeAVX512MaskToSelect(Name, Builder, *CI, Rep)) {		upgradeAVX512MaskToSelect(Name, Builder, *CI, Rep)) {
// Rep will be updated by the call in the condition.		// Rep will be updated by the call in the condition.
		} else if (IsX86 &&
		(Name.startswith("sse2.psad") \|\| Name.startswith("avx2.psad") \|\|
		Name.startswith("avx512.psad"))) {
		// llvm.x86.sse2.psad.bw, llvm.x86.avx2.psad.bw,
		// llvm.x86.avx512.psad.bw.512
		Rep = UpgradeX86SAD(Builder, *CI);
} else if (IsNVVM && (Name == "abs.i" \|\| Name == "abs.ll")) {		} else if (IsNVVM && (Name == "abs.i" \|\| Name == "abs.ll")) {
Value *Arg = CI->getArgOperand(0);		Value *Arg = CI->getArgOperand(0);
Value *Neg = Builder.CreateNeg(Arg, "neg");		Value *Neg = Builder.CreateNeg(Arg, "neg");
Value *Cmp = Builder.CreateICmpSGE(		Value *Cmp = Builder.CreateICmpSGE(
Arg, llvm::Constant::getNullValue(Arg->getType()), "abs.cond");		Arg, llvm::Constant::getNullValue(Arg->getType()), "abs.cond");
Rep = Builder.CreateSelect(Cmp, Arg, Neg, "abs");		Rep = Builder.CreateSelect(Cmp, Arg, Neg, "abs");
} else if (IsNVVM && (Name == "max.i" \|\| Name == "max.ll" \|\|		} else if (IsNVVM && (Name == "max.i" \|\| Name == "max.ll" \|\|
Name == "max.ui" \|\| Name == "max.ull")) {		Name == "max.ui" \|\| Name == "max.ull")) {
▲ Show 20 Lines • Show All 555 Lines • Show Last 20 Lines

llvm/lib/Target/X86/X86ISelLowering.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 38,045 Lines • ▼ Show 20 Lines	return DAG.getNode(X86ISD::VPMADDWD, DL, ResVT,
DAG.getNode(ISD::TRUNCATE, DL, TruncVT, Ops[0]),		DAG.getNode(ISD::TRUNCATE, DL, TruncVT, Ops[0]),
DAG.getNode(ISD::TRUNCATE, DL, TruncVT, Ops[1]));		DAG.getNode(ISD::TRUNCATE, DL, TruncVT, Ops[1]));
};		};
return SplitOpsAndApply(DAG, Subtarget, DL, VT,		return SplitOpsAndApply(DAG, Subtarget, DL, VT,
{ Mul.getOperand(0), Mul.getOperand(1) },		{ Mul.getOperand(0), Mul.getOperand(1) },
PMADDBuilder);		PMADDBuilder);
}		}

		// Try to find a sum-of-shuffles pattern for PSADBW.
		//
		// Matches an ADD tree over a vector of N i64 lanes (N = 2, 4 or 8) whose
		// eight leaves are zero-extended BUILD_VECTORs that extract bytes
		// <pos, pos + 8, pos + 16, ...> (pos = 0..7, each exactly once) from one
		// common byte vector, and where that byte vector is the unsigned absolute
		// difference select(setcc(A, B, ugt/uge), A - B, B - A).
		//
		// \param Node       The candidate root; must be an ISD::ADD.
		// \param DAG        DAG used to create the replacement node.
		// \param Subtarget  Queried for SSE2 / AVX2 / AVX512+BWI availability.
		// \returns an X86ISD::PSADBW node on A and B, or an empty SDValue if the
		//          pattern does not match.
		static SDValue combineScalarSADPattern(SDNode *Node, SelectionDAG &DAG,
		                                       const X86Subtarget &Subtarget) {
		  if (Node->getOpcode() != ISD::ADD)
		    return SDValue();
		  EVT Ty = Node->getValueType(0);
		  if (!Ty.isVector() || Ty.getScalarSizeInBits() != 64)
		    return SDValue();
		  unsigned N = Ty.getVectorNumElements();
		  if ((N != 2 && N != 4 && N != 8) || (N == 2 && !Subtarget.hasSSE2()) ||
		      (N == 4 && !Subtarget.hasAVX2()) ||
		      (N == 8 && (!Subtarget.hasAVX512() || !Subtarget.hasBWI())))
		    return SDValue();

		  // ByteGroup is a small explicit stack of not-yet-visited subtrees of the
		  // ADD tree; at most 8 leaves are allowed, so 8 slots are sufficient.
		  SDValue ByteGroup[8], Cap, Top;
		  bool TopSpotted = false;
		  ByteGroup[0] = SDValue(Node, 0);
		  // ByteGroupDone[p] records that the leaf for byte position p was seen.
		  bool ByteGroupDone[8] = {false, false, false, false,
		                           false, false, false, false};
		  unsigned NumByteGroups = 1;
		  // For the vector SAD, we expect the following pattern: nodes are extracted
		  // by vectors of 0-7 offsets, creating 8 vectors of byte differences that are
		  // then summed.
		  //
		  // Review note: matchBinOpReduction is deliberately not reused here. This is
		  // not a scalar reduction; the pattern calls for a sum of specifically
		  // formed vectors (hence all the checks below), so that PSADBW is formed
		  // only where it is an exact semantic fit, including which qword each
		  // byte corresponds to. The other SAD recognition paths use it as a
		  // reduction tool and do not need that correspondence.
		  while (NumByteGroups) {
		    SDValue Cur = ByteGroup[NumByteGroups - 1];
		    // If it's a sum, go up the tree.
		    if (Cur.getOpcode() == ISD::ADD) {
		      if (NumByteGroups == 8)
		        return SDValue();
		      ByteGroup[NumByteGroups] = Cur.getOperand(1);
		      ByteGroup[NumByteGroups - 1] = Cur.getOperand(0);
		      ++NumByteGroups;
		      continue;
		    }
		    // Check that the pattern above zext corresponds to a shuffle of the AD
		    // pattern's output with the <pos, pos + 8, pos + 16, ...> shuffle mask.
		    if (Cur.getOpcode() != ISD::ZERO_EXTEND ||
		        Cur.getValueType().getVectorNumElements() != N)
		      return SDValue();
		    --NumByteGroups;
		    Cap = Cur.getOperand(0);
		    if (Cap.getOpcode() != ISD::BUILD_VECTOR)
		      return SDValue();
		    if (Cap.getNumOperands() < N)
		      return SDValue();
		    SDValue Extract = Cap.getOperand(0);
		    if (Extract.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
		      return SDValue();
		    if (!TopSpotted) {
		      Top = Extract.getOperand(0);
		      // PSADBW consumes exactly 8 bytes per produced qword. Reject any
		      // source that is not a vector of 8 * N i8 elements (e.g. a wider
		      // byte vector or an i16 vector), which would otherwise satisfy the
		      // index checks below and yield a PSADBW with mistyped operands.
		      if (Top.getValueType() != MVT::getVectorVT(MVT::i8, 8 * N))
		        return SDValue();
		      TopSpotted = true;
		    } else if (Extract.getOperand(0) != Top)
		      return SDValue();
		    auto *FirstIdx = dyn_cast<ConstantSDNode>(Extract.getOperand(1).getNode());
		    if (!FirstIdx)
		      return SDValue();
		    unsigned Pos = FirstIdx->getZExtValue();
		    if (Pos >= 8 || ByteGroupDone[Pos])
		      return SDValue();
		    // Every further lane i must extract byte Pos of qword i from Top.
		    for (unsigned i = 1; i < N; ++i) {
		      Extract = Cap.getOperand(i);
		      if (Extract.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
		          Extract.getOperand(0) != Top)
		        return SDValue();
		      auto *Idx = dyn_cast<ConstantSDNode>(Extract.getOperand(1).getNode());
		      if (!Idx || Idx->getZExtValue() != Pos + 8 * i)
		        return SDValue();
		    }
		    ByteGroupDone[Pos] = true;
		  }
		  // Check that we've found 8 shuffles.
		  for (unsigned i = 0; i < 8; ++i)
		    if (!ByteGroupDone[i])
		      return SDValue();
		  // Check that Top points to an absolute difference pattern, that is
		  // AD[i] = (A[i] > B[i]) ? A[i] - B[i] : B[i] - A[i]
		  if (Top.getOpcode() != ISD::VSELECT)
		    return SDValue();
		  SDValue SubP = Top.getOperand(1);
		  SDValue SubN = Top.getOperand(2);
		  if (SubP.getOpcode() != ISD::SUB || SubN.getOpcode() != ISD::SUB)
		    return SDValue();
		  Top = Top.getOperand(0);
		  if (Top.getOpcode() != ISD::SETCC)
		    return SDValue();
		  // UGE is accepted as well as UGT: when A[i] == B[i] both differences are 0.
		  ISD::CondCode CC = cast<CondCodeSDNode>(Top.getOperand(2))->get();
		  if (CC != ISD::SETUGT && CC != ISD::SETUGE)
		    return SDValue();
		  SDValue A = Top.getOperand(0);
		  SDValue B = Top.getOperand(1);
		  if (SubP.getOperand(0) != A || SubP.getOperand(1) != B ||
		      SubN.getOperand(0) != B || SubN.getOperand(1) != A)
		    return SDValue();

		  SDLoc DL(Node);
		  MVT VT = MVT::getVectorVT(MVT::i64, N);
		  return DAG.getNode(X86ISD::PSADBW, DL, VT, A, B);
		}

static SDValue combineAdd(SDNode *N, SelectionDAG &DAG,		static SDValue combineAdd(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {		const X86Subtarget &Subtarget) {
const SDNodeFlags Flags = N->getFlags();		const SDNodeFlags Flags = N->getFlags();
		if (SDValue VSad = combineScalarSADPattern(N, DAG, Subtarget))
		return VSad;
if (Flags.hasVectorReduction()) {		if (Flags.hasVectorReduction()) {
if (SDValue Sad = combineLoopSADPattern(N, DAG, Subtarget))		if (SDValue Sad = combineLoopSADPattern(N, DAG, Subtarget))
return Sad;		return Sad;
if (SDValue MAdd = combineLoopMAddPattern(N, DAG, Subtarget))		if (SDValue MAdd = combineLoopMAddPattern(N, DAG, Subtarget))
return MAdd;		return MAdd;
}		}
EVT VT = N->getValueType(0);		EVT VT = N->getValueType(0);
SDValue Op0 = N->getOperand(0);		SDValue Op0 = N->getOperand(0);
▲ Show 20 Lines • Show All 1,693 Lines • Show Last 20 Lines

llvm/lib/Target/X86/X86IntrinsicsInfo.h

Show First 20 Lines • Show All 410 Lines • ▼ Show 20 Lines	static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx2_phsub_d, INTR_TYPE_2OP, X86ISD::HSUB, 0),		X86_INTRINSIC_DATA(avx2_phsub_d, INTR_TYPE_2OP, X86ISD::HSUB, 0),
X86_INTRINSIC_DATA(avx2_phsub_w, INTR_TYPE_2OP, X86ISD::HSUB, 0),		X86_INTRINSIC_DATA(avx2_phsub_w, INTR_TYPE_2OP, X86ISD::HSUB, 0),
X86_INTRINSIC_DATA(avx2_pmadd_ub_sw, INTR_TYPE_2OP, X86ISD::VPMADDUBSW, 0),		X86_INTRINSIC_DATA(avx2_pmadd_ub_sw, INTR_TYPE_2OP, X86ISD::VPMADDUBSW, 0),
X86_INTRINSIC_DATA(avx2_pmadd_wd, INTR_TYPE_2OP, X86ISD::VPMADDWD, 0),		X86_INTRINSIC_DATA(avx2_pmadd_wd, INTR_TYPE_2OP, X86ISD::VPMADDWD, 0),
X86_INTRINSIC_DATA(avx2_pmovmskb, INTR_TYPE_1OP, X86ISD::MOVMSK, 0),		X86_INTRINSIC_DATA(avx2_pmovmskb, INTR_TYPE_1OP, X86ISD::MOVMSK, 0),
X86_INTRINSIC_DATA(avx2_pmul_hr_sw, INTR_TYPE_2OP, X86ISD::MULHRS, 0),		X86_INTRINSIC_DATA(avx2_pmul_hr_sw, INTR_TYPE_2OP, X86ISD::MULHRS, 0),
X86_INTRINSIC_DATA(avx2_pmulh_w, INTR_TYPE_2OP, ISD::MULHS, 0),		X86_INTRINSIC_DATA(avx2_pmulh_w, INTR_TYPE_2OP, ISD::MULHS, 0),
X86_INTRINSIC_DATA(avx2_pmulhu_w, INTR_TYPE_2OP, ISD::MULHU, 0),		X86_INTRINSIC_DATA(avx2_pmulhu_w, INTR_TYPE_2OP, ISD::MULHU, 0),
X86_INTRINSIC_DATA(avx2_psad_bw, INTR_TYPE_2OP, X86ISD::PSADBW, 0),
X86_INTRINSIC_DATA(avx2_pshuf_b, INTR_TYPE_2OP, X86ISD::PSHUFB, 0),		X86_INTRINSIC_DATA(avx2_pshuf_b, INTR_TYPE_2OP, X86ISD::PSHUFB, 0),
X86_INTRINSIC_DATA(avx2_psll_d, INTR_TYPE_2OP, X86ISD::VSHL, 0),		X86_INTRINSIC_DATA(avx2_psll_d, INTR_TYPE_2OP, X86ISD::VSHL, 0),
X86_INTRINSIC_DATA(avx2_psll_q, INTR_TYPE_2OP, X86ISD::VSHL, 0),		X86_INTRINSIC_DATA(avx2_psll_q, INTR_TYPE_2OP, X86ISD::VSHL, 0),
X86_INTRINSIC_DATA(avx2_psll_w, INTR_TYPE_2OP, X86ISD::VSHL, 0),		X86_INTRINSIC_DATA(avx2_psll_w, INTR_TYPE_2OP, X86ISD::VSHL, 0),
X86_INTRINSIC_DATA(avx2_pslli_d, VSHIFT, X86ISD::VSHLI, 0),		X86_INTRINSIC_DATA(avx2_pslli_d, VSHIFT, X86ISD::VSHLI, 0),
X86_INTRINSIC_DATA(avx2_pslli_q, VSHIFT, X86ISD::VSHLI, 0),		X86_INTRINSIC_DATA(avx2_pslli_q, VSHIFT, X86ISD::VSHLI, 0),
X86_INTRINSIC_DATA(avx2_pslli_w, VSHIFT, X86ISD::VSHLI, 0),		X86_INTRINSIC_DATA(avx2_pslli_w, VSHIFT, X86ISD::VSHLI, 0),
X86_INTRINSIC_DATA(avx2_psllv_d, INTR_TYPE_2OP, ISD::SHL, 0),		X86_INTRINSIC_DATA(avx2_psllv_d, INTR_TYPE_2OP, ISD::SHL, 0),
▲ Show 20 Lines • Show All 1,016 Lines • ▼ Show 20 Lines	X86_INTRINSIC_DATA(avx512_mask_vpermi2var_d_128, VPERM_3OP_MASK,
X86_INTRINSIC_DATA(avx512_packuswb_512, INTR_TYPE_2OP, X86ISD::PACKUS, 0),		X86_INTRINSIC_DATA(avx512_packuswb_512, INTR_TYPE_2OP, X86ISD::PACKUS, 0),
X86_INTRINSIC_DATA(avx512_pmaddubs_w_512, INTR_TYPE_2OP,		X86_INTRINSIC_DATA(avx512_pmaddubs_w_512, INTR_TYPE_2OP,
X86ISD::VPMADDUBSW, 0),		X86ISD::VPMADDUBSW, 0),
X86_INTRINSIC_DATA(avx512_pmaddw_d_512, INTR_TYPE_2OP,		X86_INTRINSIC_DATA(avx512_pmaddw_d_512, INTR_TYPE_2OP,
X86ISD::VPMADDWD, 0),		X86ISD::VPMADDWD, 0),
X86_INTRINSIC_DATA(avx512_pmul_hr_sw_512, INTR_TYPE_2OP, X86ISD::MULHRS, 0),		X86_INTRINSIC_DATA(avx512_pmul_hr_sw_512, INTR_TYPE_2OP, X86ISD::MULHRS, 0),
X86_INTRINSIC_DATA(avx512_pmulh_w_512, INTR_TYPE_2OP, ISD::MULHS, 0),		X86_INTRINSIC_DATA(avx512_pmulh_w_512, INTR_TYPE_2OP, ISD::MULHS, 0),
X86_INTRINSIC_DATA(avx512_pmulhu_w_512, INTR_TYPE_2OP, ISD::MULHU, 0),		X86_INTRINSIC_DATA(avx512_pmulhu_w_512, INTR_TYPE_2OP, ISD::MULHU, 0),
X86_INTRINSIC_DATA(avx512_psad_bw_512, INTR_TYPE_2OP, X86ISD::PSADBW, 0),
X86_INTRINSIC_DATA(avx512_pshuf_b_512, INTR_TYPE_2OP, X86ISD::PSHUFB, 0),		X86_INTRINSIC_DATA(avx512_pshuf_b_512, INTR_TYPE_2OP, X86ISD::PSHUFB, 0),
X86_INTRINSIC_DATA(avx512_psll_d_512, INTR_TYPE_2OP, X86ISD::VSHL, 0),		X86_INTRINSIC_DATA(avx512_psll_d_512, INTR_TYPE_2OP, X86ISD::VSHL, 0),
X86_INTRINSIC_DATA(avx512_psll_q_512, INTR_TYPE_2OP, X86ISD::VSHL, 0),		X86_INTRINSIC_DATA(avx512_psll_q_512, INTR_TYPE_2OP, X86ISD::VSHL, 0),
X86_INTRINSIC_DATA(avx512_psll_w_512, INTR_TYPE_2OP, X86ISD::VSHL, 0),		X86_INTRINSIC_DATA(avx512_psll_w_512, INTR_TYPE_2OP, X86ISD::VSHL, 0),
X86_INTRINSIC_DATA(avx512_pslli_d_512, VSHIFT, X86ISD::VSHLI, 0),		X86_INTRINSIC_DATA(avx512_pslli_d_512, VSHIFT, X86ISD::VSHLI, 0),
X86_INTRINSIC_DATA(avx512_pslli_q_512, VSHIFT, X86ISD::VSHLI, 0),		X86_INTRINSIC_DATA(avx512_pslli_q_512, VSHIFT, X86ISD::VSHLI, 0),
X86_INTRINSIC_DATA(avx512_pslli_w_512, VSHIFT, X86ISD::VSHLI, 0),		X86_INTRINSIC_DATA(avx512_pslli_w_512, VSHIFT, X86ISD::VSHLI, 0),
X86_INTRINSIC_DATA(avx512_psllv_d_512, INTR_TYPE_2OP, ISD::SHL, 0),		X86_INTRINSIC_DATA(avx512_psllv_d_512, INTR_TYPE_2OP, ISD::SHL, 0),
▲ Show 20 Lines • Show All 144 Lines • ▼ Show 20 Lines	X86_INTRINSIC_DATA(avx512_mask_vpermi2var_d_128, VPERM_3OP_MASK,
X86_INTRINSIC_DATA(sse2_padds_b, INTR_TYPE_2OP, X86ISD::ADDS, 0),		X86_INTRINSIC_DATA(sse2_padds_b, INTR_TYPE_2OP, X86ISD::ADDS, 0),
X86_INTRINSIC_DATA(sse2_padds_w, INTR_TYPE_2OP, X86ISD::ADDS, 0),		X86_INTRINSIC_DATA(sse2_padds_w, INTR_TYPE_2OP, X86ISD::ADDS, 0),
X86_INTRINSIC_DATA(sse2_paddus_b, INTR_TYPE_2OP, X86ISD::ADDUS, 0),		X86_INTRINSIC_DATA(sse2_paddus_b, INTR_TYPE_2OP, X86ISD::ADDUS, 0),
X86_INTRINSIC_DATA(sse2_paddus_w, INTR_TYPE_2OP, X86ISD::ADDUS, 0),		X86_INTRINSIC_DATA(sse2_paddus_w, INTR_TYPE_2OP, X86ISD::ADDUS, 0),
X86_INTRINSIC_DATA(sse2_pmadd_wd, INTR_TYPE_2OP, X86ISD::VPMADDWD, 0),		X86_INTRINSIC_DATA(sse2_pmadd_wd, INTR_TYPE_2OP, X86ISD::VPMADDWD, 0),
X86_INTRINSIC_DATA(sse2_pmovmskb_128, INTR_TYPE_1OP, X86ISD::MOVMSK, 0),		X86_INTRINSIC_DATA(sse2_pmovmskb_128, INTR_TYPE_1OP, X86ISD::MOVMSK, 0),
X86_INTRINSIC_DATA(sse2_pmulh_w, INTR_TYPE_2OP, ISD::MULHS, 0),		X86_INTRINSIC_DATA(sse2_pmulh_w, INTR_TYPE_2OP, ISD::MULHS, 0),
X86_INTRINSIC_DATA(sse2_pmulhu_w, INTR_TYPE_2OP, ISD::MULHU, 0),		X86_INTRINSIC_DATA(sse2_pmulhu_w, INTR_TYPE_2OP, ISD::MULHU, 0),
X86_INTRINSIC_DATA(sse2_psad_bw, INTR_TYPE_2OP, X86ISD::PSADBW, 0),
X86_INTRINSIC_DATA(sse2_psll_d, INTR_TYPE_2OP, X86ISD::VSHL, 0),		X86_INTRINSIC_DATA(sse2_psll_d, INTR_TYPE_2OP, X86ISD::VSHL, 0),
X86_INTRINSIC_DATA(sse2_psll_q, INTR_TYPE_2OP, X86ISD::VSHL, 0),		X86_INTRINSIC_DATA(sse2_psll_q, INTR_TYPE_2OP, X86ISD::VSHL, 0),
X86_INTRINSIC_DATA(sse2_psll_w, INTR_TYPE_2OP, X86ISD::VSHL, 0),		X86_INTRINSIC_DATA(sse2_psll_w, INTR_TYPE_2OP, X86ISD::VSHL, 0),
X86_INTRINSIC_DATA(sse2_pslli_d, VSHIFT, X86ISD::VSHLI, 0),		X86_INTRINSIC_DATA(sse2_pslli_d, VSHIFT, X86ISD::VSHLI, 0),
X86_INTRINSIC_DATA(sse2_pslli_q, VSHIFT, X86ISD::VSHLI, 0),		X86_INTRINSIC_DATA(sse2_pslli_q, VSHIFT, X86ISD::VSHLI, 0),
X86_INTRINSIC_DATA(sse2_pslli_w, VSHIFT, X86ISD::VSHLI, 0),		X86_INTRINSIC_DATA(sse2_pslli_w, VSHIFT, X86ISD::VSHLI, 0),
X86_INTRINSIC_DATA(sse2_psra_d, INTR_TYPE_2OP, X86ISD::VSRA, 0),		X86_INTRINSIC_DATA(sse2_psra_d, INTR_TYPE_2OP, X86ISD::VSRA, 0),
X86_INTRINSIC_DATA(sse2_psra_w, INTR_TYPE_2OP, X86ISD::VSRA, 0),		X86_INTRINSIC_DATA(sse2_psra_w, INTR_TYPE_2OP, X86ISD::VSRA, 0),
▲ Show 20 Lines • Show All 127 Lines • Show Last 20 Lines

llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp

Show First 20 Lines • Show All 2,689 Lines • ▼ Show 20 Lines	case Intrinsic::x86_mmx_packuswb:
handleVectorPackIntrinsic(I, 16);		handleVectorPackIntrinsic(I, 16);
break;		break;

case Intrinsic::x86_mmx_packssdw:		case Intrinsic::x86_mmx_packssdw:
handleVectorPackIntrinsic(I, 32);		handleVectorPackIntrinsic(I, 32);
break;		break;

case Intrinsic::x86_mmx_psad_bw:		case Intrinsic::x86_mmx_psad_bw:
case Intrinsic::x86_sse2_psad_bw:
case Intrinsic::x86_avx2_psad_bw:
handleVectorSadIntrinsic(I);		handleVectorSadIntrinsic(I);
break;		break;

case Intrinsic::x86_sse2_pmadd_wd:		case Intrinsic::x86_sse2_pmadd_wd:
case Intrinsic::x86_avx2_pmadd_wd:		case Intrinsic::x86_avx2_pmadd_wd:
case Intrinsic::x86_ssse3_pmadd_ub_sw_128:		case Intrinsic::x86_ssse3_pmadd_ub_sw_128:
case Intrinsic::x86_avx2_pmadd_ub_sw:		case Intrinsic::x86_avx2_pmadd_ub_sw:
handleVectorPmaddIntrinsic(I);		handleVectorPmaddIntrinsic(I);
▲ Show 20 Lines • Show All 1,216 Lines • Show Last 20 Lines

llvm/test/CodeGen/X86/avx2-intrinsics-fast-isel.ll

	Show First 20 Lines • Show All 2,028 Lines • ▼ Show 20 Lines
	; CHECK-NEXT: ret{{[l\|q]}}			; CHECK-NEXT: ret{{[l\|q]}}
	%arg1 = bitcast <4 x i64> %a1 to <8 x i32>			%arg1 = bitcast <4 x i64> %a1 to <8 x i32>
	%res = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> %arg1)			%res = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> %arg1)
	ret <8 x float> %res			ret <8 x float> %res
	}			}
	declare <8 x float> @llvm.x86.avx2.permps(<8 x float>, <8 x i32>) nounwind readonly			declare <8 x float> @llvm.x86.avx2.permps(<8 x float>, <8 x i32>) nounwind readonly

	define <4 x i64> @test_mm256_sad_epu8(<4 x i64> %a0, <4 x i64> %a1) {			define <4 x i64> @test_mm256_sad_epu8(<4 x i64> %a0, <4 x i64> %a1) {
	; CHECK-LABEL: test_mm256_sad_epu8:			; X86-LABEL: test_mm256_sad_epu8:
	; CHECK: # %bb.0:			; X86: # %bb.0:
	; CHECK-NEXT: vpsadbw %ymm1, %ymm0, %ymm0			; X86-NEXT: vpminub %ymm1, %ymm0, %ymm2
	; CHECK-NEXT: ret{{[l\|q]}}			; X86-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm2
	%arg0 = bitcast <4 x i64> %a0 to <32 x i8>			; X86-NEXT: vpcmpeqd %ymm3, %ymm3, %ymm3
	%arg1 = bitcast <4 x i64> %a1 to <32 x i8>			; X86-NEXT: vpxor %ymm3, %ymm2, %ymm2
	%res = call <4 x i64> @llvm.x86.avx2.psad.bw(<32 x i8> %arg0, <32 x i8> %arg1)			; X86-NEXT: vpsubb %ymm1, %ymm0, %ymm3
				; X86-NEXT: vpsubb %ymm0, %ymm1, %ymm0
				; X86-NEXT: vpblendvb %ymm2, %ymm3, %ymm0, %ymm0
				; X86-NEXT: vpand {{\.LCPI.*}}, %ymm0, %ymm1
				; X86-NEXT: vpshufb {{.*#+}} ymm2 = ymm0[1],zero,zero,zero,zero,zero,zero,zero,ymm0[9],zero,zero,zero,zero,zero,zero,zero,ymm0[17],zero,zero,zero,zero,zero,zero,zero,ymm0[25],zero,zero,zero,zero,zero,zero,zero
				; X86-NEXT: vpshufb {{.*#+}} ymm3 = ymm0[2],zero,zero,zero,zero,zero,zero,zero,ymm0[10],zero,zero,zero,zero,zero,zero,zero,ymm0[18],zero,zero,zero,zero,zero,zero,zero,ymm0[26],zero,zero,zero,zero,zero,zero,zero
				; X86-NEXT: vpaddq %ymm3, %ymm2, %ymm2
				; X86-NEXT: vpshufb {{.*#+}} ymm3 = ymm0[3],zero,zero,zero,zero,zero,zero,zero,ymm0[11],zero,zero,zero,zero,zero,zero,zero,ymm0[19],zero,zero,zero,zero,zero,zero,zero,ymm0[27],zero,zero,zero,zero,zero,zero,zero
				; X86-NEXT: vpaddq %ymm3, %ymm2, %ymm2
				; X86-NEXT: vpshufb {{.*#+}} ymm3 = ymm0[4],zero,zero,zero,zero,zero,zero,zero,ymm0[12],zero,zero,zero,zero,zero,zero,zero,ymm0[20],zero,zero,zero,zero,zero,zero,zero,ymm0[28],zero,zero,zero,zero,zero,zero,zero
				; X86-NEXT: vpaddq %ymm3, %ymm2, %ymm2
				; X86-NEXT: vpshufb {{.*#+}} ymm3 = ymm0[5],zero,zero,zero,zero,zero,zero,zero,ymm0[13],zero,zero,zero,zero,zero,zero,zero,ymm0[21],zero,zero,zero,zero,zero,zero,zero,ymm0[29],zero,zero,zero,zero,zero,zero,zero
				; X86-NEXT: vpaddq %ymm3, %ymm2, %ymm2
				; X86-NEXT: vpshufb {{.*#+}} ymm3 = ymm0[6],zero,zero,zero,zero,zero,zero,zero,ymm0[14],zero,zero,zero,zero,zero,zero,zero,ymm0[22],zero,zero,zero,zero,zero,zero,zero,ymm0[30],zero,zero,zero,zero,zero,zero,zero
				; X86-NEXT: vpaddq %ymm3, %ymm2, %ymm2
				; X86-NEXT: vpsrlq $56, %ymm0, %ymm0
				; X86-NEXT: vpaddq %ymm0, %ymm2, %ymm0
				; X86-NEXT: vpaddq %ymm0, %ymm1, %ymm0
				; X86-NEXT: ret{{[l\|q]}}
				;
				; X64-LABEL: test_mm256_sad_epu8:
				; X64: # %bb.0:
				; X64-NEXT: vpminub %ymm1, %ymm0, %ymm2
				; X64-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm2
				; X64-NEXT: vpcmpeqd %ymm3, %ymm3, %ymm3
				; X64-NEXT: vpxor %ymm3, %ymm2, %ymm2
				; X64-NEXT: vpsubb %ymm1, %ymm0, %ymm3
				; X64-NEXT: vpsubb %ymm0, %ymm1, %ymm0
				; X64-NEXT: vpblendvb %ymm2, %ymm3, %ymm0, %ymm0
				; X64-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm1
				; X64-NEXT: vpshufb {{.*#+}} ymm2 = ymm0[1],zero,zero,zero,zero,zero,zero,zero,ymm0[9],zero,zero,zero,zero,zero,zero,zero,ymm0[17],zero,zero,zero,zero,zero,zero,zero,ymm0[25],zero,zero,zero,zero,zero,zero,zero
				; X64-NEXT: vpshufb {{.*#+}} ymm3 = ymm0[2],zero,zero,zero,zero,zero,zero,zero,ymm0[10],zero,zero,zero,zero,zero,zero,zero,ymm0[18],zero,zero,zero,zero,zero,zero,zero,ymm0[26],zero,zero,zero,zero,zero,zero,zero
				; X64-NEXT: vpaddq %ymm3, %ymm2, %ymm2
				; X64-NEXT: vpshufb {{.*#+}} ymm3 = ymm0[3],zero,zero,zero,zero,zero,zero,zero,ymm0[11],zero,zero,zero,zero,zero,zero,zero,ymm0[19],zero,zero,zero,zero,zero,zero,zero,ymm0[27],zero,zero,zero,zero,zero,zero,zero
				; X64-NEXT: vpaddq %ymm3, %ymm2, %ymm2
				; X64-NEXT: vpshufb {{.*#+}} ymm3 = ymm0[4],zero,zero,zero,zero,zero,zero,zero,ymm0[12],zero,zero,zero,zero,zero,zero,zero,ymm0[20],zero,zero,zero,zero,zero,zero,zero,ymm0[28],zero,zero,zero,zero,zero,zero,zero
				; X64-NEXT: vpaddq %ymm3, %ymm2, %ymm2
				; X64-NEXT: vpshufb {{.*#+}} ymm3 = ymm0[5],zero,zero,zero,zero,zero,zero,zero,ymm0[13],zero,zero,zero,zero,zero,zero,zero,ymm0[21],zero,zero,zero,zero,zero,zero,zero,ymm0[29],zero,zero,zero,zero,zero,zero,zero
				; X64-NEXT: vpaddq %ymm3, %ymm2, %ymm2
				; X64-NEXT: vpshufb {{.*#+}} ymm3 = ymm0[6],zero,zero,zero,zero,zero,zero,zero,ymm0[14],zero,zero,zero,zero,zero,zero,zero,ymm0[22],zero,zero,zero,zero,zero,zero,zero,ymm0[30],zero,zero,zero,zero,zero,zero,zero
				; X64-NEXT: vpaddq %ymm3, %ymm2, %ymm2
				; X64-NEXT: vpsrlq $56, %ymm0, %ymm0
				; X64-NEXT: vpaddq %ymm0, %ymm2, %ymm0
				; X64-NEXT: vpaddq %ymm0, %ymm1, %ymm0
				; X64-NEXT: ret{{[l\|q]}}
				%arg0 = bitcast <4 x i64> %a0 to <32 x i8>
				%arg1 = bitcast <4 x i64> %a1 to <32 x i8>
				%1 = icmp ugt <32 x i8> %arg0, %arg1
				%2 = sub <32 x i8> %arg0, %arg1
				%3 = sub <32 x i8> %arg1, %arg0
				%4 = select <32 x i1> %1, <32 x i8> %2, <32 x i8> %3
				%5 = shufflevector <32 x i8> %4, <32 x i8> %4, <4 x i32> <i32 0, i32 8, i32 16, i32 24>
				%6 = zext <4 x i8> %5 to <4 x i64>
				%7 = shufflevector <32 x i8> %4, <32 x i8> %4, <4 x i32> <i32 1, i32 9, i32 17, i32 25>
				%8 = zext <4 x i8> %7 to <4 x i64>
				%9 = add <4 x i64> %6, %8
				%10 = shufflevector <32 x i8> %4, <32 x i8> %4, <4 x i32> <i32 2, i32 10, i32 18, i32 26>
				%11 = zext <4 x i8> %10 to <4 x i64>
				%12 = add <4 x i64> %9, %11
				%13 = shufflevector <32 x i8> %4, <32 x i8> %4, <4 x i32> <i32 3, i32 11, i32 19, i32 27>
				%14 = zext <4 x i8> %13 to <4 x i64>
				%15 = add <4 x i64> %12, %14
				%16 = shufflevector <32 x i8> %4, <32 x i8> %4, <4 x i32> <i32 4, i32 12, i32 20, i32 28>
				%17 = zext <4 x i8> %16 to <4 x i64>
				%18 = add <4 x i64> %15, %17
				%19 = shufflevector <32 x i8> %4, <32 x i8> %4, <4 x i32> <i32 5, i32 13, i32 21, i32 29>
				%20 = zext <4 x i8> %19 to <4 x i64>
				%21 = add <4 x i64> %18, %20
				%22 = shufflevector <32 x i8> %4, <32 x i8> %4, <4 x i32> <i32 6, i32 14, i32 22, i32 30>
				%23 = zext <4 x i8> %22 to <4 x i64>
				%24 = add <4 x i64> %21, %23
				%25 = shufflevector <32 x i8> %4, <32 x i8> %4, <4 x i32> <i32 7, i32 15, i32 23, i32 31>
				%26 = zext <4 x i8> %25 to <4 x i64>
				%res = add <4 x i64> %24, %26
	ret <4 x i64> %res			ret <4 x i64> %res
	}			}
	declare <4 x i64> @llvm.x86.avx2.psad.bw(<32 x i8>, <32 x i8>) nounwind readnone

	define <4 x i64> @test_mm256_shuffle_epi32(<4 x i64> %a0) {			define <4 x i64> @test_mm256_shuffle_epi32(<4 x i64> %a0) {
	; CHECK-LABEL: test_mm256_shuffle_epi32:			; CHECK-LABEL: test_mm256_shuffle_epi32:
	; CHECK: # %bb.0:			; CHECK: # %bb.0:
	; CHECK-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,3,0,0,7,7,4,4]			; CHECK-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,3,0,0,7,7,4,4]
	; CHECK-NEXT: ret{{[l\|q]}}			; CHECK-NEXT: ret{{[l\|q]}}
	%arg0 = bitcast <4 x i64> %a0 to <8 x i32>			%arg0 = bitcast <4 x i64> %a0 to <8 x i32>
	%shuf = shufflevector <8 x i32> %arg0, <8 x i32> undef, <8 x i32> <i32 3, i32 3, i32 0, i32 0, i32 7, i32 7, i32 4, i32 4>			%shuf = shufflevector <8 x i32> %arg0, <8 x i32> undef, <8 x i32> <i32 3, i32 3, i32 0, i32 0, i32 7, i32 7, i32 4, i32 4>
	▲ Show 20 Lines • Show All 624 Lines • Show Last 20 Lines

llvm/test/CodeGen/X86/avx2-intrinsics-x86-upgrade.ll

	Show First 20 Lines • Show All 46 Lines • ▼ Show 20 Lines
	; X64-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3,4,5,6,7]			; X64-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3,4,5,6,7]
	; X64-NEXT: retq			; X64-NEXT: retq
	%res = call <8 x i32> @llvm.x86.avx2.pblendd.256(<8 x i32> %a0, <8 x i32> %a1, i32 7) ; <<8 x i32>> [#uses=1]			%res = call <8 x i32> @llvm.x86.avx2.pblendd.256(<8 x i32> %a0, <8 x i32> %a1, i32 7) ; <<8 x i32>> [#uses=1]
	ret <8 x i32> %res			ret <8 x i32> %res
	}			}
	declare <8 x i32> @llvm.x86.avx2.pblendd.256(<8 x i32>, <8 x i32>, i32) nounwind readnone			declare <8 x i32> @llvm.x86.avx2.pblendd.256(<8 x i32>, <8 x i32>, i32) nounwind readnone


				define <4 x i64> @test_x86_avx2_psad_bw(<32 x i8> %a0, <32 x i8> %a1) {
				; X86-LABEL: test_x86_avx2_psad_bw:
				; X86: ## %bb.0:
				; X86-NEXT: vpsadbw %ymm1, %ymm0, %ymm0
				; X86-NEXT: retl
				;
				; X64-LABEL: test_x86_avx2_psad_bw:
				; X64: ## %bb.0:
				; X64-NEXT: vpsadbw %ymm1, %ymm0, %ymm0
				; X64-NEXT: retq
				%res = call <4 x i64> @llvm.x86.avx2.psad.bw(<32 x i8> %a0, <32 x i8> %a1) ; <<4 x i64>> [#uses=1]
				ret <4 x i64> %res
				}
				declare <4 x i64> @llvm.x86.avx2.psad.bw(<32 x i8>, <32 x i8>) nounwind readnone


	define <4 x i64> @test_x86_avx2_movntdqa(i8* %a0) {			define <4 x i64> @test_x86_avx2_movntdqa(i8* %a0) {
	; X86-LABEL: test_x86_avx2_movntdqa:			; X86-LABEL: test_x86_avx2_movntdqa:
	; X86: ## %bb.0:			; X86: ## %bb.0:
	; X86-NEXT: movl {{[0-9]+}}(%esp), %eax			; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
	; X86-NEXT: vmovntdqa (%eax), %ymm0			; X86-NEXT: vmovntdqa (%eax), %ymm0
	; X86-NEXT: retl			; X86-NEXT: retl
	;			;
	; X64-LABEL: test_x86_avx2_movntdqa:			; X64-LABEL: test_x86_avx2_movntdqa:
	▲ Show 20 Lines • Show All 818 Lines • Show Last 20 Lines

llvm/test/CodeGen/X86/avx2-intrinsics-x86.ll

Show First 20 Lines • Show All 479 Lines • ▼ Show 20 Lines
; X64-AVX512VL-NEXT: vpmulhuw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe4,0xc1]		; X64-AVX512VL-NEXT: vpmulhuw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe4,0xc1]
; X64-AVX512VL-NEXT: retq ## encoding: [0xc3]		; X64-AVX512VL-NEXT: retq ## encoding: [0xc3]
%res = call <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]		%res = call <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
ret <16 x i16> %res		ret <16 x i16> %res
}		}
declare <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16>, <16 x i16>) nounwind readnone		declare <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16>, <16 x i16>) nounwind readnone


define <4 x i64> @test_x86_avx2_psad_bw(<32 x i8> %a0, <32 x i8> %a1) {
; X86-AVX-LABEL: test_x86_avx2_psad_bw:
; X86-AVX: ## %bb.0:
; X86-AVX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xf6,0xc1]
; X86-AVX-NEXT: retl ## encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_psad_bw:
; X86-AVX512VL: ## %bb.0:
; X86-AVX512VL-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf6,0xc1]
; X86-AVX512VL-NEXT: retl ## encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_psad_bw:
; X64-AVX: ## %bb.0:
; X64-AVX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xf6,0xc1]
; X64-AVX-NEXT: retq ## encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_psad_bw:
; X64-AVX512VL: ## %bb.0:
; X64-AVX512VL-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf6,0xc1]
; X64-AVX512VL-NEXT: retq ## encoding: [0xc3]
%res = call <4 x i64> @llvm.x86.avx2.psad.bw(<32 x i8> %a0, <32 x i8> %a1) ; <<4 x i64>> [#uses=1]
ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.psad.bw(<32 x i8>, <32 x i8>) nounwind readnone


define <8 x i32> @test_x86_avx2_psll_d(<8 x i32> %a0, <4 x i32> %a1) {		define <8 x i32> @test_x86_avx2_psll_d(<8 x i32> %a0, <4 x i32> %a1) {
; X86-AVX-LABEL: test_x86_avx2_psll_d:		; X86-AVX-LABEL: test_x86_avx2_psll_d:
; X86-AVX: ## %bb.0:		; X86-AVX: ## %bb.0:
; X86-AVX-NEXT: vpslld %xmm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xf2,0xc1]		; X86-AVX-NEXT: vpslld %xmm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xf2,0xc1]
; X86-AVX-NEXT: retl ## encoding: [0xc3]		; X86-AVX-NEXT: retl ## encoding: [0xc3]
;		;
; X86-AVX512VL-LABEL: test_x86_avx2_psll_d:		; X86-AVX512VL-LABEL: test_x86_avx2_psll_d:
; X86-AVX512VL: ## %bb.0:		; X86-AVX512VL: ## %bb.0:
▲ Show 20 Lines • Show All 803 Lines • ▼ Show 20 Lines
declare <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32>, <8 x i32>) nounwind readnone		declare <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32>, <8 x i32>) nounwind readnone


define <16 x i16> @test_x86_avx2_packusdw_fold() {		define <16 x i16> @test_x86_avx2_packusdw_fold() {
; X86-AVX-LABEL: test_x86_avx2_packusdw_fold:		; X86-AVX-LABEL: test_x86_avx2_packusdw_fold:
; X86-AVX: ## %bb.0:		; X86-AVX: ## %bb.0:
; X86-AVX-NEXT: vmovaps {{.*#+}} ymm0 = [0,0,0,0,255,32767,65535,0,0,0,0,0,0,0,0,0]		; X86-AVX-NEXT: vmovaps {{.*#+}} ymm0 = [0,0,0,0,255,32767,65535,0,0,0,0,0,0,0,0,0]
; X86-AVX-NEXT: ## encoding: [0xc5,0xfc,0x28,0x05,A,A,A,A]		; X86-AVX-NEXT: ## encoding: [0xc5,0xfc,0x28,0x05,A,A,A,A]
; X86-AVX-NEXT: ## fixup A - offset: 4, value: LCPI54_0, kind: FK_Data_4		; X86-AVX-NEXT: ## fixup A - offset: 4, value: LCPI53_0, kind: FK_Data_4
; X86-AVX-NEXT: retl ## encoding: [0xc3]		; X86-AVX-NEXT: retl ## encoding: [0xc3]
;		;
; X86-AVX512VL-LABEL: test_x86_avx2_packusdw_fold:		; X86-AVX512VL-LABEL: test_x86_avx2_packusdw_fold:
; X86-AVX512VL: ## %bb.0:		; X86-AVX512VL: ## %bb.0:
; X86-AVX512VL-NEXT: vmovaps LCPI54_0, %ymm0 ## EVEX TO VEX Compression ymm0 = [0,0,0,0,255,32767,65535,0,0,0,0,0,0,0,0,0]		; X86-AVX512VL-NEXT: vmovaps LCPI53_0, %ymm0 ## EVEX TO VEX Compression ymm0 = [0,0,0,0,255,32767,65535,0,0,0,0,0,0,0,0,0]
; X86-AVX512VL-NEXT: ## encoding: [0xc5,0xfc,0x28,0x05,A,A,A,A]		; X86-AVX512VL-NEXT: ## encoding: [0xc5,0xfc,0x28,0x05,A,A,A,A]
; X86-AVX512VL-NEXT: ## fixup A - offset: 4, value: LCPI54_0, kind: FK_Data_4		; X86-AVX512VL-NEXT: ## fixup A - offset: 4, value: LCPI53_0, kind: FK_Data_4
; X86-AVX512VL-NEXT: retl ## encoding: [0xc3]		; X86-AVX512VL-NEXT: retl ## encoding: [0xc3]
;		;
; X64-AVX-LABEL: test_x86_avx2_packusdw_fold:		; X64-AVX-LABEL: test_x86_avx2_packusdw_fold:
; X64-AVX: ## %bb.0:		; X64-AVX: ## %bb.0:
; X64-AVX-NEXT: vmovaps {{.*#+}} ymm0 = [0,0,0,0,255,32767,65535,0,0,0,0,0,0,0,0,0]		; X64-AVX-NEXT: vmovaps {{.*#+}} ymm0 = [0,0,0,0,255,32767,65535,0,0,0,0,0,0,0,0,0]
; X64-AVX-NEXT: ## encoding: [0xc5,0xfc,0x28,0x05,A,A,A,A]		; X64-AVX-NEXT: ## encoding: [0xc5,0xfc,0x28,0x05,A,A,A,A]
; X64-AVX-NEXT: ## fixup A - offset: 4, value: LCPI54_0-4, kind: reloc_riprel_4byte		; X64-AVX-NEXT: ## fixup A - offset: 4, value: LCPI53_0-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT: retq ## encoding: [0xc3]		; X64-AVX-NEXT: retq ## encoding: [0xc3]
;		;
; X64-AVX512VL-LABEL: test_x86_avx2_packusdw_fold:		; X64-AVX512VL-LABEL: test_x86_avx2_packusdw_fold:
; X64-AVX512VL: ## %bb.0:		; X64-AVX512VL: ## %bb.0:
; X64-AVX512VL-NEXT: vmovaps {{.*}}(%rip), %ymm0 ## EVEX TO VEX Compression ymm0 = [0,0,0,0,255,32767,65535,0,0,0,0,0,0,0,0,0]		; X64-AVX512VL-NEXT: vmovaps {{.*}}(%rip), %ymm0 ## EVEX TO VEX Compression ymm0 = [0,0,0,0,255,32767,65535,0,0,0,0,0,0,0,0,0]
; X64-AVX512VL-NEXT: ## encoding: [0xc5,0xfc,0x28,0x05,A,A,A,A]		; X64-AVX512VL-NEXT: ## encoding: [0xc5,0xfc,0x28,0x05,A,A,A,A]
; X64-AVX512VL-NEXT: ## fixup A - offset: 4, value: LCPI54_0-4, kind: reloc_riprel_4byte		; X64-AVX512VL-NEXT: ## fixup A - offset: 4, value: LCPI53_0-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT: retq ## encoding: [0xc3]		; X64-AVX512VL-NEXT: retq ## encoding: [0xc3]
%res = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> zeroinitializer, <8 x i32> <i32 255, i32 32767, i32 65535, i32 -1, i32 -32767, i32 -65535, i32 0, i32 -256>)		%res = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> zeroinitializer, <8 x i32> <i32 255, i32 32767, i32 65535, i32 -1, i32 -32767, i32 -65535, i32 0, i32 -256>)
ret <16 x i16> %res		ret <16 x i16> %res
}		}


define <32 x i8> @test_x86_avx2_pblendvb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> %a2) {		define <32 x i8> @test_x86_avx2_pblendvb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> %a2) {
; X86-LABEL: test_x86_avx2_pblendvb:		; X86-LABEL: test_x86_avx2_pblendvb:
▲ Show 20 Lines • Show All 703 Lines • ▼ Show 20 Lines	; X64-AVX512VL-NEXT: retq ## encoding: [0xc3]
ret <4 x i32> %res		ret <4 x i32> %res
}		}

define <4 x i32> @test_x86_avx2_psrav_d_const(<4 x i32> %a0, <4 x i32> %a1) {		define <4 x i32> @test_x86_avx2_psrav_d_const(<4 x i32> %a0, <4 x i32> %a1) {
; X86-AVX-LABEL: test_x86_avx2_psrav_d_const:		; X86-AVX-LABEL: test_x86_avx2_psrav_d_const:
; X86-AVX: ## %bb.0:		; X86-AVX: ## %bb.0:
; X86-AVX-NEXT: vmovdqa {{.*#+}} xmm0 = [2,9,4294967284,23]		; X86-AVX-NEXT: vmovdqa {{.*#+}} xmm0 = [2,9,4294967284,23]
; X86-AVX-NEXT: ## encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A]		; X86-AVX-NEXT: ## encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A]
; X86-AVX-NEXT: ## fixup A - offset: 4, value: LCPI86_0, kind: FK_Data_4		; X86-AVX-NEXT: ## fixup A - offset: 4, value: LCPI85_0, kind: FK_Data_4
; X86-AVX-NEXT: vpsravd LCPI86_1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x46,0x05,A,A,A,A]		; X86-AVX-NEXT: vpsravd LCPI85_1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x46,0x05,A,A,A,A]
; X86-AVX-NEXT: ## fixup A - offset: 5, value: LCPI86_1, kind: FK_Data_4		; X86-AVX-NEXT: ## fixup A - offset: 5, value: LCPI85_1, kind: FK_Data_4
; X86-AVX-NEXT: retl ## encoding: [0xc3]		; X86-AVX-NEXT: retl ## encoding: [0xc3]
;		;
; X86-AVX512VL-LABEL: test_x86_avx2_psrav_d_const:		; X86-AVX512VL-LABEL: test_x86_avx2_psrav_d_const:
; X86-AVX512VL: ## %bb.0:		; X86-AVX512VL: ## %bb.0:
; X86-AVX512VL-NEXT: vmovdqa LCPI86_0, %xmm0 ## EVEX TO VEX Compression xmm0 = [2,9,4294967284,23]		; X86-AVX512VL-NEXT: vmovdqa LCPI85_0, %xmm0 ## EVEX TO VEX Compression xmm0 = [2,9,4294967284,23]
; X86-AVX512VL-NEXT: ## encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A]		; X86-AVX512VL-NEXT: ## encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A]
; X86-AVX512VL-NEXT: ## fixup A - offset: 4, value: LCPI86_0, kind: FK_Data_4		; X86-AVX512VL-NEXT: ## fixup A - offset: 4, value: LCPI85_0, kind: FK_Data_4
; X86-AVX512VL-NEXT: vpsravd LCPI86_1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x46,0x05,A,A,A,A]		; X86-AVX512VL-NEXT: vpsravd LCPI85_1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x46,0x05,A,A,A,A]
; X86-AVX512VL-NEXT: ## fixup A - offset: 5, value: LCPI86_1, kind: FK_Data_4		; X86-AVX512VL-NEXT: ## fixup A - offset: 5, value: LCPI85_1, kind: FK_Data_4
; X86-AVX512VL-NEXT: retl ## encoding: [0xc3]		; X86-AVX512VL-NEXT: retl ## encoding: [0xc3]
;		;
; X64-AVX-LABEL: test_x86_avx2_psrav_d_const:		; X64-AVX-LABEL: test_x86_avx2_psrav_d_const:
; X64-AVX: ## %bb.0:		; X64-AVX: ## %bb.0:
; X64-AVX-NEXT: vmovdqa {{.*#+}} xmm0 = [2,9,4294967284,23]		; X64-AVX-NEXT: vmovdqa {{.*#+}} xmm0 = [2,9,4294967284,23]
; X64-AVX-NEXT: ## encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A]		; X64-AVX-NEXT: ## encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A]
; X64-AVX-NEXT: ## fixup A - offset: 4, value: LCPI86_0-4, kind: reloc_riprel_4byte		; X64-AVX-NEXT: ## fixup A - offset: 4, value: LCPI85_0-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT: vpsravd {{.*}}(%rip), %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x46,0x05,A,A,A,A]		; X64-AVX-NEXT: vpsravd {{.*}}(%rip), %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x46,0x05,A,A,A,A]
; X64-AVX-NEXT: ## fixup A - offset: 5, value: LCPI86_1-4, kind: reloc_riprel_4byte		; X64-AVX-NEXT: ## fixup A - offset: 5, value: LCPI85_1-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT: retq ## encoding: [0xc3]		; X64-AVX-NEXT: retq ## encoding: [0xc3]
;		;
; X64-AVX512VL-LABEL: test_x86_avx2_psrav_d_const:		; X64-AVX512VL-LABEL: test_x86_avx2_psrav_d_const:
; X64-AVX512VL: ## %bb.0:		; X64-AVX512VL: ## %bb.0:
; X64-AVX512VL-NEXT: vmovdqa {{.*}}(%rip), %xmm0 ## EVEX TO VEX Compression xmm0 = [2,9,4294967284,23]		; X64-AVX512VL-NEXT: vmovdqa {{.*}}(%rip), %xmm0 ## EVEX TO VEX Compression xmm0 = [2,9,4294967284,23]
; X64-AVX512VL-NEXT: ## encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A]		; X64-AVX512VL-NEXT: ## encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A]
; X64-AVX512VL-NEXT: ## fixup A - offset: 4, value: LCPI86_0-4, kind: reloc_riprel_4byte		; X64-AVX512VL-NEXT: ## fixup A - offset: 4, value: LCPI85_0-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT: vpsravd {{.*}}(%rip), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x46,0x05,A,A,A,A]		; X64-AVX512VL-NEXT: vpsravd {{.*}}(%rip), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x46,0x05,A,A,A,A]
; X64-AVX512VL-NEXT: ## fixup A - offset: 5, value: LCPI86_1-4, kind: reloc_riprel_4byte		; X64-AVX512VL-NEXT: ## fixup A - offset: 5, value: LCPI85_1-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT: retq ## encoding: [0xc3]		; X64-AVX512VL-NEXT: retq ## encoding: [0xc3]
%res = call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> <i32 2, i32 9, i32 -12, i32 23>, <4 x i32> <i32 1, i32 18, i32 35, i32 52>)		%res = call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> <i32 2, i32 9, i32 -12, i32 23>, <4 x i32> <i32 1, i32 18, i32 35, i32 52>)
ret <4 x i32> %res		ret <4 x i32> %res
}		}
declare <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32>, <4 x i32>) nounwind readnone		declare <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32>, <4 x i32>) nounwind readnone

define <8 x i32> @test_x86_avx2_psrav_d_256(<8 x i32> %a0, <8 x i32> %a1) {		define <8 x i32> @test_x86_avx2_psrav_d_256(<8 x i32> %a0, <8 x i32> %a1) {
; X86-AVX-LABEL: test_x86_avx2_psrav_d_256:		; X86-AVX-LABEL: test_x86_avx2_psrav_d_256:
Show All 19 Lines	; X64-AVX512VL-NEXT: retq ## encoding: [0xc3]
ret <8 x i32> %res		ret <8 x i32> %res
}		}

define <8 x i32> @test_x86_avx2_psrav_d_256_const(<8 x i32> %a0, <8 x i32> %a1) {		define <8 x i32> @test_x86_avx2_psrav_d_256_const(<8 x i32> %a0, <8 x i32> %a1) {
; X86-AVX-LABEL: test_x86_avx2_psrav_d_256_const:		; X86-AVX-LABEL: test_x86_avx2_psrav_d_256_const:
; X86-AVX: ## %bb.0:		; X86-AVX: ## %bb.0:
; X86-AVX-NEXT: vmovdqa {{.*#+}} ymm0 = [2,9,4294967284,23,4294967270,37,4294967256,51]		; X86-AVX-NEXT: vmovdqa {{.*#+}} ymm0 = [2,9,4294967284,23,4294967270,37,4294967256,51]
; X86-AVX-NEXT: ## encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A]		; X86-AVX-NEXT: ## encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A]
; X86-AVX-NEXT: ## fixup A - offset: 4, value: LCPI88_0, kind: FK_Data_4		; X86-AVX-NEXT: ## fixup A - offset: 4, value: LCPI87_0, kind: FK_Data_4
; X86-AVX-NEXT: vpsravd LCPI88_1, %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x46,0x05,A,A,A,A]		; X86-AVX-NEXT: vpsravd LCPI87_1, %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x46,0x05,A,A,A,A]
; X86-AVX-NEXT: ## fixup A - offset: 5, value: LCPI88_1, kind: FK_Data_4		; X86-AVX-NEXT: ## fixup A - offset: 5, value: LCPI87_1, kind: FK_Data_4
; X86-AVX-NEXT: retl ## encoding: [0xc3]		; X86-AVX-NEXT: retl ## encoding: [0xc3]
;		;
; X86-AVX512VL-LABEL: test_x86_avx2_psrav_d_256_const:		; X86-AVX512VL-LABEL: test_x86_avx2_psrav_d_256_const:
; X86-AVX512VL: ## %bb.0:		; X86-AVX512VL: ## %bb.0:
; X86-AVX512VL-NEXT: vmovdqa LCPI88_0, %ymm0 ## EVEX TO VEX Compression ymm0 = [2,9,4294967284,23,4294967270,37,4294967256,51]		; X86-AVX512VL-NEXT: vmovdqa LCPI87_0, %ymm0 ## EVEX TO VEX Compression ymm0 = [2,9,4294967284,23,4294967270,37,4294967256,51]
; X86-AVX512VL-NEXT: ## encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A]		; X86-AVX512VL-NEXT: ## encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A]
; X86-AVX512VL-NEXT: ## fixup A - offset: 4, value: LCPI88_0, kind: FK_Data_4		; X86-AVX512VL-NEXT: ## fixup A - offset: 4, value: LCPI87_0, kind: FK_Data_4
; X86-AVX512VL-NEXT: vpsravd LCPI88_1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x46,0x05,A,A,A,A]		; X86-AVX512VL-NEXT: vpsravd LCPI87_1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x46,0x05,A,A,A,A]
; X86-AVX512VL-NEXT: ## fixup A - offset: 5, value: LCPI88_1, kind: FK_Data_4		; X86-AVX512VL-NEXT: ## fixup A - offset: 5, value: LCPI87_1, kind: FK_Data_4
; X86-AVX512VL-NEXT: retl ## encoding: [0xc3]		; X86-AVX512VL-NEXT: retl ## encoding: [0xc3]
;		;
; X64-AVX-LABEL: test_x86_avx2_psrav_d_256_const:		; X64-AVX-LABEL: test_x86_avx2_psrav_d_256_const:
; X64-AVX: ## %bb.0:		; X64-AVX: ## %bb.0:
; X64-AVX-NEXT: vmovdqa {{.*#+}} ymm0 = [2,9,4294967284,23,4294967270,37,4294967256,51]		; X64-AVX-NEXT: vmovdqa {{.*#+}} ymm0 = [2,9,4294967284,23,4294967270,37,4294967256,51]
; X64-AVX-NEXT: ## encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A]		; X64-AVX-NEXT: ## encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A]
; X64-AVX-NEXT: ## fixup A - offset: 4, value: LCPI88_0-4, kind: reloc_riprel_4byte		; X64-AVX-NEXT: ## fixup A - offset: 4, value: LCPI87_0-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT: vpsravd {{.*}}(%rip), %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x46,0x05,A,A,A,A]		; X64-AVX-NEXT: vpsravd {{.*}}(%rip), %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x46,0x05,A,A,A,A]
; X64-AVX-NEXT: ## fixup A - offset: 5, value: LCPI88_1-4, kind: reloc_riprel_4byte		; X64-AVX-NEXT: ## fixup A - offset: 5, value: LCPI87_1-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT: retq ## encoding: [0xc3]		; X64-AVX-NEXT: retq ## encoding: [0xc3]
;		;
; X64-AVX512VL-LABEL: test_x86_avx2_psrav_d_256_const:		; X64-AVX512VL-LABEL: test_x86_avx2_psrav_d_256_const:
; X64-AVX512VL: ## %bb.0:		; X64-AVX512VL: ## %bb.0:
; X64-AVX512VL-NEXT: vmovdqa {{.*}}(%rip), %ymm0 ## EVEX TO VEX Compression ymm0 = [2,9,4294967284,23,4294967270,37,4294967256,51]		; X64-AVX512VL-NEXT: vmovdqa {{.*}}(%rip), %ymm0 ## EVEX TO VEX Compression ymm0 = [2,9,4294967284,23,4294967270,37,4294967256,51]
; X64-AVX512VL-NEXT: ## encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A]		; X64-AVX512VL-NEXT: ## encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A]
; X64-AVX512VL-NEXT: ## fixup A - offset: 4, value: LCPI88_0-4, kind: reloc_riprel_4byte		; X64-AVX512VL-NEXT: ## fixup A - offset: 4, value: LCPI87_0-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT: vpsravd {{.*}}(%rip), %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x46,0x05,A,A,A,A]		; X64-AVX512VL-NEXT: vpsravd {{.*}}(%rip), %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x46,0x05,A,A,A,A]
; X64-AVX512VL-NEXT: ## fixup A - offset: 5, value: LCPI88_1-4, kind: reloc_riprel_4byte		; X64-AVX512VL-NEXT: ## fixup A - offset: 5, value: LCPI87_1-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT: retq ## encoding: [0xc3]		; X64-AVX512VL-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> <i32 2, i32 9, i32 -12, i32 23, i32 -26, i32 37, i32 -40, i32 51>, <8 x i32> <i32 1, i32 18, i32 35, i32 52, i32 69, i32 15, i32 32, i32 49>)		%res = call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> <i32 2, i32 9, i32 -12, i32 23, i32 -26, i32 37, i32 -40, i32 51>, <8 x i32> <i32 1, i32 18, i32 35, i32 52, i32 69, i32 15, i32 32, i32 49>)
ret <8 x i32> %res		ret <8 x i32> %res
}		}
declare <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32>, <8 x i32>) nounwind readnone		declare <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32>, <8 x i32>) nounwind readnone

define <2 x double> @test_x86_avx2_gather_d_pd(<2 x double> %a0, i8* %a1, <4 x i32> %idx, <2 x double> %mask) {		define <2 x double> @test_x86_avx2_gather_d_pd(<2 x double> %a0, i8* %a1, <4 x i32> %idx, <2 x double> %mask) {
; X86-LABEL: test_x86_avx2_gather_d_pd:		; X86-LABEL: test_x86_avx2_gather_d_pd:
▲ Show 20 Lines • Show All 334 Lines • Show Last 20 Lines

llvm/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll

Show First 20 Lines • Show All 99 Lines • ▼ Show 20 Lines	; AVX512F-32-NEXT: retl
%res = call <32 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.512(i16 %x0, <32 x i16> %x1, i32 -1)		%res = call <32 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.512(i16 %x0, <32 x i16> %x1, i32 -1)
%res1 = call <32 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.512(i16 %x0, <32 x i16> %x1, i32 %mask)		%res1 = call <32 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.512(i16 %x0, <32 x i16> %x1, i32 %mask)
%res2 = call <32 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.512(i16 %x0, <32 x i16> zeroinitializer, i32 %mask)		%res2 = call <32 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.512(i16 %x0, <32 x i16> zeroinitializer, i32 %mask)
%res3 = add <32 x i16> %res, %res1		%res3 = add <32 x i16> %res, %res1
%res4 = add <32 x i16> %res2, %res3		%res4 = add <32 x i16> %res2, %res3
ret <32 x i16> %res4		ret <32 x i16> %res4
}		}

		declare <8 x i64> @llvm.x86.avx512.psad.bw.512(<64 x i8>, <64 x i8>)

		define <8 x i64>@test_int_x86_avx512_mask_psadb_w_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2){
		; AVX512BW-LABEL: test_int_x86_avx512_mask_psadb_w_512:
		; AVX512BW: ## %bb.0:
		; AVX512BW-NEXT: vpsadbw %zmm1, %zmm0, %zmm1
		; AVX512BW-NEXT: vpsadbw %zmm2, %zmm0, %zmm0
		; AVX512BW-NEXT: vpaddq %zmm0, %zmm1, %zmm0
		; AVX512BW-NEXT: retq
		;
		; AVX512F-32-LABEL: test_int_x86_avx512_mask_psadb_w_512:
		; AVX512F-32: # %bb.0:
		; AVX512F-32-NEXT: vpsadbw %zmm1, %zmm0, %zmm1
		; AVX512F-32-NEXT: vpsadbw %zmm2, %zmm0, %zmm0
		; AVX512F-32-NEXT: vpaddq %zmm0, %zmm1, %zmm0
		; AVX512F-32-NEXT: retl
		%res = call <8 x i64> @llvm.x86.avx512.psad.bw.512(<64 x i8> %x0, <64 x i8> %x1)
		%res1 = call <8 x i64> @llvm.x86.avx512.psad.bw.512(<64 x i8> %x0, <64 x i8> %x2)
		%res2 = add <8 x i64> %res, %res1
		ret <8 x i64> %res2
		}

declare void @llvm.x86.avx512.mask.storeu.b.512(i8*, <64 x i8>, i64)		declare void @llvm.x86.avx512.mask.storeu.b.512(i8*, <64 x i8>, i64)

define void@test_int_x86_avx512_mask_storeu_b_512(i8* %ptr1, i8* %ptr2, <64 x i8> %x1, i64 %x2) {		define void@test_int_x86_avx512_mask_storeu_b_512(i8* %ptr1, i8* %ptr2, <64 x i8> %x1, i64 %x2) {
; AVX512BW-LABEL: test_int_x86_avx512_mask_storeu_b_512:		; AVX512BW-LABEL: test_int_x86_avx512_mask_storeu_b_512:
; AVX512BW: ## %bb.0:		; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: kmovq %rdx, %k1		; AVX512BW-NEXT: kmovq %rdx, %k1
; AVX512BW-NEXT: vmovdqu8 %zmm0, (%rdi) {%k1}		; AVX512BW-NEXT: vmovdqu8 %zmm0, (%rdi) {%k1}
; AVX512BW-NEXT: vmovdqu64 %zmm0, (%rsi)		; AVX512BW-NEXT: vmovdqu64 %zmm0, (%rsi)
▲ Show 20 Lines • Show All 2,628 Lines • Show Last 20 Lines

llvm/test/CodeGen/X86/avx512bw-intrinsics.ll

Show First 20 Lines • Show All 1,437 Lines • ▼ Show 20 Lines	; AVX512F-32-NEXT: retl
%res = call <32 x i16> @llvm.x86.avx512.mask.dbpsadbw.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <32 x i16> %x3, i32 %x4)		%res = call <32 x i16> @llvm.x86.avx512.mask.dbpsadbw.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <32 x i16> %x3, i32 %x4)
%res1 = call <32 x i16> @llvm.x86.avx512.mask.dbpsadbw.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <32 x i16> zeroinitializer, i32 %x4)		%res1 = call <32 x i16> @llvm.x86.avx512.mask.dbpsadbw.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <32 x i16> zeroinitializer, i32 %x4)
%res2 = call <32 x i16> @llvm.x86.avx512.mask.dbpsadbw.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <32 x i16> %x3, i32 -1)		%res2 = call <32 x i16> @llvm.x86.avx512.mask.dbpsadbw.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <32 x i16> %x3, i32 -1)
%res3 = add <32 x i16> %res, %res1		%res3 = add <32 x i16> %res, %res1
%res4 = add <32 x i16> %res3, %res2		%res4 = add <32 x i16> %res3, %res2
ret <32 x i16> %res4		ret <32 x i16> %res4
}		}

declare <8 x i64> @llvm.x86.avx512.psad.bw.512(<64 x i8>, <64 x i8>)

define <8 x i64>@test_int_x86_avx512_mask_psadb_w_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2){
; AVX512BW-LABEL: test_int_x86_avx512_mask_psadb_w_512:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: vpsadbw %zmm1, %zmm0, %zmm1
; AVX512BW-NEXT: vpsadbw %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT: vpaddq %zmm0, %zmm1, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_int_x86_avx512_mask_psadb_w_512:
; AVX512F-32: # %bb.0:
; AVX512F-32-NEXT: vpsadbw %zmm1, %zmm0, %zmm1
; AVX512F-32-NEXT: vpsadbw %zmm2, %zmm0, %zmm0
; AVX512F-32-NEXT: vpaddq %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT: retl
%res = call <8 x i64> @llvm.x86.avx512.psad.bw.512(<64 x i8> %x0, <64 x i8> %x1)
%res1 = call <8 x i64> @llvm.x86.avx512.psad.bw.512(<64 x i8> %x0, <64 x i8> %x2)
%res2 = add <8 x i64> %res, %res1
ret <8 x i64> %res2
}

declare <32 x i16> @llvm.x86.avx512.mask.psrlv32hi(<32 x i16>, <32 x i16>, <32 x i16>, i32)		declare <32 x i16> @llvm.x86.avx512.mask.psrlv32hi(<32 x i16>, <32 x i16>, <32 x i16>, i32)

define <32 x i16>@test_int_x86_avx512_mask_psrlv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {		define <32 x i16>@test_int_x86_avx512_mask_psrlv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
; AVX512BW-LABEL: test_int_x86_avx512_mask_psrlv32hi:		; AVX512BW-LABEL: test_int_x86_avx512_mask_psrlv32hi:
; AVX512BW: ## %bb.0:		; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm3		; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm3
; AVX512BW-NEXT: kmovd %edi, %k1		; AVX512BW-NEXT: kmovd %edi, %k1
; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm2 {%k1}		; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm2 {%k1}
▲ Show 20 Lines • Show All 440 Lines • Show Last 20 Lines

llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll

Show First 20 Lines • Show All 2,056 Lines • ▼ Show 20 Lines	; X64-NEXT: retq
call void @llvm.x86.sse2.pause()		call void @llvm.x86.sse2.pause()
ret void		ret void
}		}
declare void @llvm.x86.sse2.pause() nounwind readnone		declare void @llvm.x86.sse2.pause() nounwind readnone

define <2 x i64> @test_mm_sad_epu8(<2 x i64> %a0, <2 x i64> %a1) nounwind {		define <2 x i64> @test_mm_sad_epu8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; X32-LABEL: test_mm_sad_epu8:		; X32-LABEL: test_mm_sad_epu8:
; X32: # %bb.0:		; X32: # %bb.0:
; X32-NEXT: psadbw %xmm1, %xmm0		; X32-NEXT: movdqa %xmm0, %xmm2
		; X32-NEXT: pminub %xmm1, %xmm0
		; X32-NEXT: pcmpeqb %xmm2, %xmm0
		; X32-NEXT: pcmpeqd %xmm3, %xmm3
		; X32-NEXT: pxor %xmm0, %xmm3
		; X32-NEXT: movdqa %xmm2, %xmm4
		; X32-NEXT: psubb %xmm1, %xmm4
		; X32-NEXT: psubb %xmm2, %xmm1
		; X32-NEXT: pandn %xmm1, %xmm3
		; X32-NEXT: pandn %xmm4, %xmm0
		; X32-NEXT: por %xmm3, %xmm0
		; X32-NEXT: movdqa {{.*#+}} xmm2 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
		; X32-NEXT: movdqa %xmm0, %xmm1
		; X32-NEXT: pand %xmm2, %xmm1
		; X32-NEXT: movdqa %xmm0, %xmm3
		; X32-NEXT: psrlw $8, %xmm3
		; X32-NEXT: pand %xmm2, %xmm3
		; X32-NEXT: movdqa %xmm0, %xmm4
		; X32-NEXT: psrld $16, %xmm4
		; X32-NEXT: pand %xmm2, %xmm4
		; X32-NEXT: paddq %xmm3, %xmm4
		; X32-NEXT: movdqa %xmm0, %xmm3
		; X32-NEXT: psrld $24, %xmm3
		; X32-NEXT: pand %xmm2, %xmm3
		; X32-NEXT: paddq %xmm4, %xmm3
		; X32-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,1,3,3]
		; X32-NEXT: pand %xmm2, %xmm4
		; X32-NEXT: paddq %xmm3, %xmm4
		; X32-NEXT: movdqa %xmm0, %xmm3
		; X32-NEXT: psrlq $40, %xmm3
		; X32-NEXT: pand %xmm2, %xmm3
		; X32-NEXT: paddq %xmm4, %xmm3
		; X32-NEXT: movdqa %xmm0, %xmm4
		; X32-NEXT: psrlq $48, %xmm4
		; X32-NEXT: pand %xmm2, %xmm4
		; X32-NEXT: paddq %xmm3, %xmm4
		; X32-NEXT: psrlq $56, %xmm0
		; X32-NEXT: paddq %xmm4, %xmm0
		; X32-NEXT: paddq %xmm1, %xmm0
; X32-NEXT: retl		; X32-NEXT: retl
;		;
; X64-LABEL: test_mm_sad_epu8:		; X64-LABEL: test_mm_sad_epu8:
; X64: # %bb.0:		; X64: # %bb.0:
; X64-NEXT: psadbw %xmm1, %xmm0		; X64-NEXT: movdqa %xmm0, %xmm2
		; X64-NEXT: pminub %xmm1, %xmm0
		; X64-NEXT: pcmpeqb %xmm2, %xmm0
		; X64-NEXT: pcmpeqd %xmm3, %xmm3
		; X64-NEXT: pxor %xmm0, %xmm3
		; X64-NEXT: movdqa %xmm2, %xmm4
		; X64-NEXT: psubb %xmm1, %xmm4
		; X64-NEXT: psubb %xmm2, %xmm1
		; X64-NEXT: pandn %xmm1, %xmm3
		; X64-NEXT: pandn %xmm4, %xmm0
		; X64-NEXT: por %xmm3, %xmm0
		; X64-NEXT: movdqa {{.*#+}} xmm2 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
		; X64-NEXT: movdqa %xmm0, %xmm1
		; X64-NEXT: pand %xmm2, %xmm1
		; X64-NEXT: movdqa %xmm0, %xmm3
		; X64-NEXT: psrlw $8, %xmm3
		; X64-NEXT: pand %xmm2, %xmm3
		; X64-NEXT: movdqa %xmm0, %xmm4
		; X64-NEXT: psrld $16, %xmm4
		; X64-NEXT: pand %xmm2, %xmm4
		; X64-NEXT: paddq %xmm3, %xmm4
		; X64-NEXT: movdqa %xmm0, %xmm3
		; X64-NEXT: psrld $24, %xmm3
		; X64-NEXT: pand %xmm2, %xmm3
		; X64-NEXT: paddq %xmm4, %xmm3
		; X64-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,1,3,3]
		; X64-NEXT: pand %xmm2, %xmm4
		; X64-NEXT: paddq %xmm3, %xmm4
		; X64-NEXT: movdqa %xmm0, %xmm3
		; X64-NEXT: psrlq $40, %xmm3
		; X64-NEXT: pand %xmm2, %xmm3
		; X64-NEXT: paddq %xmm4, %xmm3
		; X64-NEXT: movdqa %xmm0, %xmm4
		; X64-NEXT: psrlq $48, %xmm4
		; X64-NEXT: pand %xmm2, %xmm4
		; X64-NEXT: paddq %xmm3, %xmm4
		; X64-NEXT: psrlq $56, %xmm0
		; X64-NEXT: paddq %xmm4, %xmm0
		; X64-NEXT: paddq %xmm1, %xmm0
; X64-NEXT: retq		; X64-NEXT: retq
%arg0 = bitcast <2 x i64> %a0 to <16 x i8>		%arg0 = bitcast <2 x i64> %a0 to <16 x i8>
%arg1 = bitcast <2 x i64> %a1 to <16 x i8>		%arg1 = bitcast <2 x i64> %a1 to <16 x i8>
%res = call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> %arg0, <16 x i8> %arg1)		%1 = icmp ugt <16 x i8> %arg0, %arg1
		%2 = sub <16 x i8> %arg0, %arg1
		%3 = sub <16 x i8> %arg1, %arg0
		%4 = select <16 x i1> %1, <16 x i8> %2, <16 x i8> %3
		%5 = shufflevector <16 x i8> %4, <16 x i8> %4, <2 x i32> <i32 0, i32 8>
		%6 = zext <2 x i8> %5 to <2 x i64>
		%7 = shufflevector <16 x i8> %4, <16 x i8> %4, <2 x i32> <i32 1, i32 9>
		%8 = zext <2 x i8> %7 to <2 x i64>
		%9 = add <2 x i64> %6, %8
		%10 = shufflevector <16 x i8> %4, <16 x i8> %4, <2 x i32> <i32 2, i32 10>
		%11 = zext <2 x i8> %10 to <2 x i64>
		%12 = add <2 x i64> %9, %11
		%13 = shufflevector <16 x i8> %4, <16 x i8> %4, <2 x i32> <i32 3, i32 11>
		%14 = zext <2 x i8> %13 to <2 x i64>
		%15 = add <2 x i64> %12, %14
		%16 = shufflevector <16 x i8> %4, <16 x i8> %4, <2 x i32> <i32 4, i32 12>
		%17 = zext <2 x i8> %16 to <2 x i64>
		%18 = add <2 x i64> %15, %17
		%19 = shufflevector <16 x i8> %4, <16 x i8> %4, <2 x i32> <i32 5, i32 13>
		%20 = zext <2 x i8> %19 to <2 x i64>
		%21 = add <2 x i64> %18, %20
		%22 = shufflevector <16 x i8> %4, <16 x i8> %4, <2 x i32> <i32 6, i32 14>
		%23 = zext <2 x i8> %22 to <2 x i64>
		%24 = add <2 x i64> %21, %23
		%25 = shufflevector <16 x i8> %4, <16 x i8> %4, <2 x i32> <i32 7, i32 15>
		%26 = zext <2 x i8> %25 to <2 x i64>
		%res = add <2 x i64> %24, %26
ret <2 x i64> %res		ret <2 x i64> %res
}		}
declare <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8>, <16 x i8>) nounwind readnone

define <2 x i64> @test_mm_set_epi8(i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a5, i8 %a6, i8 %a7, i8 %a8, i8 %a9, i8 %a10, i8 %a11, i8 %a12, i8 %a13, i8 %a14, i8 %a15) nounwind {		define <2 x i64> @test_mm_set_epi8(i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a5, i8 %a6, i8 %a7, i8 %a8, i8 %a9, i8 %a10, i8 %a11, i8 %a12, i8 %a13, i8 %a14, i8 %a15) nounwind {
; X32-LABEL: test_mm_set_epi8:		; X32-LABEL: test_mm_set_epi8:
; X32: # %bb.0:		; X32: # %bb.0:
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax		; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movd %eax, %xmm0		; X32-NEXT: movd %eax, %xmm0
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax		; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movd %eax, %xmm1		; X32-NEXT: movd %eax, %xmm1
▲ Show 20 Lines • Show All 1,819 Lines • Show Last 20 Lines

llvm/test/CodeGen/X86/sse2-intrinsics-x86-upgrade.ll

	; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py			; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
	; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse2 \| FileCheck %s			; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse2 \| FileCheck %s

				; Calls the llvm.x86.sse2.psad.bw intrinsic on two <16 x i8> arguments and
				; returns its <2 x i64> result. The CHECK lines expect the generated code
				; to be a single psadbw followed by the return.
				define <2 x i64> @test_x86_sse2_psad_bw(<16 x i8> %a0, <16 x i8> %a1) {
				; CHECK-LABEL: test_x86_sse2_psad_bw:
				; CHECK: ## %bb.0:
				; CHECK-NEXT: psadbw %xmm1, %xmm0
				; CHECK-NEXT: retl
				%res = call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> %a0, <16 x i8> %a1) ; <<2 x i64>> [#uses=1]
				ret <2 x i64> %res
				}
				declare <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8>, <16 x i8>) nounwind readnone


	define <2 x i64> @test_x86_sse2_psll_dq_bs(<2 x i64> %a0) {			define <2 x i64> @test_x86_sse2_psll_dq_bs(<2 x i64> %a0) {
	; CHECK-LABEL: test_x86_sse2_psll_dq_bs:			; CHECK-LABEL: test_x86_sse2_psll_dq_bs:
	; CHECK: ## %bb.0:			; CHECK: ## %bb.0:
	; CHECK-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8]			; CHECK-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8]
	; CHECK-NEXT: retl			; CHECK-NEXT: retl
	%res = call <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]			%res = call <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
	ret <2 x i64> %res			ret <2 x i64> %res
	}			}
	▲ Show 20 Lines • Show All 247 Lines • Show Last 20 Lines

llvm/test/CodeGen/X86/sse2-intrinsics-x86.ll

	Show First 20 Lines • Show All 1,123 Lines • ▼ Show 20 Lines
	; SKX-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe4,0xc1]			; SKX-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe4,0xc1]
	; SKX-NEXT: retl ## encoding: [0xc3]			; SKX-NEXT: retl ## encoding: [0xc3]
	%res = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]			%res = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
	ret <8 x i16> %res			ret <8 x i16> %res
	}			}
	declare <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16>, <8 x i16>) nounwind readnone			declare <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16>, <8 x i16>) nounwind readnone


	; Calls the llvm.x86.sse2.psad.bw intrinsic on two <16 x i8> arguments and
	; returns its <2 x i64> result. Under the SSE prefix the checks expect
	; psadbw; under the AVX2 and SKX prefixes they expect vpsadbw with the same
	; VEX encoding bytes (the SKX line is annotated as EVEX-to-VEX compression).
	define <2 x i64> @test_x86_sse2_psad_bw(<16 x i8> %a0, <16 x i8> %a1) {
	; SSE-LABEL: test_x86_sse2_psad_bw:
	; SSE: ## %bb.0:
	; SSE-NEXT: psadbw %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xf6,0xc1]
	; SSE-NEXT: retl ## encoding: [0xc3]
	;
	; AVX2-LABEL: test_x86_sse2_psad_bw:
	; AVX2: ## %bb.0:
	; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xf6,0xc1]
	; AVX2-NEXT: retl ## encoding: [0xc3]
	;
	; SKX-LABEL: test_x86_sse2_psad_bw:
	; SKX: ## %bb.0:
	; SKX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf6,0xc1]
	; SKX-NEXT: retl ## encoding: [0xc3]
	%res = call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> %a0, <16 x i8> %a1) ; <<2 x i64>> [#uses=1]
	ret <2 x i64> %res
	}
	declare <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8>, <16 x i8>) nounwind readnone


	define <4 x i32> @test_x86_sse2_psll_d(<4 x i32> %a0, <4 x i32> %a1) {			define <4 x i32> @test_x86_sse2_psll_d(<4 x i32> %a0, <4 x i32> %a1) {
	; SSE-LABEL: test_x86_sse2_psll_d:			; SSE-LABEL: test_x86_sse2_psll_d:
	; SSE: ## %bb.0:			; SSE: ## %bb.0:
	; SSE-NEXT: pslld %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xf2,0xc1]			; SSE-NEXT: pslld %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xf2,0xc1]
	; SSE-NEXT: retl ## encoding: [0xc3]			; SSE-NEXT: retl ## encoding: [0xc3]
	;			;
	; AVX2-LABEL: test_x86_sse2_psll_d:			; AVX2-LABEL: test_x86_sse2_psll_d:
	; AVX2: ## %bb.0:			; AVX2: ## %bb.0:
	▲ Show 20 Lines • Show All 694 Lines • Show Last 20 Lines

llvm/test/Instrumentation/MemorySanitizer/vector_arith.ll

	Show All 32 Lines
	; CHECK: or i64			; CHECK: or i64
	; CHECK: bitcast i64 {{.*}} to <4 x i16>			; CHECK: bitcast i64 {{.*}} to <4 x i16>
	; CHECK: icmp ne <4 x i16> {{.*}}, zeroinitializer			; CHECK: icmp ne <4 x i16> {{.*}}, zeroinitializer
	; CHECK: sext <4 x i1> {{.*}} to <4 x i16>			; CHECK: sext <4 x i1> {{.*}} to <4 x i16>
	; CHECK: bitcast <4 x i16> {{.*}} to i64			; CHECK: bitcast <4 x i16> {{.*}} to i64
	; CHECK: ret x86_mmx			; CHECK: ret x86_mmx


	; sanitize_memory test for the SSE2 psad.bw intrinsic. The CHECK lines below
	; expect the instrumented function to contain, in order: an `or` of two
	; <16 x i8> values, a bitcast to <2 x i64>, a compare against zero, a
	; sign-extension back to <2 x i64>, and a logical shift right by 48.
	define <2 x i64> @Test_x86_sse2_psad_bw(<16 x i8> %a, <16 x i8> %b) sanitize_memory {
	%c = tail call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> %a, <16 x i8> %b)
	ret <2 x i64> %c
	}

	; CHECK-LABEL: @Test_x86_sse2_psad_bw(
	; The `or` operands are arbitrary SSA names, so match them with {{.*}};
	; a bare {{.}} matches exactly one character and cannot match e.g. `%0`.
	; CHECK: or <16 x i8> {{.*}}, {{.*}}
	; CHECK: bitcast <16 x i8> {{.*}} to <2 x i64>
	; CHECK: icmp ne <2 x i64> {{.*}}, zeroinitializer
	; CHECK: sext <2 x i1> {{.*}} to <2 x i64>
	; CHECK: lshr <2 x i64> {{.*}}, <i64 48, i64 48>
	; CHECK: ret <2 x i64>


	define x86_mmx @Test_x86_mmx_psad_bw(x86_mmx %a, x86_mmx %b) sanitize_memory {			define x86_mmx @Test_x86_mmx_psad_bw(x86_mmx %a, x86_mmx %b) sanitize_memory {
	entry:			entry:
	%c = tail call x86_mmx @llvm.x86.mmx.psad.bw(x86_mmx %a, x86_mmx %b) nounwind			%c = tail call x86_mmx @llvm.x86.mmx.psad.bw(x86_mmx %a, x86_mmx %b) nounwind
	ret x86_mmx %c			ret x86_mmx %c
	}			}

	; CHECK-LABEL: @Test_x86_mmx_psad_bw(			; CHECK-LABEL: @Test_x86_mmx_psad_bw(
	; CHECK: or i64			; CHECK: or i64
	; CHECK: icmp ne i64			; CHECK: icmp ne i64
	; CHECK: sext i1 {{.*}} to i64			; CHECK: sext i1 {{.*}} to i64
	; CHECK: lshr i64 {{.*}}, 48			; CHECK: lshr i64 {{.*}}, 48
	; CHECK: ret x86_mmx			; CHECK: ret x86_mmx

This is an archive of the discontinued LLVM Phabricator instance.

[X86] Lowering SAD (sum of absolute differences) intrinsics to native IR (LLVM side)
AbandonedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 143895

llvm/include/llvm/IR/IntrinsicsX86.td

llvm/lib/IR/AutoUpgrade.cpp

llvm/lib/Target/X86/X86ISelLowering.cpp

llvm/lib/Target/X86/X86IntrinsicsInfo.h

llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp

llvm/test/CodeGen/X86/avx2-intrinsics-fast-isel.ll

llvm/test/CodeGen/X86/avx2-intrinsics-x86-upgrade.ll

llvm/test/CodeGen/X86/avx2-intrinsics-x86.ll

llvm/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll

llvm/test/CodeGen/X86/avx512bw-intrinsics.ll

llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll

llvm/test/CodeGen/X86/sse2-intrinsics-x86-upgrade.ll

llvm/test/CodeGen/X86/sse2-intrinsics-x86.ll

llvm/test/Instrumentation/MemorySanitizer/vector_arith.ll

This is an archive of the discontinued LLVM Phabricator instance.

[X86] Lowering SAD (sum of absolute differences) intrinsics to native IR (LLVM side)
AbandonedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 143895

llvm/include/llvm/IR/IntrinsicsX86.td

llvm/lib/IR/AutoUpgrade.cpp

llvm/lib/Target/X86/X86ISelLowering.cpp

llvm/lib/Target/X86/X86IntrinsicsInfo.h

llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp

llvm/test/CodeGen/X86/avx2-intrinsics-fast-isel.ll

llvm/test/CodeGen/X86/avx2-intrinsics-x86-upgrade.ll

llvm/test/CodeGen/X86/avx2-intrinsics-x86.ll

llvm/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll

llvm/test/CodeGen/X86/avx512bw-intrinsics.ll

llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll

llvm/test/CodeGen/X86/sse2-intrinsics-x86-upgrade.ll

llvm/test/CodeGen/X86/sse2-intrinsics-x86.ll

llvm/test/Instrumentation/MemorySanitizer/vector_arith.ll

[X86] Lowering SAD (sum of absolute differences) intrinsics to native IR (LLVM side)
AbandonedPublic