Diff 128383

include/llvm/IR/IntrinsicsX86.td

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 171 Lines • ▼ Show 20 Lines
//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//
// SSE1		// SSE1

// Arithmetic ops		// Arithmetic ops
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".		let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_sse_sqrt_ss : GCCBuiltin<"__builtin_ia32_sqrtss">,		def int_x86_sse_sqrt_ss : GCCBuiltin<"__builtin_ia32_sqrtss">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty],		Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty],
[IntrNoMem]>;		[IntrNoMem]>;
def int_x86_sse_sqrt_ps : GCCBuiltin<"__builtin_ia32_sqrtps">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty],
[IntrNoMem]>;
def int_x86_sse_rcp_ss : GCCBuiltin<"__builtin_ia32_rcpss">,		def int_x86_sse_rcp_ss : GCCBuiltin<"__builtin_ia32_rcpss">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty],		Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty],
[IntrNoMem]>;		[IntrNoMem]>;
def int_x86_sse_rcp_ps : GCCBuiltin<"__builtin_ia32_rcpps">,		def int_x86_sse_rcp_ps : GCCBuiltin<"__builtin_ia32_rcpps">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty],		Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty],
[IntrNoMem]>;		[IntrNoMem]>;
def int_x86_sse_rsqrt_ss : GCCBuiltin<"__builtin_ia32_rsqrtss">,		def int_x86_sse_rsqrt_ss : GCCBuiltin<"__builtin_ia32_rsqrtss">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty],		Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty],
▲ Show 20 Lines • Show All 111 Lines • ▼ Show 20 Lines
//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//
// SSE2		// SSE2

// FP arithmetic ops		// FP arithmetic ops
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".		let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_sse2_sqrt_sd : GCCBuiltin<"__builtin_ia32_sqrtsd">,		def int_x86_sse2_sqrt_sd : GCCBuiltin<"__builtin_ia32_sqrtsd">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty],		Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty],
[IntrNoMem]>;		[IntrNoMem]>;
def int_x86_sse2_sqrt_pd : GCCBuiltin<"__builtin_ia32_sqrtpd">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty],
[IntrNoMem]>;
def int_x86_sse2_min_sd : GCCBuiltin<"__builtin_ia32_minsd">,		def int_x86_sse2_min_sd : GCCBuiltin<"__builtin_ia32_minsd">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,		Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
llvm_v2f64_ty], [IntrNoMem]>;		llvm_v2f64_ty], [IntrNoMem]>;
def int_x86_sse2_min_pd : GCCBuiltin<"__builtin_ia32_minpd">,		def int_x86_sse2_min_pd : GCCBuiltin<"__builtin_ia32_minpd">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,		Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
llvm_v2f64_ty], [IntrNoMem]>;		llvm_v2f64_ty], [IntrNoMem]>;
def int_x86_sse2_max_sd : GCCBuiltin<"__builtin_ia32_maxsd">,		def int_x86_sse2_max_sd : GCCBuiltin<"__builtin_ia32_maxsd">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,		Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
▲ Show 20 Lines • Show All 656 Lines • ▼ Show 20 Lines	def int_x86_avx_max_ps_256 : GCCBuiltin<"__builtin_ia32_maxps256">,
llvm_v8f32_ty], [IntrNoMem]>;		llvm_v8f32_ty], [IntrNoMem]>;
def int_x86_avx_min_pd_256 : GCCBuiltin<"__builtin_ia32_minpd256">,		def int_x86_avx_min_pd_256 : GCCBuiltin<"__builtin_ia32_minpd256">,
Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty,		Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty,
llvm_v4f64_ty], [IntrNoMem]>;		llvm_v4f64_ty], [IntrNoMem]>;
def int_x86_avx_min_ps_256 : GCCBuiltin<"__builtin_ia32_minps256">,		def int_x86_avx_min_ps_256 : GCCBuiltin<"__builtin_ia32_minps256">,
Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty,		Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty,
llvm_v8f32_ty], [IntrNoMem]>;		llvm_v8f32_ty], [IntrNoMem]>;

def int_x86_avx_sqrt_pd_256 : GCCBuiltin<"__builtin_ia32_sqrtpd256">,
Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty], [IntrNoMem]>;
def int_x86_avx_sqrt_ps_256 : GCCBuiltin<"__builtin_ia32_sqrtps256">,
Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty], [IntrNoMem]>;

def int_x86_avx_rsqrt_ps_256 : GCCBuiltin<"__builtin_ia32_rsqrtps256">,		def int_x86_avx_rsqrt_ps_256 : GCCBuiltin<"__builtin_ia32_rsqrtps256">,
Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty], [IntrNoMem]>;		Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty], [IntrNoMem]>;

def int_x86_avx_rcp_ps_256 : GCCBuiltin<"__builtin_ia32_rcpps256">,		def int_x86_avx_rcp_ps_256 : GCCBuiltin<"__builtin_ia32_rcpps256">,
Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty], [IntrNoMem]>;		Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty], [IntrNoMem]>;

def int_x86_avx_round_pd_256 : GCCBuiltin<"__builtin_ia32_roundpd256">,		def int_x86_avx_round_pd_256 : GCCBuiltin<"__builtin_ia32_roundpd256">,
Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty,		Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty,
▲ Show 20 Lines • Show All 3,502 Lines • ▼ Show 20 Lines	def int_x86_avx512_mask_scalef_ps_128 : GCCBuiltin<"__builtin_ia32_scalefps128_mask">,
llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>;		llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_scalef_ps_256 : GCCBuiltin<"__builtin_ia32_scalefps256_mask">,		def int_x86_avx512_mask_scalef_ps_256 : GCCBuiltin<"__builtin_ia32_scalefps256_mask">,
Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty,		Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty,
llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>;		llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_scalef_ps_512 : GCCBuiltin<"__builtin_ia32_scalefps512_mask">,		def int_x86_avx512_mask_scalef_ps_512 : GCCBuiltin<"__builtin_ia32_scalefps512_mask">,
Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty,		Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty,
llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>;		llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>;

def int_x86_avx512_mask_sqrt_ss : GCCBuiltin<"__builtin_ia32_sqrtss_round_mask">,		def int_x86_avx512_sqrt_ss_mask : GCCBuiltin<"__builtin_ia32_sqrtss_mask">,
		craig.topper (inline comment, not done): Why are we renaming intrinsics here? Is this done to purposely exclude the AVX512 intrinsics? Why are we doing that?
		mike.dvoretsky (inline comment, not done): It seems to me that this is done to avoid unconditionally generating the intrinsic in CodeGenFunction::EmitBuiltinExpr in CGBuiltin.cpp on the clang side, while keeping the intrinsic available in IR for cases where the rounding mode isn't 4 and it's not being lowered. I haven't been able to find other intrinsic-lowering patches that take measures to keep the intrinsic available rather than just deleting it from IR, so I can't say if this change is conventional. If it isn't, then we need to either look into changing the algorithm in EmitBuiltinExpr to check for lowering before checking if LLVM supports the intrinsic, or propose a renaming convention for cases like this one. In the latter case I would propose to put "nonlowered" in the names after the target prefix to keep these distinguishable as renamed, rather than aiming for a similar name and confusing people.
		mike.dvoretsky (inline comment, not done): Looks like a better method to preserve the intrinsics exists for this case. Instead of renaming them, one may simply remove the GCCBuiltin template from their defs here and leave them untouched in X86IntrinsicsInfo.h. That method should be made conventional for patches like this and D41168.
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty,		Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty,
llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;		llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
def int_x86_avx512_mask_sqrt_sd : GCCBuiltin<"__builtin_ia32_sqrtsd_round_mask">,		def int_x86_avx512_sqrt_sd_mask : GCCBuiltin<"__builtin_ia32_sqrtsd_mask">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty,		Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty,
llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;		llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;

def int_x86_avx512_mask_sqrt_pd_128 : GCCBuiltin<"__builtin_ia32_sqrtpd128_mask">,		def int_x86_avx512_sqrt_pd_512_mask : GCCBuiltin<"__builtin_ia32_sqrt_pd512_mask">,
		craig.topper (inline comment, not done): Isn't clang still using this one when the rounding mode is non-default?
		tkrupa (author, inline comment, not done): It does; the ss and sd intrinsics also do. The GCCBuiltin binds needed to be removed to enable lowering in AutoUpgrade but yeah, these definitions should stay. Is erasing the FIXME enough or should there be some note to not remove them?
		craig.topper (inline comment, not done): Removing the FIXME should be enough. If anyone tries to delete it, they'll get a build error in clang.
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_sqrt_pd_256 : GCCBuiltin<"__builtin_ia32_sqrtpd256_mask">,
Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty,
llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_sqrt_pd_512 : GCCBuiltin<"__builtin_ia32_sqrtpd512_mask">,
Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty,		Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty,
llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;		llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
def int_x86_avx512_mask_sqrt_ps_128 : GCCBuiltin<"__builtin_ia32_sqrtps128_mask">,		def int_x86_avx512_sqrt_ps_512_mask : GCCBuiltin<"__builtin_ia32_sqrt_ps512_mask">,
		craig.topper (inline comment, not done): Same with this one?
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty,
llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_sqrt_ps_256 : GCCBuiltin<"__builtin_ia32_sqrtps256_mask">,
Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty,
llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_sqrt_ps_512 : GCCBuiltin<"__builtin_ia32_sqrtps512_mask">,
Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty,		Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty,
llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>;		llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>;
def int_x86_avx512_mask_fixupimm_pd_128 :		def int_x86_avx512_mask_fixupimm_pd_128 :
GCCBuiltin<"__builtin_ia32_fixupimmpd128_mask">,		GCCBuiltin<"__builtin_ia32_fixupimmpd128_mask">,
Intrinsic<[llvm_v2f64_ty],		Intrinsic<[llvm_v2f64_ty],
[llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2i64_ty, llvm_i32_ty, llvm_i8_ty],		[llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2i64_ty, llvm_i32_ty, llvm_i8_ty],
[IntrNoMem]>;		[IntrNoMem]>;
def int_x86_avx512_maskz_fixupimm_pd_128 :		def int_x86_avx512_maskz_fixupimm_pd_128 :
▲ Show 20 Lines • Show All 1,973 Lines • Show Last 20 Lines

lib/IR/AutoUpgrade.cpp

Show First 20 Lines • Show All 73 Lines • ▼ Show 20 Lines	if (Name=="ssse3.pabs.b.128" \|\| // Added in 6.0
Name=="ssse3.pabs.w.128" \|\| // Added in 6.0		Name=="ssse3.pabs.w.128" \|\| // Added in 6.0
Name=="ssse3.pabs.d.128" \|\| // Added in 6.0		Name=="ssse3.pabs.d.128" \|\| // Added in 6.0
Name.startswith("avx512.mask.shuf.i") \|\| // Added in 6.0		Name.startswith("avx512.mask.shuf.i") \|\| // Added in 6.0
Name.startswith("avx512.mask.shuf.f") \|\| // Added in 6.0		Name.startswith("avx512.mask.shuf.f") \|\| // Added in 6.0
Name.startswith("avx512.kunpck") \|\| //added in 6.0		Name.startswith("avx512.kunpck") \|\| //added in 6.0
Name.startswith("avx2.pabs.") \|\| // Added in 6.0		Name.startswith("avx2.pabs.") \|\| // Added in 6.0
Name.startswith("avx512.mask.pabs.") \|\| // Added in 6.0		Name.startswith("avx512.mask.pabs.") \|\| // Added in 6.0
Name.startswith("avx512.broadcastm") \|\| // Added in 6.0		Name.startswith("avx512.broadcastm") \|\| // Added in 6.0
		Name.startswith("avx512.mask.sqrt") \|\| // Added in 6.0
		craig.topper (inline comment, done): The sse.sqrt.ss and sse2.sqrt.sd intrinsics are still in IntrinsicsX86.td.
		Name.startswith("avx.sqrt.p") \|\| // Added in 6.0
		Name.startswith("sse2.sqrt.p") \|\| // Added in 6.0
		Name.startswith("sse.sqrt.p") \|\| // Added in 6.0
Name.startswith("avx512.mask.pbroadcast") \|\| // Added in 6.0		Name.startswith("avx512.mask.pbroadcast") \|\| // Added in 6.0
Name.startswith("sse2.pcmpeq.") \|\| // Added in 3.1		Name.startswith("sse2.pcmpeq.") \|\| // Added in 3.1
Name.startswith("sse2.pcmpgt.") \|\| // Added in 3.1		Name.startswith("sse2.pcmpgt.") \|\| // Added in 3.1
Name.startswith("avx2.pcmpeq.") \|\| // Added in 3.1		Name.startswith("avx2.pcmpeq.") \|\| // Added in 3.1
Name.startswith("avx2.pcmpgt.") \|\| // Added in 3.1		Name.startswith("avx2.pcmpgt.") \|\| // Added in 3.1
Name.startswith("avx512.mask.pcmpeq.") \|\| // Added in 3.9		Name.startswith("avx512.mask.pcmpeq.") \|\| // Added in 3.9
Name.startswith("avx512.mask.pcmpgt.") \|\| // Added in 3.9		Name.startswith("avx512.mask.pcmpgt.") \|\| // Added in 3.9
Name.startswith("avx.vperm2f128.") \|\| // Added in 6.0		Name.startswith("avx.vperm2f128.") \|\| // Added in 6.0
▲ Show 20 Lines • Show All 949 Lines • ▼ Show 20 Lines	if (!NewFn) {
} else if (IsX86 && (Name.startswith("avx512.broadcastm"))) {		} else if (IsX86 && (Name.startswith("avx512.broadcastm"))) {
Type *ExtTy = Type::getInt32Ty(C);		Type *ExtTy = Type::getInt32Ty(C);
if (CI->getOperand(0)->getType()->isIntegerTy(8))		if (CI->getOperand(0)->getType()->isIntegerTy(8))
ExtTy = Type::getInt64Ty(C);		ExtTy = Type::getInt64Ty(C);
unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() /		unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() /
ExtTy->getPrimitiveSizeInBits();		ExtTy->getPrimitiveSizeInBits();
Rep = Builder.CreateZExt(CI->getArgOperand(0), ExtTy);		Rep = Builder.CreateZExt(CI->getArgOperand(0), ExtTy);
Rep = Builder.CreateVectorSplat(NumElts, Rep);		Rep = Builder.CreateVectorSplat(NumElts, Rep);
		} else if (IsX86 && (Name.startswith("avx512.mask.sqrt.s"))) {
		if (cast<llvm::ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4) {
		Intrinsic::ID ID;
		if (Name == "avx512.mask.sqrt.sd")
		ID = Intrinsic::x86_avx512_sqrt_sd_mask;
		else
		ID = Intrinsic::x86_avx512_sqrt_ss_mask;
		Function *Intrin = Intrinsic::getDeclaration(F->getParent(), ID);
		Rep = Builder.CreateCall(Intrin,
		{CI->getArgOperand(0), CI->getArgOperand(1),
		CI->getArgOperand(2), CI->getArgOperand(3),
		CI->getArgOperand(4)});
		} else {
		llvm::Value *C0 = llvm::ConstantInt::get(Type::getInt32Ty(C), 0);
		Value *A = Builder.CreateExtractElement(CI->getArgOperand(0), C0);
		Function *Intrin = Intrinsic::getDeclaration(
		F->getParent(), Intrinsic::sqrt, A->getType());
		Value *Src = Builder.CreateExtractElement(CI->getArgOperand(2), C0);
		Value *Mask = CI->getArgOperand(3);
		int MaskSize = Mask->getType()->getScalarSizeInBits();
		llvm::Type *MaskTy =
		llvm::VectorType::get(Builder.getInt1Ty(), MaskSize);
		Mask = Builder.CreateBitCast(Mask, MaskTy);
		Mask = Builder.CreateExtractElement(Mask, C0);
		A = Builder.CreateSelect(Mask, Builder.CreateCall(Intrin, {A}), Src);
		Rep = Builder.CreateInsertElement(CI->getArgOperand(1), A, C0);
		}
		} else if (IsX86 && (Name.startswith("avx.sqrt.p") \|\|
		Name.startswith("sse2.sqrt.p") \|\|
		Name.startswith("sse.sqrt.p"))) {
		Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
		Intrinsic::sqrt,
		CI->getType()),
		{CI->getArgOperand(0)});
		} else if (IsX86 && (Name.startswith("avx512.mask.sqrt.p"))) {
		if (Name.endswith("512") &&
		cast<llvm::ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4) {
		Intrinsic::ID ID;
		if (Name == "avx512.mask.sqrt.pd.512")
		ID = Intrinsic::x86_avx512_sqrt_pd_512_mask;
		else
		ID = Intrinsic::x86_avx512_sqrt_ps_512_mask;
		Function *Intrin = Intrinsic::getDeclaration(F->getParent(), ID);
		Rep = Builder.CreateCall(Intrin,
		{CI->getArgOperand(0), CI->getArgOperand(1),
		CI->getArgOperand(2), CI->getArgOperand(3)});
		} else {
		Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
		Intrinsic::sqrt,
		CI->getType()),
		{CI->getArgOperand(0)});
		Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
		CI->getArgOperand(1));
		}
} else if (IsX86 && (Name.startswith("avx512.ptestm") \|\|		} else if (IsX86 && (Name.startswith("avx512.ptestm") \|\|
Name.startswith("avx512.ptestnm"))) {		Name.startswith("avx512.ptestnm"))) {
Value *Op0 = CI->getArgOperand(0);		Value *Op0 = CI->getArgOperand(0);
Value *Op1 = CI->getArgOperand(1);		Value *Op1 = CI->getArgOperand(1);
Value *Mask = CI->getArgOperand(2);		Value *Mask = CI->getArgOperand(2);
Rep = Builder.CreateAnd(Op0, Op1);		Rep = Builder.CreateAnd(Op0, Op1);
llvm::Type *Ty = Op0->getType();		llvm::Type *Ty = Op0->getType();
Value *Zero = llvm::Constant::getNullValue(Ty);		Value *Zero = llvm::Constant::getNullValue(Ty);
▲ Show 20 Lines • Show All 1,544 Lines • Show Last 20 Lines

lib/Target/X86/X86IntrinsicsInfo.h

Show First 20 Lines • Show All 385 Lines • ▼ Show 20 Lines	static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx_min_pd_256, INTR_TYPE_2OP, X86ISD::FMIN, 0),		X86_INTRINSIC_DATA(avx_min_pd_256, INTR_TYPE_2OP, X86ISD::FMIN, 0),
X86_INTRINSIC_DATA(avx_min_ps_256, INTR_TYPE_2OP, X86ISD::FMIN, 0),		X86_INTRINSIC_DATA(avx_min_ps_256, INTR_TYPE_2OP, X86ISD::FMIN, 0),
X86_INTRINSIC_DATA(avx_movmsk_pd_256, INTR_TYPE_1OP, X86ISD::MOVMSK, 0),		X86_INTRINSIC_DATA(avx_movmsk_pd_256, INTR_TYPE_1OP, X86ISD::MOVMSK, 0),
X86_INTRINSIC_DATA(avx_movmsk_ps_256, INTR_TYPE_1OP, X86ISD::MOVMSK, 0),		X86_INTRINSIC_DATA(avx_movmsk_ps_256, INTR_TYPE_1OP, X86ISD::MOVMSK, 0),
X86_INTRINSIC_DATA(avx_rcp_ps_256, INTR_TYPE_1OP, X86ISD::FRCP, 0),		X86_INTRINSIC_DATA(avx_rcp_ps_256, INTR_TYPE_1OP, X86ISD::FRCP, 0),
X86_INTRINSIC_DATA(avx_round_pd_256, ROUNDP, X86ISD::VRNDSCALE, 0),		X86_INTRINSIC_DATA(avx_round_pd_256, ROUNDP, X86ISD::VRNDSCALE, 0),
X86_INTRINSIC_DATA(avx_round_ps_256, ROUNDP, X86ISD::VRNDSCALE, 0),		X86_INTRINSIC_DATA(avx_round_ps_256, ROUNDP, X86ISD::VRNDSCALE, 0),
X86_INTRINSIC_DATA(avx_rsqrt_ps_256, INTR_TYPE_1OP, X86ISD::FRSQRT, 0),		X86_INTRINSIC_DATA(avx_rsqrt_ps_256, INTR_TYPE_1OP, X86ISD::FRSQRT, 0),
X86_INTRINSIC_DATA(avx_sqrt_pd_256, INTR_TYPE_1OP, ISD::FSQRT, 0),
X86_INTRINSIC_DATA(avx_sqrt_ps_256, INTR_TYPE_1OP, ISD::FSQRT, 0),
X86_INTRINSIC_DATA(avx_vpermilvar_pd, INTR_TYPE_2OP, X86ISD::VPERMILPV, 0),		X86_INTRINSIC_DATA(avx_vpermilvar_pd, INTR_TYPE_2OP, X86ISD::VPERMILPV, 0),
X86_INTRINSIC_DATA(avx_vpermilvar_pd_256, INTR_TYPE_2OP, X86ISD::VPERMILPV, 0),		X86_INTRINSIC_DATA(avx_vpermilvar_pd_256, INTR_TYPE_2OP, X86ISD::VPERMILPV, 0),
X86_INTRINSIC_DATA(avx_vpermilvar_ps, INTR_TYPE_2OP, X86ISD::VPERMILPV, 0),		X86_INTRINSIC_DATA(avx_vpermilvar_ps, INTR_TYPE_2OP, X86ISD::VPERMILPV, 0),
X86_INTRINSIC_DATA(avx_vpermilvar_ps_256, INTR_TYPE_2OP, X86ISD::VPERMILPV, 0),		X86_INTRINSIC_DATA(avx_vpermilvar_ps_256, INTR_TYPE_2OP, X86ISD::VPERMILPV, 0),
X86_INTRINSIC_DATA(avx2_packssdw, INTR_TYPE_2OP, X86ISD::PACKSS, 0),		X86_INTRINSIC_DATA(avx2_packssdw, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
X86_INTRINSIC_DATA(avx2_packsswb, INTR_TYPE_2OP, X86ISD::PACKSS, 0),		X86_INTRINSIC_DATA(avx2_packsswb, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
X86_INTRINSIC_DATA(avx2_packusdw, INTR_TYPE_2OP, X86ISD::PACKUS, 0),		X86_INTRINSIC_DATA(avx2_packusdw, INTR_TYPE_2OP, X86ISD::PACKUS, 0),
X86_INTRINSIC_DATA(avx2_packuswb, INTR_TYPE_2OP, X86ISD::PACKUS, 0),		X86_INTRINSIC_DATA(avx2_packuswb, INTR_TYPE_2OP, X86ISD::PACKUS, 0),
▲ Show 20 Lines • Show All 675 Lines • ▼ Show 20 Lines	static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_mask_scalef_ps_256, INTR_TYPE_2OP_MASK_RM,		X86_INTRINSIC_DATA(avx512_mask_scalef_ps_256, INTR_TYPE_2OP_MASK_RM,
X86ISD::SCALEF, 0),		X86ISD::SCALEF, 0),
X86_INTRINSIC_DATA(avx512_mask_scalef_ps_512, INTR_TYPE_2OP_MASK_RM,		X86_INTRINSIC_DATA(avx512_mask_scalef_ps_512, INTR_TYPE_2OP_MASK_RM,
X86ISD::SCALEF, 0),		X86ISD::SCALEF, 0),
X86_INTRINSIC_DATA(avx512_mask_scalef_sd, INTR_TYPE_SCALAR_MASK_RM,		X86_INTRINSIC_DATA(avx512_mask_scalef_sd, INTR_TYPE_SCALAR_MASK_RM,
X86ISD::SCALEFS, 0),		X86ISD::SCALEFS, 0),
X86_INTRINSIC_DATA(avx512_mask_scalef_ss, INTR_TYPE_SCALAR_MASK_RM,		X86_INTRINSIC_DATA(avx512_mask_scalef_ss, INTR_TYPE_SCALAR_MASK_RM,
X86ISD::SCALEFS, 0),		X86ISD::SCALEFS, 0),
X86_INTRINSIC_DATA(avx512_mask_sqrt_pd_128, INTR_TYPE_1OP_MASK, ISD::FSQRT, 0),
X86_INTRINSIC_DATA(avx512_mask_sqrt_pd_256, INTR_TYPE_1OP_MASK, ISD::FSQRT, 0),
X86_INTRINSIC_DATA(avx512_mask_sqrt_pd_512, INTR_TYPE_1OP_MASK, ISD::FSQRT,
X86ISD::FSQRT_RND),
X86_INTRINSIC_DATA(avx512_mask_sqrt_ps_128, INTR_TYPE_1OP_MASK, ISD::FSQRT, 0),
X86_INTRINSIC_DATA(avx512_mask_sqrt_ps_256, INTR_TYPE_1OP_MASK, ISD::FSQRT, 0),
X86_INTRINSIC_DATA(avx512_mask_sqrt_ps_512, INTR_TYPE_1OP_MASK, ISD::FSQRT,
X86ISD::FSQRT_RND),
X86_INTRINSIC_DATA(avx512_mask_sqrt_sd, INTR_TYPE_SCALAR_MASK_RM,
X86ISD::FSQRTS_RND, 0),
X86_INTRINSIC_DATA(avx512_mask_sqrt_ss, INTR_TYPE_SCALAR_MASK_RM,
X86ISD::FSQRTS_RND, 0),
X86_INTRINSIC_DATA(avx512_mask_sub_pd_512, INTR_TYPE_2OP_MASK, ISD::FSUB,		X86_INTRINSIC_DATA(avx512_mask_sub_pd_512, INTR_TYPE_2OP_MASK, ISD::FSUB,
X86ISD::FSUB_RND),		X86ISD::FSUB_RND),
X86_INTRINSIC_DATA(avx512_mask_sub_ps_512, INTR_TYPE_2OP_MASK, ISD::FSUB,		X86_INTRINSIC_DATA(avx512_mask_sub_ps_512, INTR_TYPE_2OP_MASK, ISD::FSUB,
X86ISD::FSUB_RND),		X86ISD::FSUB_RND),
X86_INTRINSIC_DATA(avx512_mask_sub_sd_round, INTR_TYPE_SCALAR_MASK_RM,		X86_INTRINSIC_DATA(avx512_mask_sub_sd_round, INTR_TYPE_SCALAR_MASK_RM,
X86ISD::FSUBS_RND, 0),		X86ISD::FSUBS_RND, 0),
X86_INTRINSIC_DATA(avx512_mask_sub_ss_round, INTR_TYPE_SCALAR_MASK_RM,		X86_INTRINSIC_DATA(avx512_mask_sub_ss_round, INTR_TYPE_SCALAR_MASK_RM,
X86ISD::FSUBS_RND, 0),		X86ISD::FSUBS_RND, 0),
▲ Show 20 Lines • Show All 434 Lines • ▼ Show 20 Lines	X86_INTRINSIC_DATA(avx512_mask_vpermi2var_d_128, VPERM_3OP_MASK,
X86_INTRINSIC_DATA(avx512_rsqrt14_ps_256, INTR_TYPE_1OP_MASK, X86ISD::RSQRT14, 0),		X86_INTRINSIC_DATA(avx512_rsqrt14_ps_256, INTR_TYPE_1OP_MASK, X86ISD::RSQRT14, 0),
X86_INTRINSIC_DATA(avx512_rsqrt14_ps_512, INTR_TYPE_1OP_MASK, X86ISD::RSQRT14, 0),		X86_INTRINSIC_DATA(avx512_rsqrt14_ps_512, INTR_TYPE_1OP_MASK, X86ISD::RSQRT14, 0),
X86_INTRINSIC_DATA(avx512_rsqrt14_sd, INTR_TYPE_SCALAR_MASK, X86ISD::RSQRT14S, 0),		X86_INTRINSIC_DATA(avx512_rsqrt14_sd, INTR_TYPE_SCALAR_MASK, X86ISD::RSQRT14S, 0),
X86_INTRINSIC_DATA(avx512_rsqrt14_ss, INTR_TYPE_SCALAR_MASK, X86ISD::RSQRT14S, 0),		X86_INTRINSIC_DATA(avx512_rsqrt14_ss, INTR_TYPE_SCALAR_MASK, X86ISD::RSQRT14S, 0),
X86_INTRINSIC_DATA(avx512_rsqrt28_pd, INTR_TYPE_1OP_MASK_RM,X86ISD::RSQRT28, 0),		X86_INTRINSIC_DATA(avx512_rsqrt28_pd, INTR_TYPE_1OP_MASK_RM,X86ISD::RSQRT28, 0),
X86_INTRINSIC_DATA(avx512_rsqrt28_ps, INTR_TYPE_1OP_MASK_RM,X86ISD::RSQRT28, 0),		X86_INTRINSIC_DATA(avx512_rsqrt28_ps, INTR_TYPE_1OP_MASK_RM,X86ISD::RSQRT28, 0),
X86_INTRINSIC_DATA(avx512_rsqrt28_sd, INTR_TYPE_SCALAR_MASK_RM,X86ISD::RSQRT28S, 0),		X86_INTRINSIC_DATA(avx512_rsqrt28_sd, INTR_TYPE_SCALAR_MASK_RM,X86ISD::RSQRT28S, 0),
X86_INTRINSIC_DATA(avx512_rsqrt28_ss, INTR_TYPE_SCALAR_MASK_RM,X86ISD::RSQRT28S, 0),		X86_INTRINSIC_DATA(avx512_rsqrt28_ss, INTR_TYPE_SCALAR_MASK_RM,X86ISD::RSQRT28S, 0),

		X86_INTRINSIC_DATA(avx512_sqrt_pd_512_mask, INTR_TYPE_1OP_MASK, ISD::FSQRT,
		X86ISD::FSQRT_RND),
		X86_INTRINSIC_DATA(avx512_sqrt_ps_512_mask, INTR_TYPE_1OP_MASK, ISD::FSQRT,
		X86ISD::FSQRT_RND),
		X86_INTRINSIC_DATA(avx512_sqrt_sd_mask, INTR_TYPE_SCALAR_MASK_RM,
		X86ISD::FSQRTS_RND, 0),
		X86_INTRINSIC_DATA(avx512_sqrt_ss_mask, INTR_TYPE_SCALAR_MASK_RM,
		X86ISD::FSQRTS_RND, 0),

X86_INTRINSIC_DATA(avx512_vcomi_sd, COMI_RM, X86ISD::COMI, X86ISD::UCOMI),		X86_INTRINSIC_DATA(avx512_vcomi_sd, COMI_RM, X86ISD::COMI, X86ISD::UCOMI),
X86_INTRINSIC_DATA(avx512_vcomi_ss, COMI_RM, X86ISD::COMI, X86ISD::UCOMI),		X86_INTRINSIC_DATA(avx512_vcomi_ss, COMI_RM, X86ISD::COMI, X86ISD::UCOMI),
X86_INTRINSIC_DATA(avx512_vcvtsd2si32, INTR_TYPE_2OP, X86ISD::CVTS2SI_RND, 0),		X86_INTRINSIC_DATA(avx512_vcvtsd2si32, INTR_TYPE_2OP, X86ISD::CVTS2SI_RND, 0),
X86_INTRINSIC_DATA(avx512_vcvtsd2si64, INTR_TYPE_2OP, X86ISD::CVTS2SI_RND, 0),		X86_INTRINSIC_DATA(avx512_vcvtsd2si64, INTR_TYPE_2OP, X86ISD::CVTS2SI_RND, 0),
X86_INTRINSIC_DATA(avx512_vcvtsd2usi32, INTR_TYPE_2OP, X86ISD::CVTS2UI_RND, 0),		X86_INTRINSIC_DATA(avx512_vcvtsd2usi32, INTR_TYPE_2OP, X86ISD::CVTS2UI_RND, 0),
X86_INTRINSIC_DATA(avx512_vcvtsd2usi64, INTR_TYPE_2OP, X86ISD::CVTS2UI_RND, 0),		X86_INTRINSIC_DATA(avx512_vcvtsd2usi64, INTR_TYPE_2OP, X86ISD::CVTS2UI_RND, 0),
X86_INTRINSIC_DATA(avx512_vcvtss2si32, INTR_TYPE_2OP, X86ISD::CVTS2SI_RND, 0),		X86_INTRINSIC_DATA(avx512_vcvtss2si32, INTR_TYPE_2OP, X86ISD::CVTS2SI_RND, 0),
X86_INTRINSIC_DATA(avx512_vcvtss2si64, INTR_TYPE_2OP, X86ISD::CVTS2SI_RND, 0),		X86_INTRINSIC_DATA(avx512_vcvtss2si64, INTR_TYPE_2OP, X86ISD::CVTS2SI_RND, 0),
▲ Show 20 Lines • Show All 44 Lines • ▼ Show 20 Lines	X86_INTRINSIC_DATA(avx512_mask_vpermi2var_d_128, VPERM_3OP_MASK,
X86_INTRINSIC_DATA(sse_comineq_ss, COMI, X86ISD::COMI, ISD::SETNE),		X86_INTRINSIC_DATA(sse_comineq_ss, COMI, X86ISD::COMI, ISD::SETNE),
X86_INTRINSIC_DATA(sse_max_ps, INTR_TYPE_2OP, X86ISD::FMAX, 0),		X86_INTRINSIC_DATA(sse_max_ps, INTR_TYPE_2OP, X86ISD::FMAX, 0),
X86_INTRINSIC_DATA(sse_max_ss, INTR_TYPE_2OP, X86ISD::FMAXS, 0),		X86_INTRINSIC_DATA(sse_max_ss, INTR_TYPE_2OP, X86ISD::FMAXS, 0),
X86_INTRINSIC_DATA(sse_min_ps, INTR_TYPE_2OP, X86ISD::FMIN, 0),		X86_INTRINSIC_DATA(sse_min_ps, INTR_TYPE_2OP, X86ISD::FMIN, 0),
X86_INTRINSIC_DATA(sse_min_ss, INTR_TYPE_2OP, X86ISD::FMINS, 0),		X86_INTRINSIC_DATA(sse_min_ss, INTR_TYPE_2OP, X86ISD::FMINS, 0),
X86_INTRINSIC_DATA(sse_movmsk_ps, INTR_TYPE_1OP, X86ISD::MOVMSK, 0),		X86_INTRINSIC_DATA(sse_movmsk_ps, INTR_TYPE_1OP, X86ISD::MOVMSK, 0),
X86_INTRINSIC_DATA(sse_rcp_ps, INTR_TYPE_1OP, X86ISD::FRCP, 0),		X86_INTRINSIC_DATA(sse_rcp_ps, INTR_TYPE_1OP, X86ISD::FRCP, 0),
X86_INTRINSIC_DATA(sse_rsqrt_ps, INTR_TYPE_1OP, X86ISD::FRSQRT, 0),		X86_INTRINSIC_DATA(sse_rsqrt_ps, INTR_TYPE_1OP, X86ISD::FRSQRT, 0),
X86_INTRINSIC_DATA(sse_sqrt_ps, INTR_TYPE_1OP, ISD::FSQRT, 0),
X86_INTRINSIC_DATA(sse_ucomieq_ss, COMI, X86ISD::UCOMI, ISD::SETEQ),		X86_INTRINSIC_DATA(sse_ucomieq_ss, COMI, X86ISD::UCOMI, ISD::SETEQ),
X86_INTRINSIC_DATA(sse_ucomige_ss, COMI, X86ISD::UCOMI, ISD::SETGE),		X86_INTRINSIC_DATA(sse_ucomige_ss, COMI, X86ISD::UCOMI, ISD::SETGE),
X86_INTRINSIC_DATA(sse_ucomigt_ss, COMI, X86ISD::UCOMI, ISD::SETGT),		X86_INTRINSIC_DATA(sse_ucomigt_ss, COMI, X86ISD::UCOMI, ISD::SETGT),
X86_INTRINSIC_DATA(sse_ucomile_ss, COMI, X86ISD::UCOMI, ISD::SETLE),		X86_INTRINSIC_DATA(sse_ucomile_ss, COMI, X86ISD::UCOMI, ISD::SETLE),
X86_INTRINSIC_DATA(sse_ucomilt_ss, COMI, X86ISD::UCOMI, ISD::SETLT),		X86_INTRINSIC_DATA(sse_ucomilt_ss, COMI, X86ISD::UCOMI, ISD::SETLT),
X86_INTRINSIC_DATA(sse_ucomineq_ss, COMI, X86ISD::UCOMI, ISD::SETNE),		X86_INTRINSIC_DATA(sse_ucomineq_ss, COMI, X86ISD::UCOMI, ISD::SETNE),
X86_INTRINSIC_DATA(sse2_cmp_pd, INTR_TYPE_3OP, X86ISD::CMPP, 0),		X86_INTRINSIC_DATA(sse2_cmp_pd, INTR_TYPE_3OP, X86ISD::CMPP, 0),
X86_INTRINSIC_DATA(sse2_comieq_sd, COMI, X86ISD::COMI, ISD::SETEQ),		X86_INTRINSIC_DATA(sse2_comieq_sd, COMI, X86ISD::COMI, ISD::SETEQ),
Show All 40 Lines	X86_INTRINSIC_DATA(avx512_mask_vpermi2var_d_128, VPERM_3OP_MASK,
X86_INTRINSIC_DATA(sse2_psrl_w, INTR_TYPE_2OP, X86ISD::VSRL, 0),		X86_INTRINSIC_DATA(sse2_psrl_w, INTR_TYPE_2OP, X86ISD::VSRL, 0),
X86_INTRINSIC_DATA(sse2_psrli_d, VSHIFT, X86ISD::VSRLI, 0),		X86_INTRINSIC_DATA(sse2_psrli_d, VSHIFT, X86ISD::VSRLI, 0),
X86_INTRINSIC_DATA(sse2_psrli_q, VSHIFT, X86ISD::VSRLI, 0),		X86_INTRINSIC_DATA(sse2_psrli_q, VSHIFT, X86ISD::VSRLI, 0),
X86_INTRINSIC_DATA(sse2_psrli_w, VSHIFT, X86ISD::VSRLI, 0),		X86_INTRINSIC_DATA(sse2_psrli_w, VSHIFT, X86ISD::VSRLI, 0),
X86_INTRINSIC_DATA(sse2_psubs_b, INTR_TYPE_2OP, X86ISD::SUBS, 0),		X86_INTRINSIC_DATA(sse2_psubs_b, INTR_TYPE_2OP, X86ISD::SUBS, 0),
X86_INTRINSIC_DATA(sse2_psubs_w, INTR_TYPE_2OP, X86ISD::SUBS, 0),		X86_INTRINSIC_DATA(sse2_psubs_w, INTR_TYPE_2OP, X86ISD::SUBS, 0),
X86_INTRINSIC_DATA(sse2_psubus_b, INTR_TYPE_2OP, X86ISD::SUBUS, 0),		X86_INTRINSIC_DATA(sse2_psubus_b, INTR_TYPE_2OP, X86ISD::SUBUS, 0),
X86_INTRINSIC_DATA(sse2_psubus_w, INTR_TYPE_2OP, X86ISD::SUBUS, 0),		X86_INTRINSIC_DATA(sse2_psubus_w, INTR_TYPE_2OP, X86ISD::SUBUS, 0),
X86_INTRINSIC_DATA(sse2_sqrt_pd, INTR_TYPE_1OP, ISD::FSQRT, 0),
X86_INTRINSIC_DATA(sse2_ucomieq_sd, COMI, X86ISD::UCOMI, ISD::SETEQ),		X86_INTRINSIC_DATA(sse2_ucomieq_sd, COMI, X86ISD::UCOMI, ISD::SETEQ),
X86_INTRINSIC_DATA(sse2_ucomige_sd, COMI, X86ISD::UCOMI, ISD::SETGE),		X86_INTRINSIC_DATA(sse2_ucomige_sd, COMI, X86ISD::UCOMI, ISD::SETGE),
X86_INTRINSIC_DATA(sse2_ucomigt_sd, COMI, X86ISD::UCOMI, ISD::SETGT),		X86_INTRINSIC_DATA(sse2_ucomigt_sd, COMI, X86ISD::UCOMI, ISD::SETGT),
X86_INTRINSIC_DATA(sse2_ucomile_sd, COMI, X86ISD::UCOMI, ISD::SETLE),		X86_INTRINSIC_DATA(sse2_ucomile_sd, COMI, X86ISD::UCOMI, ISD::SETLE),
X86_INTRINSIC_DATA(sse2_ucomilt_sd, COMI, X86ISD::UCOMI, ISD::SETLT),		X86_INTRINSIC_DATA(sse2_ucomilt_sd, COMI, X86ISD::UCOMI, ISD::SETLT),
X86_INTRINSIC_DATA(sse2_ucomineq_sd, COMI, X86ISD::UCOMI, ISD::SETNE),		X86_INTRINSIC_DATA(sse2_ucomineq_sd, COMI, X86ISD::UCOMI, ISD::SETNE),
X86_INTRINSIC_DATA(sse3_addsub_pd, INTR_TYPE_2OP, X86ISD::ADDSUB, 0),		X86_INTRINSIC_DATA(sse3_addsub_pd, INTR_TYPE_2OP, X86ISD::ADDSUB, 0),
X86_INTRINSIC_DATA(sse3_addsub_ps, INTR_TYPE_2OP, X86ISD::ADDSUB, 0),		X86_INTRINSIC_DATA(sse3_addsub_ps, INTR_TYPE_2OP, X86ISD::ADDSUB, 0),
▲ Show 20 Lines • Show All 107 Lines • Show Last 20 Lines

test/CodeGen/X86/avx-intrinsics-fast-isel.ll

	Show First 20 Lines • Show All 3,013 Lines • ▼ Show 20 Lines
	; X32: # %bb.0:			; X32: # %bb.0:
	; X32-NEXT: vsqrtpd %ymm0, %ymm0			; X32-NEXT: vsqrtpd %ymm0, %ymm0
	; X32-NEXT: retl			; X32-NEXT: retl
	;			;
	; X64-LABEL: test_mm256_sqrt_pd:			; X64-LABEL: test_mm256_sqrt_pd:
	; X64: # %bb.0:			; X64: # %bb.0:
	; X64-NEXT: vsqrtpd %ymm0, %ymm0			; X64-NEXT: vsqrtpd %ymm0, %ymm0
	; X64-NEXT: retq			; X64-NEXT: retq
	%res = call <4 x double> @llvm.x86.avx.sqrt.pd.256(<4 x double> %a0)			entry:
	ret <4 x double> %res			%0 = tail call <4 x double> @llvm.sqrt.v4f64(<4 x double> %a0) #2
				ret <4 x double> %0
	}			}
	declare <4 x double> @llvm.x86.avx.sqrt.pd.256(<4 x double>) nounwind readnone
				declare <4 x double> @llvm.sqrt.v4f64(<4 x double>) #1

	define <8 x float> @test_mm256_sqrt_ps(<8 x float> %a0) nounwind {			define <8 x float> @test_mm256_sqrt_ps(<8 x float> %a0) nounwind {
	; X32-LABEL: test_mm256_sqrt_ps:			; X32-LABEL: test_mm256_sqrt_ps:
	; X32: # %bb.0:			; X32: # %bb.0:
	; X32-NEXT: vsqrtps %ymm0, %ymm0			; X32-NEXT: vsqrtps %ymm0, %ymm0
	; X32-NEXT: retl			; X32-NEXT: retl
	;			;
	; X64-LABEL: test_mm256_sqrt_ps:			; X64-LABEL: test_mm256_sqrt_ps:
	; X64: # %bb.0:			; X64: # %bb.0:
	; X64-NEXT: vsqrtps %ymm0, %ymm0			; X64-NEXT: vsqrtps %ymm0, %ymm0
	; X64-NEXT: retq			; X64-NEXT: retq
	%res = call <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float> %a0)			entry:
	ret <8 x float> %res			%0 = tail call <8 x float> @llvm.sqrt.v8f32(<8 x float> %a0) #2
				ret <8 x float> %0
	}			}
	declare <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float>) nounwind readnone
				declare <8 x float> @llvm.sqrt.v8f32(<8 x float>) #1

	define void @test_mm256_store_pd(double* %a0, <4 x double> %a1) nounwind {			define void @test_mm256_store_pd(double* %a0, <4 x double> %a1) nounwind {
	; X32-LABEL: test_mm256_store_pd:			; X32-LABEL: test_mm256_store_pd:
	; X32: # %bb.0:			; X32: # %bb.0:
	; X32-NEXT: movl {{[0-9]+}}(%esp), %eax			; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
	; X32-NEXT: vmovaps %ymm0, (%eax)			; X32-NEXT: vmovaps %ymm0, (%eax)
	; X32-NEXT: vzeroupper			; X32-NEXT: vzeroupper
	; X32-NEXT: retl			; X32-NEXT: retl
	▲ Show 20 Lines • Show All 771 Lines • Show Last 20 Lines

test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll

	; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py			; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
	; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx \| FileCheck %s --check-prefix=CHECK --check-prefix=X86			; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx \| FileCheck %s --check-prefix=CHECK --check-prefix=X86
	; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx \| FileCheck %s --check-prefix=CHECK --check-prefix=X64			; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx \| FileCheck %s --check-prefix=CHECK --check-prefix=X64

	; We don't check any vinsertf128 variant with immediate 0 because that's just a blend.			; We don't check any vinsertf128 variant with immediate 0 because that's just a blend.

				; Calls the legacy @llvm.x86.avx.sqrt.pd.256 intrinsic and expects a single vsqrtpd.
				; The assertions use the plain check prefix because this file's RUN lines enable
				; only the CHECK/X86/X64 prefixes and do not pass -show-mc-encoding; the AVX and
				; AVX512VL prefixes carried over from avx-intrinsics-x86.ll would never be evaluated.
				; NOTE(review): regenerate with utils/update_llc_test_checks.py to confirm.
				define <4 x double> @test_x86_avx_sqrt_pd_256(<4 x double> %a0) {
				; CHECK-LABEL: test_x86_avx_sqrt_pd_256:
				; CHECK: # %bb.0:
				; CHECK-NEXT: vsqrtpd %ymm0, %ymm0
				; CHECK-NEXT: ret{{[l\|q]}}
				%res = call <4 x double> @llvm.x86.avx.sqrt.pd.256(<4 x double> %a0) ; <<4 x double>> [#uses=1]
				ret <4 x double> %res
				}
				declare <4 x double> @llvm.x86.avx.sqrt.pd.256(<4 x double>) nounwind readnone

				; Calls the legacy @llvm.x86.avx.sqrt.ps.256 intrinsic and expects a single vsqrtps.
				; The assertions use the plain check prefix because this file's RUN lines enable
				; only the CHECK/X86/X64 prefixes and do not pass -show-mc-encoding; the AVX and
				; AVX512VL prefixes carried over from avx-intrinsics-x86.ll would never be evaluated.
				; NOTE(review): regenerate with utils/update_llc_test_checks.py to confirm.
				define <8 x float> @test_x86_avx_sqrt_ps_256(<8 x float> %a0) {
				; CHECK-LABEL: test_x86_avx_sqrt_ps_256:
				; CHECK: # %bb.0:
				; CHECK-NEXT: vsqrtps %ymm0, %ymm0
				; CHECK-NEXT: ret{{[l\|q]}}
				%res = call <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float> %a0) ; <<8 x float>> [#uses=1]
				ret <8 x float> %res
				}
				declare <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float>) nounwind readnone

	define <4 x double> @test_x86_avx_vinsertf128_pd_256_1(<4 x double> %a0, <2 x double> %a1) {			define <4 x double> @test_x86_avx_vinsertf128_pd_256_1(<4 x double> %a0, <2 x double> %a1) {
	; CHECK-LABEL: test_x86_avx_vinsertf128_pd_256_1:			; CHECK-LABEL: test_x86_avx_vinsertf128_pd_256_1:
	; CHECK: # %bb.0:			; CHECK: # %bb.0:
	; CHECK-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0			; CHECK-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
	; CHECK-NEXT: ret{{[l\|q]}}			; CHECK-NEXT: ret{{[l\|q]}}
	%res = call <4 x double> @llvm.x86.avx.vinsertf128.pd.256(<4 x double> %a0, <2 x double> %a1, i8 1)			%res = call <4 x double> @llvm.x86.avx.vinsertf128.pd.256(<4 x double> %a0, <2 x double> %a1, i8 1)
	ret <4 x double> %res			ret <4 x double> %res
	}			}
	▲ Show 20 Lines • Show All 598 Lines • Show Last 20 Lines

test/CodeGen/X86/avx-intrinsics-x86.ll

	Show First 20 Lines • Show All 627 Lines • ▼ Show 20 Lines
	; CHECK: # %bb.0:			; CHECK: # %bb.0:
	; CHECK-NEXT: vrsqrtps %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x52,0xc0]			; CHECK-NEXT: vrsqrtps %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x52,0xc0]
	; CHECK-NEXT: ret{{[l\|q]}} # encoding: [0xc3]			; CHECK-NEXT: ret{{[l\|q]}} # encoding: [0xc3]
	%res = call <8 x float> @llvm.x86.avx.rsqrt.ps.256(<8 x float> %a0) ; <<8 x float>> [#uses=1]			%res = call <8 x float> @llvm.x86.avx.rsqrt.ps.256(<8 x float> %a0) ; <<8 x float>> [#uses=1]
	ret <8 x float> %res			ret <8 x float> %res
	}			}
	declare <8 x float> @llvm.x86.avx.rsqrt.ps.256(<8 x float>) nounwind readnone			declare <8 x float> @llvm.x86.avx.rsqrt.ps.256(<8 x float>) nounwind readnone


	define <4 x double> @test_x86_avx_sqrt_pd_256(<4 x double> %a0) {
	; AVX-LABEL: test_x86_avx_sqrt_pd_256:
	; AVX: # %bb.0:
	; AVX-NEXT: vsqrtpd %ymm0, %ymm0 # encoding: [0xc5,0xfd,0x51,0xc0]
	; AVX-NEXT: ret{{[l\|q]}} # encoding: [0xc3]
	;
	; AVX512VL-LABEL: test_x86_avx_sqrt_pd_256:
	; AVX512VL: # %bb.0:
	; AVX512VL-NEXT: vsqrtpd %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x51,0xc0]
	; AVX512VL-NEXT: ret{{[l\|q]}} # encoding: [0xc3]
	%res = call <4 x double> @llvm.x86.avx.sqrt.pd.256(<4 x double> %a0) ; <<4 x double>> [#uses=1]
	ret <4 x double> %res
	}
	declare <4 x double> @llvm.x86.avx.sqrt.pd.256(<4 x double>) nounwind readnone


	define <8 x float> @test_x86_avx_sqrt_ps_256(<8 x float> %a0) {
	; AVX-LABEL: test_x86_avx_sqrt_ps_256:
	; AVX: # %bb.0:
	; AVX-NEXT: vsqrtps %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x51,0xc0]
	; AVX-NEXT: ret{{[l\|q]}} # encoding: [0xc3]
	;
	; AVX512VL-LABEL: test_x86_avx_sqrt_ps_256:
	; AVX512VL: # %bb.0:
	; AVX512VL-NEXT: vsqrtps %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x51,0xc0]
	; AVX512VL-NEXT: ret{{[l\|q]}} # encoding: [0xc3]
	%res = call <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float> %a0) ; <<8 x float>> [#uses=1]
	ret <8 x float> %res
	}
	declare <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float>) nounwind readnone


	define <2 x double> @test_x86_avx_vpermilvar_pd(<2 x double> %a0, <2 x i64> %a1) {			define <2 x double> @test_x86_avx_vpermilvar_pd(<2 x double> %a0, <2 x i64> %a1) {
	; AVX-LABEL: test_x86_avx_vpermilvar_pd:			; AVX-LABEL: test_x86_avx_vpermilvar_pd:
	; AVX: # %bb.0:			; AVX: # %bb.0:
	; AVX-NEXT: vpermilpd %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x0d,0xc1]			; AVX-NEXT: vpermilpd %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x0d,0xc1]
	; AVX-NEXT: ret{{[l\|q]}} # encoding: [0xc3]			; AVX-NEXT: ret{{[l\|q]}} # encoding: [0xc3]
	;			;
	; AVX512VL-LABEL: test_x86_avx_vpermilvar_pd:			; AVX512VL-LABEL: test_x86_avx_vpermilvar_pd:
	; AVX512VL: # %bb.0:			; AVX512VL: # %bb.0:
	▲ Show 20 Lines • Show All 404 Lines • Show Last 20 Lines

test/CodeGen/X86/avx512-intrinsics-fast-isel.ll

	Show First 20 Lines • Show All 1,732 Lines • ▼ Show 20 Lines
	; X64-NEXT: vmovaps %ymm0, %ymm0			; X64-NEXT: vmovaps %ymm0, %ymm0
	; X64-NEXT: retq			; X64-NEXT: retq
	%res = shufflevector <4 x i64> %a0, <4 x i64> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>			%res = shufflevector <4 x i64> %a0, <4 x i64> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
	ret <8 x i64> %res			ret <8 x i64> %res
	}			}

	!0 = !{i32 1}			!0 = !{i32 1}

				define <2 x double> @test_mm_sqrt_round_sd(<2 x double> %__A, <2 x double> %__B) {
				; X32-LABEL: test_mm_sqrt_round_sd:
				; X32: # %bb.0: # %entry
				; X32-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0
				; X32-NEXT: vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
				; X32-NEXT: retl
				;
				; X64-LABEL: test_mm_sqrt_round_sd:
				; X64: # %bb.0: # %entry
				; X64-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0
				; X64-NEXT: vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
				; X64-NEXT: retq
				entry:
				%extract = extractelement <2 x double> %__A, i64 0
				%0 = tail call double @llvm.sqrt.f64(double %extract)
				%1 = insertelement <2 x double> %__B, double %0, i64 0
				ret <2 x double> %1
				}

				declare double @llvm.sqrt.f64(double) #1

				define <2 x double> @test_mm_mask_sqrt_sd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
				; X32-LABEL: test_mm_mask_sqrt_sd:
				; X32: # %bb.0: # %entry
				; X32-NEXT: movb {{[0-9]+}}(%esp), %al
				; X32-NEXT: vsqrtsd %xmm1, %xmm1, %xmm1
				; X32-NEXT: kmovw %eax, %k1
				; X32-NEXT: vmovsd %xmm1, %xmm0, %xmm0 {%k1}
				; X32-NEXT: vmovsd {{.*#+}} xmm0 = xmm0[0],xmm2[1]
				; X32-NEXT: retl
				;
				; X64-LABEL: test_mm_mask_sqrt_sd:
				; X64: # %bb.0: # %entry
				; X64-NEXT: vsqrtsd %xmm1, %xmm1, %xmm1
				; X64-NEXT: kmovw %edi, %k1
				; X64-NEXT: vmovsd %xmm1, %xmm0, %xmm0 {%k1}
				; X64-NEXT: vmovsd {{.*#+}} xmm0 = xmm0[0],xmm2[1]
				; X64-NEXT: retq
				entry:
				%extract.i = extractelement <2 x double> %__A, i64 0
				%extract1.i = extractelement <2 x double> %__W, i64 0
				%0 = bitcast i8 %__U to <8 x i1>
				%extract2.i = extractelement <8 x i1> %0, i64 0
				%1 = tail call double @llvm.sqrt.f64(double %extract.i) #2
				%2 = select i1 %extract2.i, double %1, double %extract1.i
				%3 = insertelement <2 x double> %__B, double %2, i64 0
				ret <2 x double> %3
				}

				define <2 x double> @test_mm_mask_sqrt_round_sd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
				; X32-LABEL: test_mm_mask_sqrt_round_sd:
				; X32: # %bb.0: # %entry
				; X32-NEXT: movb {{[0-9]+}}(%esp), %al
				; X32-NEXT: vsqrtsd %xmm1, %xmm1, %xmm1
				; X32-NEXT: kmovw %eax, %k1
				; X32-NEXT: vmovsd %xmm1, %xmm0, %xmm0 {%k1}
				; X32-NEXT: vmovsd {{.*#+}} xmm0 = xmm0[0],xmm2[1]
				; X32-NEXT: retl
				;
				; X64-LABEL: test_mm_mask_sqrt_round_sd:
				; X64: # %bb.0: # %entry
				; X64-NEXT: vsqrtsd %xmm1, %xmm1, %xmm1
				; X64-NEXT: kmovw %edi, %k1
				; X64-NEXT: vmovsd %xmm1, %xmm0, %xmm0 {%k1}
				; X64-NEXT: vmovsd {{.*#+}} xmm0 = xmm0[0],xmm2[1]
				; X64-NEXT: retq
				entry:
				%extract = extractelement <2 x double> %__A, i64 0
				%extract1 = extractelement <2 x double> %__W, i64 0
				%0 = bitcast i8 %__U to <8 x i1>
				%extract2 = extractelement <8 x i1> %0, i64 0
				%1 = tail call double @llvm.sqrt.f64(double %extract)
				%2 = select i1 %extract2, double %1, double %extract1
				%3 = insertelement <2 x double> %__B, double %2, i64 0
				ret <2 x double> %3
				}

				define <2 x double> @test_mm_maskz_sqrt_sd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
				; X32-LABEL: test_mm_maskz_sqrt_sd:
				; X32: # %bb.0: # %entry
				; X32-NEXT: movb {{[0-9]+}}(%esp), %al
				; X32-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0
				; X32-NEXT: kmovw %eax, %k1
				; X32-NEXT: vxorpd %xmm2, %xmm2, %xmm2
				; X32-NEXT: vmovsd %xmm0, %xmm0, %xmm2 {%k1}
				; X32-NEXT: vmovsd {{.*#+}} xmm0 = xmm2[0],xmm1[1]
				; X32-NEXT: retl
				;
				; X64-LABEL: test_mm_maskz_sqrt_sd:
				; X64: # %bb.0: # %entry
				; X64-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0
				; X64-NEXT: kmovw %edi, %k1
				; X64-NEXT: vxorpd %xmm2, %xmm2, %xmm2
				; X64-NEXT: vmovsd %xmm0, %xmm0, %xmm2 {%k1}
				; X64-NEXT: vmovsd {{.*#+}} xmm0 = xmm2[0],xmm1[1]
				; X64-NEXT: retq
				entry:
				%extract.i = extractelement <2 x double> %__A, i64 0
				%0 = bitcast i8 %__U to <8 x i1>
				%extract2.i = extractelement <8 x i1> %0, i64 0
				%1 = tail call double @llvm.sqrt.f64(double %extract.i) #2
				%2 = select i1 %extract2.i, double %1, double 0.000000e+00
				%3 = insertelement <2 x double> %__B, double %2, i64 0
				ret <2 x double> %3
				}

				define <2 x double> @test_mm_maskz_sqrt_round_sd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
				; X32-LABEL: test_mm_maskz_sqrt_round_sd:
				; X32: # %bb.0: # %entry
				; X32-NEXT: movb {{[0-9]+}}(%esp), %al
				; X32-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0
				; X32-NEXT: kmovw %eax, %k1
				; X32-NEXT: vxorpd %xmm2, %xmm2, %xmm2
				; X32-NEXT: vmovsd %xmm0, %xmm0, %xmm2 {%k1}
				; X32-NEXT: vmovsd {{.*#+}} xmm0 = xmm2[0],xmm1[1]
				; X32-NEXT: retl
				;
				; X64-LABEL: test_mm_maskz_sqrt_round_sd:
				; X64: # %bb.0: # %entry
				; X64-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0
				; X64-NEXT: kmovw %edi, %k1
				; X64-NEXT: vxorpd %xmm2, %xmm2, %xmm2
				; X64-NEXT: vmovsd %xmm0, %xmm0, %xmm2 {%k1}
				; X64-NEXT: vmovsd {{.*#+}} xmm0 = xmm2[0],xmm1[1]
				; X64-NEXT: retq
				entry:
				%extract = extractelement <2 x double> %__A, i64 0
				%0 = bitcast i8 %__U to <8 x i1>
				%extract2 = extractelement <8 x i1> %0, i64 0
				%1 = tail call double @llvm.sqrt.f64(double %extract)
				%2 = select i1 %extract2, double %1, double 0.000000e+00
				%3 = insertelement <2 x double> %__B, double %2, i64 0
				ret <2 x double> %3
				}

				define <4 x float> @test_mm_sqrt_round_ss(<4 x float> %__A, <4 x float> %__B) {
				; X32-LABEL: test_mm_sqrt_round_ss:
				; X32: # %bb.0: # %entry
				; X32-NEXT: vsqrtss %xmm0, %xmm0, %xmm0
				; X32-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
				; X32-NEXT: retl
				;
				; X64-LABEL: test_mm_sqrt_round_ss:
				; X64: # %bb.0: # %entry
				; X64-NEXT: vsqrtss %xmm0, %xmm0, %xmm0
				; X64-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
				; X64-NEXT: retq
				entry:
				%extract = extractelement <4 x float> %__A, i64 0
				%0 = tail call float @llvm.sqrt.f32(float %extract)
				%1 = insertelement <4 x float> %__B, float %0, i64 0
				ret <4 x float> %1
				}

				declare float @llvm.sqrt.f32(float) #1

				define <4 x float> @test_mm_mask_sqrt_ss(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
				; X32-LABEL: test_mm_mask_sqrt_ss:
				; X32: # %bb.0: # %entry
				; X32-NEXT: movb {{[0-9]+}}(%esp), %al
				; X32-NEXT: vsqrtss %xmm1, %xmm1, %xmm1
				; X32-NEXT: kmovw %eax, %k1
				; X32-NEXT: vmovss %xmm1, %xmm0, %xmm0 {%k1}
				; X32-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3]
				; X32-NEXT: retl
				;
				; X64-LABEL: test_mm_mask_sqrt_ss:
				; X64: # %bb.0: # %entry
				; X64-NEXT: vsqrtss %xmm1, %xmm1, %xmm1
				; X64-NEXT: kmovw %edi, %k1
				; X64-NEXT: vmovss %xmm1, %xmm0, %xmm0 {%k1}
				; X64-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3]
				; X64-NEXT: retq
				entry:
				%extract.i = extractelement <4 x float> %__A, i64 0
				%extract1.i = extractelement <4 x float> %__W, i64 0
				%0 = bitcast i8 %__U to <8 x i1>
				%extract2.i = extractelement <8 x i1> %0, i64 0
				%1 = tail call float @llvm.sqrt.f32(float %extract.i) #2
				%2 = select i1 %extract2.i, float %1, float %extract1.i
				%3 = insertelement <4 x float> %__B, float %2, i64 0
				ret <4 x float> %3
				}

				define <4 x float> @test_mm_mask_sqrt_round_ss(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
				; X32-LABEL: test_mm_mask_sqrt_round_ss:
				; X32: # %bb.0: # %entry
				; X32-NEXT: movb {{[0-9]+}}(%esp), %al
				; X32-NEXT: vsqrtss %xmm1, %xmm1, %xmm1
				; X32-NEXT: kmovw %eax, %k1
				; X32-NEXT: vmovss %xmm1, %xmm0, %xmm0 {%k1}
				; X32-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3]
				; X32-NEXT: retl
				;
				; X64-LABEL: test_mm_mask_sqrt_round_ss:
				; X64: # %bb.0: # %entry
				; X64-NEXT: vsqrtss %xmm1, %xmm1, %xmm1
				; X64-NEXT: kmovw %edi, %k1
				; X64-NEXT: vmovss %xmm1, %xmm0, %xmm0 {%k1}
				; X64-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3]
				; X64-NEXT: retq
				entry:
				%extract = extractelement <4 x float> %__A, i64 0
				%extract1 = extractelement <4 x float> %__W, i64 0
				%0 = bitcast i8 %__U to <8 x i1>
				%extract2 = extractelement <8 x i1> %0, i64 0
				%1 = tail call float @llvm.sqrt.f32(float %extract)
				%2 = select i1 %extract2, float %1, float %extract1
				%3 = insertelement <4 x float> %__B, float %2, i64 0
				ret <4 x float> %3
				}

				define <4 x float> @test_mm_maskz_sqrt_ss(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
				; X32-LABEL: test_mm_maskz_sqrt_ss:
				; X32: # %bb.0: # %entry
				; X32-NEXT: movb {{[0-9]+}}(%esp), %al
				; X32-NEXT: vsqrtss %xmm0, %xmm0, %xmm0
				; X32-NEXT: kmovw %eax, %k1
				; X32-NEXT: vxorps %xmm2, %xmm2, %xmm2
				; X32-NEXT: vmovss %xmm0, %xmm0, %xmm2 {%k1}
				; X32-NEXT: vmovss {{.*#+}} xmm0 = xmm2[0],xmm1[1,2,3]
				; X32-NEXT: retl
				;
				; X64-LABEL: test_mm_maskz_sqrt_ss:
				; X64: # %bb.0: # %entry
				; X64-NEXT: vsqrtss %xmm0, %xmm0, %xmm0
				; X64-NEXT: kmovw %edi, %k1
				; X64-NEXT: vxorps %xmm2, %xmm2, %xmm2
				; X64-NEXT: vmovss %xmm0, %xmm0, %xmm2 {%k1}
				; X64-NEXT: vmovss {{.*#+}} xmm0 = xmm2[0],xmm1[1,2,3]
				; X64-NEXT: retq
				entry:
				%extract.i = extractelement <4 x float> %__A, i64 0
				%0 = bitcast i8 %__U to <8 x i1>
				%extract2.i = extractelement <8 x i1> %0, i64 0
				%1 = tail call float @llvm.sqrt.f32(float %extract.i) #2
				%2 = select i1 %extract2.i, float %1, float 0.000000e+00
				%3 = insertelement <4 x float> %__B, float %2, i64 0
				ret <4 x float> %3
				}

				define <4 x float> @test_mm_maskz_sqrt_round_ss(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
				; X32-LABEL: test_mm_maskz_sqrt_round_ss:
				; X32: # %bb.0: # %entry
				; X32-NEXT: movb {{[0-9]+}}(%esp), %al
				; X32-NEXT: vsqrtss %xmm0, %xmm0, %xmm0
				; X32-NEXT: kmovw %eax, %k1
				; X32-NEXT: vxorps %xmm2, %xmm2, %xmm2
				; X32-NEXT: vmovss %xmm0, %xmm0, %xmm2 {%k1}
				; X32-NEXT: vmovss {{.*#+}} xmm0 = xmm2[0],xmm1[1,2,3]
				; X32-NEXT: retl
				;
				; X64-LABEL: test_mm_maskz_sqrt_round_ss:
				; X64: # %bb.0: # %entry
				; X64-NEXT: vsqrtss %xmm0, %xmm0, %xmm0
				; X64-NEXT: kmovw %edi, %k1
				; X64-NEXT: vxorps %xmm2, %xmm2, %xmm2
				; X64-NEXT: vmovss %xmm0, %xmm0, %xmm2 {%k1}
				; X64-NEXT: vmovss {{.*#+}} xmm0 = xmm2[0],xmm1[1,2,3]
				; X64-NEXT: retq
				entry:
				%extract = extractelement <4 x float> %__A, i64 0
				%0 = bitcast i8 %__U to <8 x i1>
				%extract2 = extractelement <8 x i1> %0, i64 0
				%1 = tail call float @llvm.sqrt.f32(float %extract)
				%2 = select i1 %extract2, float %1, float 0.000000e+00
				%3 = insertelement <4 x float> %__B, float %2, i64 0
				ret <4 x float> %3
				}

test/CodeGen/X86/avx512-intrinsics-upgrade.ll

	; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py			; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
	; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl \| FileCheck %s			; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl \| FileCheck %s

				declare <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32) nounwind readnone
				craig.topperUnsubmitted Not Done Reply Inline Actions This patch doesn't appear to have removed the scalar intrinsics. I don't see any AutoUpgrade code or removal from X86IntrinsicsInfo.h craig.topper: This patch doesn't appear to have removed the scalar intrinsics. I don't see any AutoUpgrade…
				tkrupaAuthorUnsubmitted Not Done Reply Inline Actions You're right, they only get lowered in clang. I gave the reasoning in the comment to the last upload. Is it enough to just move these 4 tests back to test/CodeGen/X86/avx512-intrinsics.ll or is it crucial to also lower them in the LLVM part? tkrupa: You're right, they only get lowered in clang. I gave the reasoning in the comment to the last…
				craig.topperUnsubmitted Done Reply Inline Actions You can just move them back. But if clang isn't using them, make sure the GCCBuiltin is removed from IntrinsicsX86.td and leave a FIXME saying that they can be removed. There are a lot of FIXMEs like that in that file already. craig.topper: You can just move them back. But if clang isn't using them, make sure the GCCBuiltin is removed…

				define <4 x float> @test_sqrt_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
				; CHECK-LABEL: test_sqrt_ss:
				; CHECK: ## %bb.0:
				; CHECK-NEXT: kmovw %edi, %k1
				; CHECK-NEXT: vsqrtss %xmm0, %xmm0, %xmm3
				; CHECK-NEXT: vmovaps %xmm2, %xmm4
				; CHECK-NEXT: vmovss %xmm3, %xmm1, %xmm4 {%k1}
				; CHECK-NEXT: vsqrtss {rd-sae}, %xmm1, %xmm0, %xmm2 {%k1}
				; CHECK-NEXT: vsqrtss {ru-sae}, %xmm1, %xmm0, %xmm3 {%k1} {z}
				; CHECK-NEXT: vsqrtss {rz-sae}, %xmm1, %xmm0, %xmm0
				; CHECK-NEXT: vaddps %xmm2, %xmm4, %xmm1
				; CHECK-NEXT: vaddps %xmm0, %xmm3, %xmm0
				; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0
				; CHECK-NEXT: retq
				%res0 = call <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 4)
				%res1 = call <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 1)
				%res2 = call <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %mask, i32 2)
				%res3 = call <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1, i32 3)

				%res.1 = fadd <4 x float> %res0, %res1
				%res.2 = fadd <4 x float> %res2, %res3
				%res = fadd <4 x float> %res.1, %res.2
				ret <4 x float> %res
				}

				declare <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double>, <2 x double>, <2 x double>, i8, i32) nounwind readnone

				define <2 x double> @test_sqrt_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
				; CHECK-LABEL: test_sqrt_sd:
				; CHECK: ## %bb.0:
				; CHECK-NEXT: kmovw %edi, %k1
				; CHECK-NEXT: vsqrtsd %xmm0, %xmm0, %xmm3
				; CHECK-NEXT: vmovapd %xmm2, %xmm4
				; CHECK-NEXT: vmovsd %xmm3, %xmm1, %xmm4 {%k1}
				; CHECK-NEXT: vsqrtsd {rd-sae}, %xmm1, %xmm0, %xmm2 {%k1}
				; CHECK-NEXT: vsqrtsd {ru-sae}, %xmm1, %xmm0, %xmm3 {%k1} {z}
				; CHECK-NEXT: vsqrtsd {rz-sae}, %xmm1, %xmm0, %xmm0
				; CHECK-NEXT: vaddpd %xmm2, %xmm4, %xmm1
				; CHECK-NEXT: vaddpd %xmm0, %xmm3, %xmm0
				; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0
				; CHECK-NEXT: retq
				%res0 = call <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 4)
				%res1 = call <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 1)
				%res2 = call <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 %mask, i32 2)
				%res3 = call <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 -1, i32 3)

				%res.1 = fadd <2 x double> %res0, %res1
				%res.2 = fadd <2 x double> %res2, %res3
				%res = fadd <2 x double> %res.1, %res.2
				ret <2 x double> %res
				}

				define <8 x double> @test_sqrt_pd_512(<8 x double> %a0) {
				; CHECK-LABEL: test_sqrt_pd_512:
				; CHECK: ## %bb.0:
				; CHECK-NEXT: vsqrtpd %zmm0, %zmm0
				; CHECK-NEXT: retq
				%res = call <8 x double> @llvm.x86.avx512.mask.sqrt.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1, i32 4)
				ret <8 x double> %res
				}
				declare <8 x double> @llvm.x86.avx512.mask.sqrt.pd.512(<8 x double>, <8 x double>, i8, i32) nounwind readnone

				define <16 x float> @test_sqrt_ps_512(<16 x float> %a0) {
				; CHECK-LABEL: test_sqrt_ps_512:
				; CHECK: ## %bb.0:
				; CHECK-NEXT: vsqrtps %zmm0, %zmm0
				; CHECK-NEXT: retq
				%res = call <16 x float> @llvm.x86.avx512.mask.sqrt.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 4)
				ret <16 x float> %res
				}

				declare <16 x float> @llvm.x86.avx512.mask.sqrt.ps.512(<16 x float>, <16 x float>, i16, i32) nounwind readnone

	declare i16 @llvm.x86.avx512.kunpck.bw(i16, i16) nounwind readnone			declare i16 @llvm.x86.avx512.kunpck.bw(i16, i16) nounwind readnone

	define i16 @unpckbw_test(i16 %a0, i16 %a1) {			define i16 @unpckbw_test(i16 %a0, i16 %a1) {
	; CHECK-LABEL: unpckbw_test:			; CHECK-LABEL: unpckbw_test:
	; CHECK: ## %bb.0:			; CHECK: ## %bb.0:
	; CHECK-NEXT: movzbl %dil, %eax			; CHECK-NEXT: movzbl %dil, %eax
	; CHECK-NEXT: shll $8, %esi			; CHECK-NEXT: shll $8, %esi
	; CHECK-NEXT: orl %esi, %eax			; CHECK-NEXT: orl %esi, %eax
	▲ Show 20 Lines • Show All 3,785 Lines • Show Last 20 Lines

test/CodeGen/X86/avx512-intrinsics.ll

	Show First 20 Lines • Show All 266 Lines • ▼ Show 20 Lines
	; CHECK: ## %bb.0:			; CHECK: ## %bb.0:
	; CHECK-NEXT: vrsqrt14ps %zmm0, %zmm0			; CHECK-NEXT: vrsqrt14ps %zmm0, %zmm0
	; CHECK-NEXT: retq			; CHECK-NEXT: retq
	%res = call <16 x float> @llvm.x86.avx512.rsqrt14.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1) ; <<16 x float>> [#uses=1]			%res = call <16 x float> @llvm.x86.avx512.rsqrt14.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1) ; <<16 x float>> [#uses=1]
	ret <16 x float> %res			ret <16 x float> %res
	}			}
	declare <16 x float> @llvm.x86.avx512.rsqrt14.ps.512(<16 x float>, <16 x float>, i16) nounwind readnone			declare <16 x float> @llvm.x86.avx512.rsqrt14.ps.512(<16 x float>, <16 x float>, i16) nounwind readnone

	define <8 x double> @test_sqrt_pd_512(<8 x double> %a0) {
	; CHECK-LABEL: test_sqrt_pd_512:
	; CHECK: ## %bb.0:
	; CHECK-NEXT: vsqrtpd %zmm0, %zmm0
	; CHECK-NEXT: retq
	%res = call <8 x double> @llvm.x86.avx512.mask.sqrt.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1, i32 4)
	ret <8 x double> %res
	}
	declare <8 x double> @llvm.x86.avx512.mask.sqrt.pd.512(<8 x double>, <8 x double>, i8, i32) nounwind readnone

	define <16 x float> @test_sqrt_ps_512(<16 x float> %a0) {
	; CHECK-LABEL: test_sqrt_ps_512:
	; CHECK: ## %bb.0:
	; CHECK-NEXT: vsqrtps %zmm0, %zmm0
	; CHECK-NEXT: retq
	%res = call <16 x float> @llvm.x86.avx512.mask.sqrt.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 4)
	ret <16 x float> %res
	}
	define <16 x float> @test_sqrt_round_ps_512(<16 x float> %a0) {			define <16 x float> @test_sqrt_round_ps_512(<16 x float> %a0) {
	; CHECK-LABEL: test_sqrt_round_ps_512:			; CHECK-LABEL: test_sqrt_round_ps_512:
	; CHECK: ## %bb.0:			; CHECK: ## %bb.0:
	; CHECK-NEXT: vsqrtps {rz-sae}, %zmm0, %zmm0			; CHECK-NEXT: vsqrtps {rz-sae}, %zmm0, %zmm0
	; CHECK-NEXT: retq			; CHECK-NEXT: retq
	%res = call <16 x float> @llvm.x86.avx512.mask.sqrt.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 3)			%res = call <16 x float> @llvm.x86.avx512.mask.sqrt.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 3)
	ret <16 x float> %res			ret <16 x float> %res
	}			}
	Show All 31 Lines
	; CHECK: ## %bb.0:			; CHECK: ## %bb.0:
	; CHECK-NEXT: vgetexpps {sae}, %zmm0, %zmm0			; CHECK-NEXT: vgetexpps {sae}, %zmm0, %zmm0
	; CHECK-NEXT: retq			; CHECK-NEXT: retq
	%res = call <16 x float> @llvm.x86.avx512.mask.getexp.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 8)			%res = call <16 x float> @llvm.x86.avx512.mask.getexp.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 8)
	ret <16 x float> %res			ret <16 x float> %res
	}			}
	declare <16 x float> @llvm.x86.avx512.mask.getexp.ps.512(<16 x float>, <16 x float>, i16, i32) nounwind readnone			declare <16 x float> @llvm.x86.avx512.mask.getexp.ps.512(<16 x float>, <16 x float>, i16, i32) nounwind readnone

	declare <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32) nounwind readnone

	define <4 x float> @test_sqrt_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
	; CHECK-LABEL: test_sqrt_ss:
	; CHECK: ## %bb.0:
	; CHECK-NEXT: kmovw %edi, %k1
	; CHECK-NEXT: vmovaps %xmm2, %xmm3
	; CHECK-NEXT: vsqrtss %xmm1, %xmm0, %xmm3 {%k1}
	; CHECK-NEXT: vsqrtss {rd-sae}, %xmm1, %xmm0, %xmm2 {%k1}
	; CHECK-NEXT: vsqrtss {ru-sae}, %xmm1, %xmm0, %xmm4 {%k1} {z}
	; CHECK-NEXT: vsqrtss {rz-sae}, %xmm1, %xmm0, %xmm0
	; CHECK-NEXT: vaddps %xmm2, %xmm3, %xmm1
	; CHECK-NEXT: vaddps %xmm0, %xmm4, %xmm0
	; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0
	; CHECK-NEXT: retq
	%res0 = call <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 4)
	%res1 = call <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 1)
	%res2 = call <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %mask, i32 2)
	%res3 = call <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1, i32 3)

	%res.1 = fadd <4 x float> %res0, %res1
	%res.2 = fadd <4 x float> %res2, %res3
	%res = fadd <4 x float> %res.1, %res.2
	ret <4 x float> %res
	}

	declare <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double>, <2 x double>, <2 x double>, i8, i32) nounwind readnone

	define <2 x double> @test_sqrt_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
	; CHECK-LABEL: test_sqrt_sd:
	; CHECK: ## %bb.0:
	; CHECK-NEXT: kmovw %edi, %k1
	; CHECK-NEXT: vmovapd %xmm2, %xmm3
	; CHECK-NEXT: vsqrtsd %xmm1, %xmm0, %xmm3 {%k1}
	; CHECK-NEXT: vsqrtsd {rd-sae}, %xmm1, %xmm0, %xmm2 {%k1}
	; CHECK-NEXT: vsqrtsd {ru-sae}, %xmm1, %xmm0, %xmm4 {%k1} {z}
	; CHECK-NEXT: vsqrtsd {rz-sae}, %xmm1, %xmm0, %xmm0
	; CHECK-NEXT: vaddpd %xmm2, %xmm3, %xmm1
	; CHECK-NEXT: vaddpd %xmm0, %xmm4, %xmm0
	; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0
	; CHECK-NEXT: retq
	%res0 = call <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 4)
	%res1 = call <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 1)
	%res2 = call <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 %mask, i32 2)
	%res3 = call <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 -1, i32 3)

	%res.1 = fadd <2 x double> %res0, %res1
	%res.2 = fadd <2 x double> %res2, %res3
	%res = fadd <2 x double> %res.1, %res.2
	ret <2 x double> %res
	}

	define i64 @test_x86_sse2_cvtsd2si64(<2 x double> %a0) {			define i64 @test_x86_sse2_cvtsd2si64(<2 x double> %a0) {
	; CHECK-LABEL: test_x86_sse2_cvtsd2si64:			; CHECK-LABEL: test_x86_sse2_cvtsd2si64:
	; CHECK: ## %bb.0:			; CHECK: ## %bb.0:
	; CHECK-NEXT: vcvtsd2si %xmm0, %rax			; CHECK-NEXT: vcvtsd2si %xmm0, %rax
	; CHECK-NEXT: retq			; CHECK-NEXT: retq
	%res = call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> %a0) ; <i64> [#uses=1]			%res = call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> %a0) ; <i64> [#uses=1]
	ret i64 %res			ret i64 %res
	}			}
	▲ Show 20 Lines • Show All 4,712 Lines • Show Last 20 Lines

test/CodeGen/X86/avx512vl-intrinsics-fast-isel.ll

	Show First 20 Lines • Show All 1,931 Lines • ▼ Show 20 Lines
	; X64-NEXT: retq			; X64-NEXT: retq
	%arg0 = bitcast i8 %a0 to <8 x i1>			%arg0 = bitcast i8 %a0 to <8 x i1>
	%res0 = shufflevector <8 x float> %a1, <8 x float> %a2, <8 x i32> <i32 0, i32 1, i32 8, i32 8, i32 4, i32 5, i32 12, i32 12>			%res0 = shufflevector <8 x float> %a1, <8 x float> %a2, <8 x i32> <i32 0, i32 1, i32 8, i32 8, i32 4, i32 5, i32 12, i32 12>
	%res1 = select <8 x i1> %arg0, <8 x float> %res0, <8 x float> zeroinitializer			%res1 = select <8 x i1> %arg0, <8 x float> %res0, <8 x float> zeroinitializer
	ret <8 x float> %res1			ret <8 x float> %res1
	}			}

	!0 = !{i32 1}			!0 = !{i32 1}

				define <2 x double> @test_mm_mask_sqrt_pd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A) {
				; X32-LABEL: test_mm_mask_sqrt_pd:
				; X32: # %bb.0: # %entry
				; X32-NEXT: movb {{[0-9]+}}(%esp), %al
				; X32-NEXT: kmovw %eax, %k1
				; X32-NEXT: vsqrtpd %xmm1, %xmm0 {%k1}
				; X32-NEXT: retl
				;
				; X64-LABEL: test_mm_mask_sqrt_pd:
				; X64: # %bb.0: # %entry
				; X64-NEXT: kmovw %edi, %k1
				; X64-NEXT: vsqrtpd %xmm1, %xmm0 {%k1}
				; X64-NEXT: retq
				entry:
				%0 = tail call <2 x double> @llvm.sqrt.v2f64(<2 x double> %__A) #2
				%1 = bitcast i8 %__U to <8 x i1>
				%extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
				%2 = select <2 x i1> %extract.i, <2 x double> %0, <2 x double> %__W
				ret <2 x double> %2
				}

				declare <2 x double> @llvm.sqrt.v2f64(<2 x double>)

				define <2 x double> @test_mm_maskz_sqrt_pd(i8 zeroext %__U, <2 x double> %__A) {
				; X32-LABEL: test_mm_maskz_sqrt_pd:
				; X32: # %bb.0: # %entry
				; X32-NEXT: movb {{[0-9]+}}(%esp), %al
				; X32-NEXT: kmovw %eax, %k1
				; X32-NEXT: vsqrtpd %xmm0, %xmm0 {%k1} {z}
				; X32-NEXT: retl
				;
				; X64-LABEL: test_mm_maskz_sqrt_pd:
				; X64: # %bb.0: # %entry
				; X64-NEXT: kmovw %edi, %k1
				; X64-NEXT: vsqrtpd %xmm0, %xmm0 {%k1} {z}
				; X64-NEXT: retq
				entry:
				%0 = tail call <2 x double> @llvm.sqrt.v2f64(<2 x double> %__A) #2
				%1 = bitcast i8 %__U to <8 x i1>
				%extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
				%2 = select <2 x i1> %extract.i, <2 x double> %0, <2 x double> zeroinitializer
				ret <2 x double> %2
				}

				define <4 x double> @test_mm256_mask_sqrt_pd(<4 x double> %__W, i8 zeroext %__U, <4 x double> %__A) {
				; X32-LABEL: test_mm256_mask_sqrt_pd:
				; X32: # %bb.0: # %entry
				; X32-NEXT: movb {{[0-9]+}}(%esp), %al
				; X32-NEXT: kmovw %eax, %k1
				; X32-NEXT: vsqrtpd %ymm1, %ymm0 {%k1}
				; X32-NEXT: retl
				;
				; X64-LABEL: test_mm256_mask_sqrt_pd:
				; X64: # %bb.0: # %entry
				; X64-NEXT: kmovw %edi, %k1
				; X64-NEXT: vsqrtpd %ymm1, %ymm0 {%k1}
				; X64-NEXT: retq
				entry:
				%0 = tail call <4 x double> @llvm.sqrt.v4f64(<4 x double> %__A) #2
				%1 = bitcast i8 %__U to <8 x i1>
				%extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
				%2 = select <4 x i1> %extract.i, <4 x double> %0, <4 x double> %__W
				ret <4 x double> %2
				}

				declare <4 x double> @llvm.sqrt.v4f64(<4 x double>)

				define <4 x double> @test_mm256_maskz_sqrt_pd(i8 zeroext %__U, <4 x double> %__A) {
				; X32-LABEL: test_mm256_maskz_sqrt_pd:
				; X32: # %bb.0: # %entry
				; X32-NEXT: movb {{[0-9]+}}(%esp), %al
				; X32-NEXT: kmovw %eax, %k1
				; X32-NEXT: vsqrtpd %ymm0, %ymm0 {%k1} {z}
				; X32-NEXT: retl
				;
				; X64-LABEL: test_mm256_maskz_sqrt_pd:
				; X64: # %bb.0: # %entry
				; X64-NEXT: kmovw %edi, %k1
				; X64-NEXT: vsqrtpd %ymm0, %ymm0 {%k1} {z}
				; X64-NEXT: retq
				entry:
				%0 = tail call <4 x double> @llvm.sqrt.v4f64(<4 x double> %__A) #2
				%1 = bitcast i8 %__U to <8 x i1>
				%extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
				%2 = select <4 x i1> %extract.i, <4 x double> %0, <4 x double> zeroinitializer
				ret <4 x double> %2
				}

				define <4 x float> @test_mm_mask_sqrt_ps(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A) {
				; X32-LABEL: test_mm_mask_sqrt_ps:
				; X32: # %bb.0: # %entry
				; X32-NEXT: movb {{[0-9]+}}(%esp), %al
				; X32-NEXT: kmovw %eax, %k1
				; X32-NEXT: vsqrtps %xmm1, %xmm0 {%k1}
				; X32-NEXT: retl
				;
				; X64-LABEL: test_mm_mask_sqrt_ps:
				; X64: # %bb.0: # %entry
				; X64-NEXT: kmovw %edi, %k1
				; X64-NEXT: vsqrtps %xmm1, %xmm0 {%k1}
				; X64-NEXT: retq
				entry:
				%0 = tail call <4 x float> @llvm.sqrt.v4f32(<4 x float> %__A) #2
				%1 = bitcast i8 %__U to <8 x i1>
				%extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
				%2 = select <4 x i1> %extract.i, <4 x float> %0, <4 x float> %__W
				ret <4 x float> %2
				}

				declare <4 x float> @llvm.sqrt.v4f32(<4 x float>)

				define <4 x float> @test_mm_maskz_sqrt_ps(i8 zeroext %__U, <4 x float> %__A) {
				; X32-LABEL: test_mm_maskz_sqrt_ps:
				; X32: # %bb.0: # %entry
				; X32-NEXT: movb {{[0-9]+}}(%esp), %al
				; X32-NEXT: kmovw %eax, %k1
				; X32-NEXT: vsqrtps %xmm0, %xmm0 {%k1} {z}
				; X32-NEXT: retl
				;
				; X64-LABEL: test_mm_maskz_sqrt_ps:
				; X64: # %bb.0: # %entry
				; X64-NEXT: kmovw %edi, %k1
				; X64-NEXT: vsqrtps %xmm0, %xmm0 {%k1} {z}
				; X64-NEXT: retq
				entry:
				%0 = tail call <4 x float> @llvm.sqrt.v4f32(<4 x float> %__A) #2
				%1 = bitcast i8 %__U to <8 x i1>
				%extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
				%2 = select <4 x i1> %extract.i, <4 x float> %0, <4 x float> zeroinitializer
				ret <4 x float> %2
				}

				define <8 x float> @test_mm256_mask_sqrt_ps(<8 x float> %__W, i8 zeroext %__U, <8 x float> %__A) {
				; X32-LABEL: test_mm256_mask_sqrt_ps:
				; X32: # %bb.0: # %entry
				; X32-NEXT: movb {{[0-9]+}}(%esp), %al
				; X32-NEXT: kmovw %eax, %k1
				; X32-NEXT: vsqrtps %ymm1, %ymm0 {%k1}
				; X32-NEXT: retl
				;
				; X64-LABEL: test_mm256_mask_sqrt_ps:
				; X64: # %bb.0: # %entry
				; X64-NEXT: kmovw %edi, %k1
				; X64-NEXT: vsqrtps %ymm1, %ymm0 {%k1}
				; X64-NEXT: retq
				entry:
				%0 = tail call <8 x float> @llvm.sqrt.v8f32(<8 x float> %__A) #2
				%1 = bitcast i8 %__U to <8 x i1>
				%2 = select <8 x i1> %1, <8 x float> %0, <8 x float> %__W
				ret <8 x float> %2
				}

				define <8 x float> @test_mm256_maskz_sqrt_ps(i8 zeroext %__U, <8 x float> %__A) {
				; X32-LABEL: test_mm256_maskz_sqrt_ps:
				; X32: # %bb.0: # %entry
				; X32-NEXT: movb {{[0-9]+}}(%esp), %al
				; X32-NEXT: kmovw %eax, %k1
				; X32-NEXT: vsqrtps %ymm0, %ymm0 {%k1} {z}
				; X32-NEXT: retl
				;
				; X64-LABEL: test_mm256_maskz_sqrt_ps:
				; X64: # %bb.0: # %entry
				; X64-NEXT: kmovw %edi, %k1
				; X64-NEXT: vsqrtps %ymm0, %ymm0 {%k1} {z}
				; X64-NEXT: retq
				entry:
				%0 = tail call <8 x float> @llvm.sqrt.v8f32(<8 x float> %__A) #2
				%1 = bitcast i8 %__U to <8 x i1>
				%2 = select <8 x i1> %1, <8 x float> %0, <8 x float> zeroinitializer
				ret <8 x float> %2
				}

				declare <8 x float> @llvm.sqrt.v8f32(<8 x float>)

test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll

This file is larger than 256 KB, so syntax highlighting is disabled by default.

	Show First 20 Lines • Show All 6,134 Lines • ▼ Show 20 Lines
	; CHECK-NEXT: ## kill: def %al killed %al killed %eax			; CHECK-NEXT: ## kill: def %al killed %al killed %eax
	; CHECK-NEXT: retq ## encoding: [0xc3]			; CHECK-NEXT: retq ## encoding: [0xc3]
	%res = call i8 @llvm.x86.avx512.ptestnm.q.256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2)			%res = call i8 @llvm.x86.avx512.ptestnm.q.256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2)
	%res1 = call i8 @llvm.x86.avx512.ptestnm.q.256(<4 x i64> %x0, <4 x i64> %x1, i8-1)			%res1 = call i8 @llvm.x86.avx512.ptestnm.q.256(<4 x i64> %x0, <4 x i64> %x1, i8-1)
	%res2 = add i8 %res, %res1			%res2 = add i8 %res, %res1
	ret i8 %res2			ret i8 %res2
	}			}

				define <4 x double> @test_sqrt_pd_256(<4 x double> %a0, i8 %mask) {
				; CHECK-LABEL: test_sqrt_pd_256:
				; CHECK: ## %bb.0:
				; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
				; CHECK-NEXT: vsqrtpd %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xa9,0x51,0xc0]
				; CHECK-NEXT: retq ## encoding: [0xc3]
				%res = call <4 x double> @llvm.x86.avx512.mask.sqrt.pd.256(<4 x double> %a0, <4 x double> zeroinitializer, i8 %mask)
				ret <4 x double> %res
				}
				declare <4 x double> @llvm.x86.avx512.mask.sqrt.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone

				define <8 x float> @test_sqrt_ps_256(<8 x float> %a0, i8 %mask) {
				; CHECK-LABEL: test_sqrt_ps_256:
				; CHECK: ## %bb.0:
				; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
				; CHECK-NEXT: vsqrtps %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xa9,0x51,0xc0]
				; CHECK-NEXT: retq ## encoding: [0xc3]
				%res = call <8 x float> @llvm.x86.avx512.mask.sqrt.ps.256(<8 x float> %a0, <8 x float> zeroinitializer, i8 %mask)
				ret <8 x float> %res
				}

				declare <8 x float> @llvm.x86.avx512.mask.sqrt.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone

test/CodeGen/X86/avx512vl-intrinsics.ll

This file is larger than 256 KB, so syntax highlighting is disabled by default.

	Show First 20 Lines • Show All 899 Lines • ▼ Show 20 Lines
	; CHECK: ## %bb.0:			; CHECK: ## %bb.0:
	; CHECK-NEXT: vminps %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5d,0xc1]			; CHECK-NEXT: vminps %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5d,0xc1]
	; CHECK-NEXT: retq ## encoding: [0xc3]			; CHECK-NEXT: retq ## encoding: [0xc3]
	%1 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %a0, <4 x float> %a1)			%1 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %a0, <4 x float> %a1)
	ret <4 x float> %1			ret <4 x float> %1
	}			}
	declare <4 x float> @llvm.x86.sse.min.ps(<4 x float>, <4 x float>)			declare <4 x float> @llvm.x86.sse.min.ps(<4 x float>, <4 x float>)

	define <4 x double> @test_sqrt_pd_256(<4 x double> %a0, i8 %mask) {
	; CHECK-LABEL: test_sqrt_pd_256:
	; CHECK: ## %bb.0:
	; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
	; CHECK-NEXT: vsqrtpd %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xa9,0x51,0xc0]
	; CHECK-NEXT: retq ## encoding: [0xc3]
	%res = call <4 x double> @llvm.x86.avx512.mask.sqrt.pd.256(<4 x double> %a0, <4 x double> zeroinitializer, i8 %mask)
	ret <4 x double> %res
	}
	declare <4 x double> @llvm.x86.avx512.mask.sqrt.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone

	define <8 x float> @test_sqrt_ps_256(<8 x float> %a0, i8 %mask) {
	; CHECK-LABEL: test_sqrt_ps_256:
	; CHECK: ## %bb.0:
	; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
	; CHECK-NEXT: vsqrtps %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xa9,0x51,0xc0]
	; CHECK-NEXT: retq ## encoding: [0xc3]
	%res = call <8 x float> @llvm.x86.avx512.mask.sqrt.ps.256(<8 x float> %a0, <8 x float> zeroinitializer, i8 %mask)
	ret <8 x float> %res
	}

	declare <8 x float> @llvm.x86.avx512.mask.sqrt.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone

	define <4 x double> @test_getexp_pd_256(<4 x double> %a0) {			define <4 x double> @test_getexp_pd_256(<4 x double> %a0) {
	; CHECK-LABEL: test_getexp_pd_256:			; CHECK-LABEL: test_getexp_pd_256:
	; CHECK: ## %bb.0:			; CHECK: ## %bb.0:
	; CHECK-NEXT: vgetexppd %ymm0, %ymm0 ## encoding: [0x62,0xf2,0xfd,0x28,0x42,0xc0]			; CHECK-NEXT: vgetexppd %ymm0, %ymm0 ## encoding: [0x62,0xf2,0xfd,0x28,0x42,0xc0]
	; CHECK-NEXT: retq ## encoding: [0xc3]			; CHECK-NEXT: retq ## encoding: [0xc3]
	%res = call <4 x double> @llvm.x86.avx512.mask.getexp.pd.256(<4 x double> %a0, <4 x double> zeroinitializer, i8 -1)			%res = call <4 x double> @llvm.x86.avx512.mask.getexp.pd.256(<4 x double> %a0, <4 x double> zeroinitializer, i8 -1)
	ret <4 x double> %res			ret <4 x double> %res
	}			}
	▲ Show 20 Lines • Show All 4,253 Lines • Show Last 20 Lines

test/CodeGen/X86/sse-intrinsics-fast-isel.ll

	Show First 20 Lines • Show All 1,571 Lines • ▼ Show 20 Lines
	; X32: # %bb.0:			; X32: # %bb.0:
	; X32-NEXT: sqrtps %xmm0, %xmm0			; X32-NEXT: sqrtps %xmm0, %xmm0
	; X32-NEXT: retl			; X32-NEXT: retl
	;			;
	; X64-LABEL: test_mm_sqrt_ps:			; X64-LABEL: test_mm_sqrt_ps:
	; X64: # %bb.0:			; X64: # %bb.0:
	; X64-NEXT: sqrtps %xmm0, %xmm0			; X64-NEXT: sqrtps %xmm0, %xmm0
	; X64-NEXT: retq			; X64-NEXT: retq
	%res = call <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float> %a0)			%res = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %a0)
	ret <4 x float> %res			ret <4 x float> %res
	}			}
	declare <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float>) nounwind readnone			declare <4 x float> @llvm.sqrt.v4f32(<4 x float>) nounwind readnone

	define <4 x float> @test_mm_sqrt_ss(<4 x float> %a0) {			define <4 x float> @test_mm_sqrt_ss(<4 x float> %a0) {
	; X32-LABEL: test_mm_sqrt_ss:			; X32-LABEL: test_mm_sqrt_ss:
	; X32: # %bb.0:			; X32: # %bb.0:
	; X32-NEXT: sqrtss %xmm0, %xmm0			; X32-NEXT: sqrtss %xmm0, %xmm0
	; X32-NEXT: retl			; X32-NEXT: retl
	;			;
	; X64-LABEL: test_mm_sqrt_ss:			; X64-LABEL: test_mm_sqrt_ss:
	▲ Show 20 Lines • Show All 480 Lines • Show Last 20 Lines

test/CodeGen/X86/sse-intrinsics-x86-upgrade.ll

	; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py			; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
	; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse2 \| FileCheck %s			; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse2 \| FileCheck %s


				define <4 x float> @test_x86_sse_sqrt_ps(<4 x float> %a0) {
				; CHECK-LABEL: test_x86_sse_sqrt_ps:
				; CHECK: ## %bb.0:
				; CHECK-NEXT: sqrtps %xmm0, %xmm0
				; CHECK-NEXT: retl
				%res = call <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float> %a0) ; <<4 x float>> [#uses=1]
				ret <4 x float> %res
				}
				declare <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float>) nounwind readnone


	define void @test_x86_sse_storeu_ps(i8* %a0, <4 x float> %a1) {			define void @test_x86_sse_storeu_ps(i8* %a0, <4 x float> %a1) {
	; CHECK-LABEL: test_x86_sse_storeu_ps:			; CHECK-LABEL: test_x86_sse_storeu_ps:
	; CHECK: ## %bb.0:			; CHECK: ## %bb.0:
	; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax			; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
	; CHECK-NEXT: movups %xmm0, (%eax)			; CHECK-NEXT: movups %xmm0, (%eax)
	; CHECK-NEXT: retl			; CHECK-NEXT: retl
	call void @llvm.x86.sse.storeu.ps(i8* %a0, <4 x float> %a1)			call void @llvm.x86.sse.storeu.ps(i8* %a0, <4 x float> %a1)
	ret void			ret void
	▲ Show 20 Lines • Show All 47 Lines • Show Last 20 Lines

test/CodeGen/X86/sse-intrinsics-x86.ll

	Show First 20 Lines • Show All 452 Lines • ▼ Show 20 Lines
	; VCHECK: ## %bb.0:			; VCHECK: ## %bb.0:
	; VCHECK-NEXT: vrsqrtss %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0x52,0xc0]			; VCHECK-NEXT: vrsqrtss %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0x52,0xc0]
	; VCHECK-NEXT: retl ## encoding: [0xc3]			; VCHECK-NEXT: retl ## encoding: [0xc3]
	%res = call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1]			%res = call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1]
	ret <4 x float> %res			ret <4 x float> %res
	}			}
	declare <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float>) nounwind readnone			declare <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float>) nounwind readnone


	define <4 x float> @test_x86_sse_sqrt_ps(<4 x float> %a0) {
	; SSE-LABEL: test_x86_sse_sqrt_ps:
	; SSE: ## %bb.0:
	; SSE-NEXT: sqrtps %xmm0, %xmm0 ## encoding: [0x0f,0x51,0xc0]
	; SSE-NEXT: retl ## encoding: [0xc3]
	;
	; AVX2-LABEL: test_x86_sse_sqrt_ps:
	; AVX2: ## %bb.0:
	; AVX2-NEXT: vsqrtps %xmm0, %xmm0 ## encoding: [0xc5,0xf8,0x51,0xc0]
	; AVX2-NEXT: retl ## encoding: [0xc3]
	;
	; SKX-LABEL: test_x86_sse_sqrt_ps:
	; SKX: ## %bb.0:
	; SKX-NEXT: vsqrtps %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x51,0xc0]
	; SKX-NEXT: retl ## encoding: [0xc3]
	%res = call <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float> %a0) ; <<4 x float>> [#uses=1]
	ret <4 x float> %res
	}
	declare <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float>) nounwind readnone


	define <4 x float> @test_x86_sse_sqrt_ss(<4 x float> %a0) {			define <4 x float> @test_x86_sse_sqrt_ss(<4 x float> %a0) {
				[Inline comment, marked Done] RKSimon: Why did you move this test?
				[Inline reply, marked Not Done] uriel.k: You are right, my mistake. Fixed.
	; SSE-LABEL: test_x86_sse_sqrt_ss:			; SSE-LABEL: test_x86_sse_sqrt_ss:
	; SSE: ## %bb.0:			; SSE: ## %bb.0:
	; SSE-NEXT: sqrtss %xmm0, %xmm0 ## encoding: [0xf3,0x0f,0x51,0xc0]			; SSE-NEXT: sqrtss %xmm0, %xmm0 ## encoding: [0xf3,0x0f,0x51,0xc0]
	; SSE-NEXT: retl ## encoding: [0xc3]			; SSE-NEXT: retl ## encoding: [0xc3]
	;			;
	; AVX2-LABEL: test_x86_sse_sqrt_ss:			; AVX2-LABEL: test_x86_sse_sqrt_ss:
	; AVX2: ## %bb.0:			; AVX2: ## %bb.0:
	; AVX2-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0x51,0xc0]			; AVX2-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0x51,0xc0]
	▲ Show 20 Lines • Show All 218 Lines • Show Last 20 Lines

test/CodeGen/X86/sse2-intrinsics-fast-isel.ll

	Show First 20 Lines • Show All 2,942 Lines • ▼ Show 20 Lines
	; X32: # %bb.0:			; X32: # %bb.0:
	; X32-NEXT: sqrtpd %xmm0, %xmm0			; X32-NEXT: sqrtpd %xmm0, %xmm0
	; X32-NEXT: retl			; X32-NEXT: retl
	;			;
	; X64-LABEL: test_mm_sqrt_pd:			; X64-LABEL: test_mm_sqrt_pd:
	; X64: # %bb.0:			; X64: # %bb.0:
	; X64-NEXT: sqrtpd %xmm0, %xmm0			; X64-NEXT: sqrtpd %xmm0, %xmm0
	; X64-NEXT: retq			; X64-NEXT: retq
	%res = call <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double> %a0)			%res = call <2 x double> @llvm.sqrt.v2f64(<2 x double> %a0)
	ret <2 x double> %res			ret <2 x double> %res
	}			}
	declare <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double>) nounwind readnone			declare <2 x double> @llvm.sqrt.v2f64(<2 x double>) nounwind readnone
				[Inline comment, marked Done] RKSimon: Shouldn't that be llvm.sqrt.v2f64?
				[Inline reply, marked Not Done] uriel.k: Fixed.

	define <2 x double> @test_mm_sqrt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {			define <2 x double> @test_mm_sqrt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
	; X32-LABEL: test_mm_sqrt_sd:			; X32-LABEL: test_mm_sqrt_sd:
	; X32: # %bb.0:			; X32: # %bb.0:
	; X32-NEXT: sqrtsd %xmm0, %xmm1			; X32-NEXT: sqrtsd %xmm0, %xmm1
	; X32-NEXT: movapd %xmm1, %xmm0			; X32-NEXT: movapd %xmm1, %xmm0
	; X32-NEXT: retl			; X32-NEXT: retl
	;			;
	▲ Show 20 Lines • Show All 937 Lines • Show Last 20 Lines

test/CodeGen/X86/sse2-intrinsics-x86-upgrade.ll

	; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py			; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
	; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse2 \| FileCheck %s			; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse2 \| FileCheck %s


				define <2 x double> @test_x86_sse2_sqrt_pd(<2 x double> %a0) {
				; CHECK-LABEL: test_x86_sse2_sqrt_pd:
				; CHECK: ## %bb.0:
				; CHECK-NEXT: sqrtpd %xmm0, %xmm0
				; CHECK-NEXT: retl
				%res = call <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double> %a0) ; <<2 x double>> [#uses=1]
				ret <2 x double> %res
				}
				declare <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double>) nounwind readnone


	define <2 x i64> @test_x86_sse2_psll_dq_bs(<2 x i64> %a0) {			define <2 x i64> @test_x86_sse2_psll_dq_bs(<2 x i64> %a0) {
	; CHECK-LABEL: test_x86_sse2_psll_dq_bs:			; CHECK-LABEL: test_x86_sse2_psll_dq_bs:
	; CHECK: ## %bb.0:			; CHECK: ## %bb.0:
	; CHECK-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8]			; CHECK-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8]
	; CHECK-NEXT: retl			; CHECK-NEXT: retl
				[Inline comment, marked Done] RKSimon: Strip these checks and regenerate.
	%res = call <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]			%res = call <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
	ret <2 x i64> %res			ret <2 x i64> %res
	}			}
	declare <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64>, i32) nounwind readnone			declare <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64>, i32) nounwind readnone


	define <2 x i64> @test_x86_sse2_psrl_dq_bs(<2 x i64> %a0) {			define <2 x i64> @test_x86_sse2_psrl_dq_bs(<2 x i64> %a0) {
	; CHECK-LABEL: test_x86_sse2_psrl_dq_bs:			; CHECK-LABEL: test_x86_sse2_psrl_dq_bs:
	▲ Show 20 Lines • Show All 233 Lines • Show Last 20 Lines

test/CodeGen/X86/sse2-intrinsics-x86.ll

	Show First 20 Lines • Show All 1,579 Lines • ▼ Show 20 Lines
	; SKX: ## %bb.0:			; SKX: ## %bb.0:
	; SKX-NEXT: vpsubusw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd9,0xc1]			; SKX-NEXT: vpsubusw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd9,0xc1]
	; SKX-NEXT: retl ## encoding: [0xc3]			; SKX-NEXT: retl ## encoding: [0xc3]
	%res = call <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]			%res = call <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
	ret <8 x i16> %res			ret <8 x i16> %res
	}			}
	declare <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16>, <8 x i16>) nounwind readnone			declare <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16>, <8 x i16>) nounwind readnone


	define <2 x double> @test_x86_sse2_sqrt_pd(<2 x double> %a0) {
	; SSE-LABEL: test_x86_sse2_sqrt_pd:
	; SSE: ## %bb.0:
	; SSE-NEXT: sqrtpd %xmm0, %xmm0 ## encoding: [0x66,0x0f,0x51,0xc0]
	; SSE-NEXT: retl ## encoding: [0xc3]
	;
	; AVX2-LABEL: test_x86_sse2_sqrt_pd:
	; AVX2: ## %bb.0:
	; AVX2-NEXT: vsqrtpd %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x51,0xc0]
	; AVX2-NEXT: retl ## encoding: [0xc3]
	;
	; SKX-LABEL: test_x86_sse2_sqrt_pd:
	; SKX: ## %bb.0:
	; SKX-NEXT: vsqrtpd %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x51,0xc0]
	; SKX-NEXT: retl ## encoding: [0xc3]
	%res = call <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double> %a0) ; <<2 x double>> [#uses=1]
	ret <2 x double> %res
	}
	declare <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double>) nounwind readnone


	define <2 x double> @test_x86_sse2_sqrt_sd(<2 x double> %a0) {			define <2 x double> @test_x86_sse2_sqrt_sd(<2 x double> %a0) {
	; SSE-LABEL: test_x86_sse2_sqrt_sd:			; SSE-LABEL: test_x86_sse2_sqrt_sd:
	; SSE: ## %bb.0:			; SSE: ## %bb.0:
	; SSE-NEXT: sqrtsd %xmm0, %xmm0 ## encoding: [0xf2,0x0f,0x51,0xc0]			; SSE-NEXT: sqrtsd %xmm0, %xmm0 ## encoding: [0xf2,0x0f,0x51,0xc0]
	; SSE-NEXT: retl ## encoding: [0xc3]			; SSE-NEXT: retl ## encoding: [0xc3]
	;			;
	; AVX2-LABEL: test_x86_sse2_sqrt_sd:			; AVX2-LABEL: test_x86_sse2_sqrt_sd:
	; AVX2: ## %bb.0:			; AVX2: ## %bb.0:
	▲ Show 20 Lines • Show All 253 Lines • Show Last 20 Lines

This is an archive of the discontinued LLVM Phabricator instance.

[X86] Lowering X86 avx512 sqrt intrinsics to IR - LLVM
Closed, Public

Details

Diff Detail

Event Timeline

Revision Contents

Diff 128383

include/llvm/IR/IntrinsicsX86.td

lib/IR/AutoUpgrade.cpp

lib/Target/X86/X86IntrinsicsInfo.h

test/CodeGen/X86/avx-intrinsics-fast-isel.ll

test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll

test/CodeGen/X86/avx-intrinsics-x86.ll

test/CodeGen/X86/avx512-intrinsics-fast-isel.ll

test/CodeGen/X86/avx512-intrinsics-upgrade.ll

test/CodeGen/X86/avx512-intrinsics.ll

test/CodeGen/X86/avx512vl-intrinsics-fast-isel.ll

test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll

test/CodeGen/X86/avx512vl-intrinsics.ll

test/CodeGen/X86/sse-intrinsics-fast-isel.ll

test/CodeGen/X86/sse-intrinsics-x86-upgrade.ll

test/CodeGen/X86/sse-intrinsics-x86.ll

test/CodeGen/X86/sse2-intrinsics-fast-isel.ll

test/CodeGen/X86/sse2-intrinsics-x86-upgrade.ll

test/CodeGen/X86/sse2-intrinsics-x86.ll

This is an archive of the discontinued LLVM Phabricator instance.

[X86] Lowering X86 avx512 sqrt intrinsics to IR - LLVM
Closed, Public

Details

Diff Detail

Event Timeline

Revision Contents

Diff 128383

include/llvm/IR/IntrinsicsX86.td

lib/IR/AutoUpgrade.cpp

lib/Target/X86/X86IntrinsicsInfo.h

test/CodeGen/X86/avx-intrinsics-fast-isel.ll

test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll

test/CodeGen/X86/avx-intrinsics-x86.ll

test/CodeGen/X86/avx512-intrinsics-fast-isel.ll

test/CodeGen/X86/avx512-intrinsics-upgrade.ll

test/CodeGen/X86/avx512-intrinsics.ll

test/CodeGen/X86/avx512vl-intrinsics-fast-isel.ll

test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll

test/CodeGen/X86/avx512vl-intrinsics.ll

test/CodeGen/X86/sse-intrinsics-fast-isel.ll

test/CodeGen/X86/sse-intrinsics-x86-upgrade.ll

test/CodeGen/X86/sse-intrinsics-x86.ll

test/CodeGen/X86/sse2-intrinsics-fast-isel.ll

test/CodeGen/X86/sse2-intrinsics-x86-upgrade.ll

test/CodeGen/X86/sse2-intrinsics-x86.ll

[X86] Lowering X86 avx512 sqrt intrinsics to IR - LLVM
Closed, Public