diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -11763,6 +11763,7 @@
     return EmitX86CpuInit();
 
   SmallVector Ops;
+  bool IsMaskFCmp = false;
 
   // Find out if any arguments are required to be integer constant expressions.
   unsigned ICEArguments = 0;
@@ -13661,16 +13662,18 @@
   case X86::BI__builtin_ia32_cmpordps:
   case X86::BI__builtin_ia32_cmpordpd:
     return getVectorFCmpIR(CmpInst::FCMP_ORD, /*IsSignaling*/false);
-  case X86::BI__builtin_ia32_cmpps:
-  case X86::BI__builtin_ia32_cmpps256:
-  case X86::BI__builtin_ia32_cmppd:
-  case X86::BI__builtin_ia32_cmppd256:
   case X86::BI__builtin_ia32_cmpps128_mask:
   case X86::BI__builtin_ia32_cmpps256_mask:
   case X86::BI__builtin_ia32_cmpps512_mask:
   case X86::BI__builtin_ia32_cmppd128_mask:
   case X86::BI__builtin_ia32_cmppd256_mask:
-  case X86::BI__builtin_ia32_cmppd512_mask: {
+  case X86::BI__builtin_ia32_cmppd512_mask:
+    IsMaskFCmp = true;
+    LLVM_FALLTHROUGH;
+  case X86::BI__builtin_ia32_cmpps:
+  case X86::BI__builtin_ia32_cmpps256:
+  case X86::BI__builtin_ia32_cmppd:
+  case X86::BI__builtin_ia32_cmppd256: {
     // Lowering vector comparisons to fcmp instructions, while
     //    ignoring signalling behaviour requested
     //    ignoring rounding mode requested
@@ -13715,8 +13718,11 @@
 
     // If the predicate is true or false and we're using constrained intrinsics,
     // we don't have a compare intrinsic we can use. Just use the legacy X86
     // specific intrinsic.
-    if ((Pred == FCmpInst::FCMP_TRUE || Pred == FCmpInst::FCMP_FALSE) &&
-        Builder.getIsFPConstrained()) {
+    // If the intrinsic is mask enabled and we're using constrained intrinsics,
+    // use the legacy X86 specific intrinsic.
+    if (Builder.getIsFPConstrained() &&
+        (Pred == FCmpInst::FCMP_TRUE || Pred == FCmpInst::FCMP_FALSE ||
+         IsMaskFCmp)) {
       Intrinsic::ID IID;
       switch (BuiltinID) {
@@ -13734,36 +13740,32 @@
         IID = Intrinsic::x86_avx_cmp_pd_256;
         break;
       case X86::BI__builtin_ia32_cmpps512_mask:
-        IID = Intrinsic::x86_avx512_cmp_ps_512;
+        IID = Intrinsic::x86_avx512_mask_cmp_ps_512;
         break;
       case X86::BI__builtin_ia32_cmppd512_mask:
-        IID = Intrinsic::x86_avx512_cmp_pd_512;
+        IID = Intrinsic::x86_avx512_mask_cmp_pd_512;
         break;
       case X86::BI__builtin_ia32_cmpps128_mask:
-        IID = Intrinsic::x86_avx512_cmp_ps_128;
+        IID = Intrinsic::x86_avx512_mask_cmp_ps_128;
         break;
       case X86::BI__builtin_ia32_cmpps256_mask:
-        IID = Intrinsic::x86_avx512_cmp_ps_256;
+        IID = Intrinsic::x86_avx512_mask_cmp_ps_256;
        break;
       case X86::BI__builtin_ia32_cmppd128_mask:
-        IID = Intrinsic::x86_avx512_cmp_pd_128;
+        IID = Intrinsic::x86_avx512_mask_cmp_pd_128;
        break;
       case X86::BI__builtin_ia32_cmppd256_mask:
-        IID = Intrinsic::x86_avx512_cmp_pd_256;
+        IID = Intrinsic::x86_avx512_mask_cmp_pd_256;
        break;
      }
 
      Function *Intr = CGM.getIntrinsic(IID);
-     if (cast(Intr->getReturnType())
-             ->getElementType()
-             ->isIntegerTy(1)) {
+     if (IsMaskFCmp) {
        unsigned NumElts =
            cast(Ops[0]->getType())->getNumElements();
-       Value *MaskIn = Ops[3];
-       Ops.erase(&Ops[3]);
-
+       Ops[3] = getMaskVecValue(*this, Ops[3], NumElts);
        Value *Cmp = Builder.CreateCall(Intr, Ops);
-       return EmitX86MaskedCompareResult(*this, Cmp, NumElts, MaskIn);
+       return EmitX86MaskedCompareResult(*this, Cmp, NumElts, nullptr);
      }
 
      return Builder.CreateCall(Intr, Ops);
@@ -13771,14 +13773,9 @@
 
    // Builtins without the _mask suffix return a vector of integers
    // of the same width as the input vectors
-   switch (BuiltinID) {
-   case X86::BI__builtin_ia32_cmpps512_mask:
-   case X86::BI__builtin_ia32_cmppd512_mask:
-   case X86::BI__builtin_ia32_cmpps128_mask:
-   case X86::BI__builtin_ia32_cmpps256_mask:
-   case X86::BI__builtin_ia32_cmppd128_mask:
-   case X86::BI__builtin_ia32_cmppd256_mask: {
-     // FIXME: Support SAE.
+   if (IsMaskFCmp) {
+     // We ignore SAE if strict FP is disabled. We only keep precise
+     // exception behavior under strict FP.
      unsigned NumElts =
          cast(Ops[0]->getType())->getNumElements();
      Value *Cmp;
@@ -13788,9 +13785,8 @@
        Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]);
      return EmitX86MaskedCompareResult(*this, Cmp, NumElts, Ops[3]);
    }
-   default:
-     return getVectorFCmpIR(Pred, IsSignaling);
-   }
+
+   return getVectorFCmpIR(Pred, IsSignaling);
  }
 
  // SSE scalar comparison intrinsics
diff --git a/clang/test/CodeGen/avx512f-builtins-constrained-cmp.c b/clang/test/CodeGen/avx512f-builtins-constrained-cmp.c
--- a/clang/test/CodeGen/avx512f-builtins-constrained-cmp.c
+++ b/clang/test/CodeGen/avx512f-builtins-constrained-cmp.c
@@ -4,858 +4,792 @@
 __mmask16 test_mm512_cmp_round_ps_mask(__m512 a, __m512 b) {
   // CHECK-LABEL: @test_mm512_cmp_round_ps_mask
-  // CHECK: call <16 x i1> @llvm.experimental.constrained.fcmp.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, metadata !"oeq", metadata !"fpexcept.strict")
+  // CHECK: call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %{{.*}}, <16 x float> %{{.*}}, i32 0, <16 x i1> {{.*}}, i32 8) #2
   return _mm512_cmp_round_ps_mask(a, b, _CMP_EQ_OQ, _MM_FROUND_NO_EXC);
 }
 
 __mmask16 test_mm512_mask_cmp_round_ps_mask(__mmask16 m, __m512 a, __m512 b) {
   // CHECK-LABEL: @test_mm512_mask_cmp_round_ps_mask
-  // CHECK: [[CMP:%.*]] = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, metadata !"oeq", metadata !"fpexcept.strict")
-  // CHECK: and <16 x i1> [[CMP]], {{.*}}
+  // CHECK: call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %{{.*}}, <16 x float> %{{.*}}, i32 0, <16 x i1> {{.*}}, i32 8) #2
   return _mm512_mask_cmp_round_ps_mask(m, a, b, _CMP_EQ_OQ, _MM_FROUND_NO_EXC);
 }
 
 __mmask16 test_mm512_cmp_ps_mask_eq_oq(__m512 a, __m512 b) {
   // CHECK-LABEL: @test_mm512_cmp_ps_mask_eq_oq
-  // CHECK: call <16 x i1> @llvm.experimental.constrained.fcmp.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, metadata !"oeq", metadata !"fpexcept.strict")
+  // CHECK: call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %{{.*}}, <16 x float> %{{.*}}, i32 0, <16 x i1> {{.*}}, i32 4) #2
   return _mm512_cmp_ps_mask(a, b, _CMP_EQ_OQ);
 }
 
 __mmask16 test_mm512_cmp_ps_mask_lt_os(__m512 a, __m512 b) {
   // CHECK-LABEL: test_mm512_cmp_ps_mask_lt_os
-  // CHECK: call <16 x i1> @llvm.experimental.constrained.fcmps.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, metadata !"olt", metadata !"fpexcept.strict")
+  // CHECK: call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %{{.*}}, <16 x float> %{{.*}}, i32 1, <16 x i1> {{.*}}, i32 4) #2
   return _mm512_cmp_ps_mask(a, b, _CMP_LT_OS);
 }
 
 __mmask16 test_mm512_cmp_ps_mask_le_os(__m512 a, __m512 b) {
   // CHECK-LABEL: test_mm512_cmp_ps_mask_le_os
-  // CHECK: call <16 x i1> @llvm.experimental.constrained.fcmps.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, metadata !"ole", metadata !"fpexcept.strict")
+  // CHECK: call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %{{.*}}, <16 x float> %{{.*}}, i32 2, <16 x i1> {{.*}}, i32 4) #2
   return _mm512_cmp_ps_mask(a, b, _CMP_LE_OS);
 }
 
 __mmask16 test_mm512_cmp_ps_mask_unord_q(__m512 a, __m512 b) {
   // CHECK-LABEL: test_mm512_cmp_ps_mask_unord_q
-  // CHECK: call <16 x i1> @llvm.experimental.constrained.fcmp.v16f32(<16 x float> %{{.*}}, <16 x 
float> %{{.*}}, metadata !"uno", metadata !"fpexcept.strict") + // CHECK: call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %{{.*}}, <16 x float> %{{.*}}, i32 3, <16 x i1> {{.*}}, i32 4) #2 return _mm512_cmp_ps_mask(a, b, _CMP_UNORD_Q); } __mmask16 test_mm512_cmp_ps_mask_neq_uq(__m512 a, __m512 b) { // CHECK-LABEL: test_mm512_cmp_ps_mask_neq_uq - // CHECK: call <16 x i1> @llvm.experimental.constrained.fcmp.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, metadata !"une", metadata !"fpexcept.strict") + // CHECK: call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %{{.*}}, <16 x float> %{{.*}}, i32 4, <16 x i1> {{.*}}, i32 4) #2 return _mm512_cmp_ps_mask(a, b, _CMP_NEQ_UQ); } __mmask16 test_mm512_cmp_ps_mask_nlt_us(__m512 a, __m512 b) { // CHECK-LABEL: test_mm512_cmp_ps_mask_nlt_us - // CHECK: call <16 x i1> @llvm.experimental.constrained.fcmps.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, metadata !"uge", metadata !"fpexcept.strict") + // CHECK: call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %{{.*}}, <16 x float> %{{.*}}, i32 5, <16 x i1> {{.*}}, i32 4) #2 return _mm512_cmp_ps_mask(a, b, _CMP_NLT_US); } __mmask16 test_mm512_cmp_ps_mask_nle_us(__m512 a, __m512 b) { // CHECK-LABEL: test_mm512_cmp_ps_mask_nle_us - // CHECK: call <16 x i1> @llvm.experimental.constrained.fcmps.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, metadata !"ugt", metadata !"fpexcept.strict") + // CHECK: call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %{{.*}}, <16 x float> %{{.*}}, i32 6, <16 x i1> {{.*}}, i32 4) #2 return _mm512_cmp_ps_mask(a, b, _CMP_NLE_US); } __mmask16 test_mm512_cmp_ps_mask_ord_q(__m512 a, __m512 b) { // CHECK-LABEL: test_mm512_cmp_ps_mask_ord_q - // CHECK: call <16 x i1> @llvm.experimental.constrained.fcmp.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, metadata !"ord", metadata !"fpexcept.strict") + // CHECK: call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %{{.*}}, <16 x float> %{{.*}}, i32 7, <16 x i1> {{.*}}, i32 4) #2 return _mm512_cmp_ps_mask(a, b, _CMP_ORD_Q); } __mmask16 test_mm512_cmp_ps_mask_eq_uq(__m512 a, __m512 b) { // CHECK-LABEL: test_mm512_cmp_ps_mask_eq_uq - // CHECK: call <16 x i1> @llvm.experimental.constrained.fcmp.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, metadata !"ueq", metadata !"fpexcept.strict") + // CHECK: call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %{{.*}}, <16 x float> %{{.*}}, i32 8, <16 x i1> {{.*}}, i32 4) #2 return _mm512_cmp_ps_mask(a, b, _CMP_EQ_UQ); } __mmask16 test_mm512_cmp_ps_mask_nge_us(__m512 a, __m512 b) { // CHECK-LABEL: test_mm512_cmp_ps_mask_nge_us - // CHECK: call <16 x i1> @llvm.experimental.constrained.fcmps.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, metadata !"ult", metadata !"fpexcept.strict") + // CHECK: call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %{{.*}}, <16 x float> %{{.*}}, i32 9, <16 x i1> {{.*}}, i32 4) #2 return _mm512_cmp_ps_mask(a, b, _CMP_NGE_US); } __mmask16 test_mm512_cmp_ps_mask_ngt_us(__m512 a, __m512 b) { // CHECK-LABEL: test_mm512_cmp_ps_mask_ngt_us - // CHECK: call <16 x i1> @llvm.experimental.constrained.fcmps.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, metadata !"ule", metadata !"fpexcept.strict") + // CHECK: call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %{{.*}}, <16 x float> %{{.*}}, i32 10, <16 x i1> {{.*}}, i32 4) #2 return _mm512_cmp_ps_mask(a, b, _CMP_NGT_US); } __mmask16 test_mm512_cmp_ps_mask_false_oq(__m512 a, __m512 b) { // CHECK-LABEL: test_mm512_cmp_ps_mask_false_oq - // 
CHECK: call <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float> %{{.*}}, <16 x float> %{{.*}}, i32 11, i32 4) + // CHECK: call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %{{.*}}, <16 x float> %{{.*}}, i32 11, <16 x i1> {{.*}}, i32 4) #2 return _mm512_cmp_ps_mask(a, b, _CMP_FALSE_OQ); } __mmask16 test_mm512_cmp_ps_mask_neq_oq(__m512 a, __m512 b) { // CHECK-LABEL: test_mm512_cmp_ps_mask_neq_oq - // CHECK: call <16 x i1> @llvm.experimental.constrained.fcmp.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, metadata !"one", metadata !"fpexcept.strict") + // CHECK: call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %{{.*}}, <16 x float> %{{.*}}, i32 12, <16 x i1> {{.*}}, i32 4) #2 return _mm512_cmp_ps_mask(a, b, _CMP_NEQ_OQ); } __mmask16 test_mm512_cmp_ps_mask_ge_os(__m512 a, __m512 b) { // CHECK-LABEL: test_mm512_cmp_ps_mask_ge_os - // CHECK: call <16 x i1> @llvm.experimental.constrained.fcmps.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, metadata !"oge", metadata !"fpexcept.strict") + // CHECK: call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %{{.*}}, <16 x float> %{{.*}}, i32 13, <16 x i1> {{.*}}, i32 4) #2 return _mm512_cmp_ps_mask(a, b, _CMP_GE_OS); } __mmask16 test_mm512_cmp_ps_mask_gt_os(__m512 a, __m512 b) { // CHECK-LABEL: test_mm512_cmp_ps_mask_gt_os - // CHECK: call <16 x i1> @llvm.experimental.constrained.fcmps.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, metadata !"ogt", metadata !"fpexcept.strict") + // CHECK: call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %{{.*}}, <16 x float> %{{.*}}, i32 14, <16 x i1> {{.*}}, i32 4) #2 return _mm512_cmp_ps_mask(a, b, _CMP_GT_OS); } __mmask16 test_mm512_cmp_ps_mask_true_uq(__m512 a, __m512 b) { // CHECK-LABEL: test_mm512_cmp_ps_mask_true_uq - // CHECK: call <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float> %{{.*}}, <16 x float> %{{.*}}, i32 15, i32 4) + // CHECK: call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %{{.*}}, <16 x float> %{{.*}}, i32 15, <16 x i1> {{.*}}, i32 4) #2 return _mm512_cmp_ps_mask(a, b, _CMP_TRUE_UQ); } __mmask16 test_mm512_cmp_ps_mask_eq_os(__m512 a, __m512 b) { // CHECK-LABEL: test_mm512_cmp_ps_mask_eq_os - // CHECK: call <16 x i1> @llvm.experimental.constrained.fcmps.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, metadata !"oeq", metadata !"fpexcept.strict") + // CHECK: call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %{{.*}}, <16 x float> %{{.*}}, i32 16, <16 x i1> {{.*}}, i32 4) #2 return _mm512_cmp_ps_mask(a, b, _CMP_EQ_OS); } __mmask16 test_mm512_cmp_ps_mask_lt_oq(__m512 a, __m512 b) { // CHECK-LABEL: test_mm512_cmp_ps_mask_lt_oq - // CHECK: call <16 x i1> @llvm.experimental.constrained.fcmp.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, metadata !"olt", metadata !"fpexcept.strict") + // CHECK: call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %{{.*}}, <16 x float> %{{.*}}, i32 17, <16 x i1> {{.*}}, i32 4) #2 return _mm512_cmp_ps_mask(a, b, _CMP_LT_OQ); } __mmask16 test_mm512_cmp_ps_mask_le_oq(__m512 a, __m512 b) { // CHECK-LABEL: test_mm512_cmp_ps_mask_le_oq - // CHECK: call <16 x i1> @llvm.experimental.constrained.fcmp.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, metadata !"ole", metadata !"fpexcept.strict") + // CHECK: call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %{{.*}}, <16 x float> %{{.*}}, i32 18, <16 x i1> {{.*}}, i32 4) #2 return _mm512_cmp_ps_mask(a, b, _CMP_LE_OQ); } __mmask16 test_mm512_cmp_ps_mask_unord_s(__m512 a, __m512 b) { // CHECK-LABEL: test_mm512_cmp_ps_mask_unord_s - // 
CHECK: call <16 x i1> @llvm.experimental.constrained.fcmps.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, metadata !"uno", metadata !"fpexcept.strict") + // CHECK: call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %{{.*}}, <16 x float> %{{.*}}, i32 19, <16 x i1> {{.*}}, i32 4) #2 return _mm512_cmp_ps_mask(a, b, _CMP_UNORD_S); } __mmask16 test_mm512_cmp_ps_mask_neq_us(__m512 a, __m512 b) { // CHECK-LABEL: test_mm512_cmp_ps_mask_neq_us - // CHECK: call <16 x i1> @llvm.experimental.constrained.fcmps.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, metadata !"une", metadata !"fpexcept.strict") + // CHECK: call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %{{.*}}, <16 x float> %{{.*}}, i32 20, <16 x i1> {{.*}}, i32 4) #2 return _mm512_cmp_ps_mask(a, b, _CMP_NEQ_US); } __mmask16 test_mm512_cmp_ps_mask_nlt_uq(__m512 a, __m512 b) { // CHECK-LABEL: test_mm512_cmp_ps_mask_nlt_uq - // CHECK: call <16 x i1> @llvm.experimental.constrained.fcmp.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, metadata !"uge", metadata !"fpexcept.strict") + // CHECK: call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %{{.*}}, <16 x float> %{{.*}}, i32 21, <16 x i1> {{.*}}, i32 4) #2 return _mm512_cmp_ps_mask(a, b, _CMP_NLT_UQ); } __mmask16 test_mm512_cmp_ps_mask_nle_uq(__m512 a, __m512 b) { // CHECK-LABEL: test_mm512_cmp_ps_mask_nle_uq - // CHECK: call <16 x i1> @llvm.experimental.constrained.fcmp.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, metadata !"ugt", metadata !"fpexcept.strict") + // CHECK: call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %{{.*}}, <16 x float> %{{.*}}, i32 22, <16 x i1> {{.*}}, i32 4) #2 return _mm512_cmp_ps_mask(a, b, _CMP_NLE_UQ); } __mmask16 test_mm512_cmp_ps_mask_ord_s(__m512 a, __m512 b) { // CHECK-LABEL: test_mm512_cmp_ps_mask_ord_s - // CHECK: call <16 x i1> @llvm.experimental.constrained.fcmps.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, metadata !"ord", metadata !"fpexcept.strict") + // CHECK: call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %{{.*}}, <16 x float> %{{.*}}, i32 23, <16 x i1> {{.*}}, i32 4) #2 return _mm512_cmp_ps_mask(a, b, _CMP_ORD_S); } __mmask16 test_mm512_cmp_ps_mask_eq_us(__m512 a, __m512 b) { // CHECK-LABEL: test_mm512_cmp_ps_mask_eq_us - // CHECK: call <16 x i1> @llvm.experimental.constrained.fcmps.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, metadata !"ueq", metadata !"fpexcept.strict") + // CHECK: call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %{{.*}}, <16 x float> %{{.*}}, i32 24, <16 x i1> {{.*}}, i32 4) #2 return _mm512_cmp_ps_mask(a, b, _CMP_EQ_US); } __mmask16 test_mm512_cmp_ps_mask_nge_uq(__m512 a, __m512 b) { // CHECK-LABEL: test_mm512_cmp_ps_mask_nge_uq - // CHECK: call <16 x i1> @llvm.experimental.constrained.fcmp.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, metadata !"ult", metadata !"fpexcept.strict") + // CHECK: call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %{{.*}}, <16 x float> %{{.*}}, i32 25, <16 x i1> {{.*}}, i32 4) #2 return _mm512_cmp_ps_mask(a, b, _CMP_NGE_UQ); } __mmask16 test_mm512_cmp_ps_mask_ngt_uq(__m512 a, __m512 b) { // CHECK-LABEL: test_mm512_cmp_ps_mask_ngt_uq - // CHECK: call <16 x i1> @llvm.experimental.constrained.fcmp.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, metadata !"ule", metadata !"fpexcept.strict") + // CHECK: call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %{{.*}}, <16 x float> %{{.*}}, i32 26, <16 x i1> {{.*}}, i32 4) #2 return _mm512_cmp_ps_mask(a, b, _CMP_NGT_UQ); } __mmask16 
test_mm512_cmp_ps_mask_false_os(__m512 a, __m512 b) { // CHECK-LABEL: test_mm512_cmp_ps_mask_false_os - // CHECK: call <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float> %{{.*}}, <16 x float> %{{.*}}, i32 27, i32 4) + // CHECK: call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %{{.*}}, <16 x float> %{{.*}}, i32 27, <16 x i1> {{.*}}, i32 4) #2 return _mm512_cmp_ps_mask(a, b, _CMP_FALSE_OS); } __mmask16 test_mm512_cmp_ps_mask_neq_os(__m512 a, __m512 b) { // CHECK-LABEL: test_mm512_cmp_ps_mask_neq_os - // CHECK: call <16 x i1> @llvm.experimental.constrained.fcmps.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, metadata !"one", metadata !"fpexcept.strict") + // CHECK: call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %{{.*}}, <16 x float> %{{.*}}, i32 28, <16 x i1> {{.*}}, i32 4) #2 return _mm512_cmp_ps_mask(a, b, _CMP_NEQ_OS); } __mmask16 test_mm512_cmp_ps_mask_ge_oq(__m512 a, __m512 b) { // CHECK-LABEL: test_mm512_cmp_ps_mask_ge_oq - // CHECK: call <16 x i1> @llvm.experimental.constrained.fcmp.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, metadata !"oge", metadata !"fpexcept.strict") + // CHECK: call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %{{.*}}, <16 x float> %{{.*}}, i32 29, <16 x i1> {{.*}}, i32 4) #2 return _mm512_cmp_ps_mask(a, b, _CMP_GE_OQ); } __mmask16 test_mm512_cmp_ps_mask_gt_oq(__m512 a, __m512 b) { // CHECK-LABEL: test_mm512_cmp_ps_mask_gt_oq - // CHECK: call <16 x i1> @llvm.experimental.constrained.fcmp.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, metadata !"ogt", metadata !"fpexcept.strict") + // CHECK: call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %{{.*}}, <16 x float> %{{.*}}, i32 30, <16 x i1> {{.*}}, i32 4) #2 return _mm512_cmp_ps_mask(a, b, _CMP_GT_OQ); } __mmask16 test_mm512_cmp_ps_mask_true_us(__m512 a, __m512 b) { // CHECK-LABEL: test_mm512_cmp_ps_mask_true_us - // CHECK: call <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float> %{{.*}}, <16 x float> %{{.*}}, i32 31, i32 4) + // CHECK: call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %{{.*}}, <16 x float> %{{.*}}, i32 31, <16 x i1> {{.*}}, i32 4) #2 return _mm512_cmp_ps_mask(a, b, _CMP_TRUE_US); } __mmask16 test_mm512_mask_cmp_ps_mask_eq_oq(__mmask16 m, __m512 a, __m512 b) { // CHECK-LABEL: @test_mm512_mask_cmp_ps_mask_eq_oq - // CHECK: [[CMP:%.*]] = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, metadata !"oeq", metadata !"fpexcept.strict") - // CHECK: and <16 x i1> [[CMP]], {{.*}} + // CHECK: call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %{{.*}}, <16 x float> %{{.*}}, i32 0, <16 x i1> {{.*}}, i32 4) #2 return _mm512_mask_cmp_ps_mask(m, a, b, _CMP_EQ_OQ); } __mmask16 test_mm512_mask_cmp_ps_mask_lt_os(__mmask16 m, __m512 a, __m512 b) { // CHECK-LABEL: test_mm512_mask_cmp_ps_mask_lt_os - // CHECK: [[CMP:%.*]] = call <16 x i1> @llvm.experimental.constrained.fcmps.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, metadata !"olt", metadata !"fpexcept.strict") - // CHECK: and <16 x i1> [[CMP]], {{.*}} + // CHECK: call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %{{.*}}, <16 x float> %{{.*}}, i32 1, <16 x i1> {{.*}}, i32 4) #2 return _mm512_mask_cmp_ps_mask(m, a, b, _CMP_LT_OS); } __mmask16 test_mm512_mask_cmp_ps_mask_le_os(__mmask16 m, __m512 a, __m512 b) { // CHECK-LABEL: test_mm512_mask_cmp_ps_mask_le_os - // CHECK: [[CMP:%.*]] = call <16 x i1> @llvm.experimental.constrained.fcmps.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, metadata !"ole", metadata 
!"fpexcept.strict") - // CHECK: and <16 x i1> [[CMP]], {{.*}} + // CHECK: call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %{{.*}}, <16 x float> %{{.*}}, i32 2, <16 x i1> {{.*}}, i32 4) #2 return _mm512_mask_cmp_ps_mask(m, a, b, _CMP_LE_OS); } __mmask16 test_mm512_mask_cmp_ps_mask_unord_q(__mmask16 m, __m512 a, __m512 b) { // CHECK-LABEL: test_mm512_mask_cmp_ps_mask_unord_q - // CHECK: [[CMP:%.*]] = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, metadata !"uno", metadata !"fpexcept.strict") - // CHECK: and <16 x i1> [[CMP]], {{.*}} + // CHECK: call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %{{.*}}, <16 x float> %{{.*}}, i32 3, <16 x i1> {{.*}}, i32 4) #2 return _mm512_mask_cmp_ps_mask(m, a, b, _CMP_UNORD_Q); } __mmask16 test_mm512_mask_cmp_ps_mask_neq_uq(__mmask16 m, __m512 a, __m512 b) { // CHECK-LABEL: test_mm512_mask_cmp_ps_mask_neq_uq - // CHECK: [[CMP:%.*]] = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, metadata !"une", metadata !"fpexcept.strict") - // CHECK: and <16 x i1> [[CMP]], {{.*}} + // CHECK: call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %{{.*}}, <16 x float> %{{.*}}, i32 4, <16 x i1> {{.*}}, i32 4) #2 return _mm512_mask_cmp_ps_mask(m, a, b, _CMP_NEQ_UQ); } __mmask16 test_mm512_mask_cmp_ps_mask_nlt_us(__mmask16 m, __m512 a, __m512 b) { // CHECK-LABEL: test_mm512_mask_cmp_ps_mask_nlt_us - // CHECK: [[CMP:%.*]] = call <16 x i1> @llvm.experimental.constrained.fcmps.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, metadata !"uge", metadata !"fpexcept.strict") - // CHECK: and <16 x i1> [[CMP]], {{.*}} + // CHECK: call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %{{.*}}, <16 x float> %{{.*}}, i32 5, <16 x i1> {{.*}}, i32 4) #2 return _mm512_mask_cmp_ps_mask(m, a, b, _CMP_NLT_US); } __mmask16 test_mm512_mask_cmp_ps_mask_nle_us(__mmask16 m, __m512 a, __m512 b) { // CHECK-LABEL: test_mm512_mask_cmp_ps_mask_nle_us - // CHECK: [[CMP:%.*]] = call <16 x i1> @llvm.experimental.constrained.fcmps.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, metadata !"ugt", metadata !"fpexcept.strict") - // CHECK: and <16 x i1> [[CMP]], {{.*}} + // CHECK: call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %{{.*}}, <16 x float> %{{.*}}, i32 6, <16 x i1> {{.*}}, i32 4) #2 return _mm512_mask_cmp_ps_mask(m, a, b, _CMP_NLE_US); } __mmask16 test_mm512_mask_cmp_ps_mask_ord_q(__mmask16 m, __m512 a, __m512 b) { // CHECK-LABEL: test_mm512_mask_cmp_ps_mask_ord_q - // CHECK: [[CMP:%.*]] = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, metadata !"ord", metadata !"fpexcept.strict") - // CHECK: and <16 x i1> [[CMP]], {{.*}} + // CHECK: call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %{{.*}}, <16 x float> %{{.*}}, i32 7, <16 x i1> {{.*}}, i32 4) #2 return _mm512_mask_cmp_ps_mask(m, a, b, _CMP_ORD_Q); } __mmask16 test_mm512_mask_cmp_ps_mask_eq_uq(__mmask16 m, __m512 a, __m512 b) { // CHECK-LABEL: test_mm512_mask_cmp_ps_mask_eq_uq - // CHECK: [[CMP:%.*]] = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, metadata !"ueq", metadata !"fpexcept.strict") - // CHECK: and <16 x i1> [[CMP]], {{.*}} + // CHECK: call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %{{.*}}, <16 x float> %{{.*}}, i32 8, <16 x i1> {{.*}}, i32 4) #2 return _mm512_mask_cmp_ps_mask(m, a, b, _CMP_EQ_UQ); } __mmask16 
test_mm512_mask_cmp_ps_mask_nge_us(__mmask16 m, __m512 a, __m512 b) { // CHECK-LABEL: test_mm512_mask_cmp_ps_mask_nge_us - // CHECK: [[CMP:%.*]] = call <16 x i1> @llvm.experimental.constrained.fcmps.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, metadata !"ult", metadata !"fpexcept.strict") - // CHECK: and <16 x i1> [[CMP]], {{.*}} + // CHECK: call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %{{.*}}, <16 x float> %{{.*}}, i32 9, <16 x i1> {{.*}}, i32 4) #2 return _mm512_mask_cmp_ps_mask(m, a, b, _CMP_NGE_US); } __mmask16 test_mm512_mask_cmp_ps_mask_ngt_us(__mmask16 m, __m512 a, __m512 b) { // CHECK-LABEL: test_mm512_mask_cmp_ps_mask_ngt_us - // CHECK: [[CMP:%.*]] = call <16 x i1> @llvm.experimental.constrained.fcmps.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, metadata !"ule", metadata !"fpexcept.strict") - // CHECK: and <16 x i1> [[CMP]], {{.*}} + // CHECK: call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %{{.*}}, <16 x float> %{{.*}}, i32 10, <16 x i1> {{.*}}, i32 4) #2 return _mm512_mask_cmp_ps_mask(m, a, b, _CMP_NGT_US); } __mmask16 test_mm512_mask_cmp_ps_mask_false_oq(__mmask16 m, __m512 a, __m512 b) { // CHECK-LABEL: test_mm512_mask_cmp_ps_mask_false_oq - // CHECK: [[CMP:%.*]] = call <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float> %{{.*}}, <16 x float> %{{.*}}, i32 11, i32 4) - // CHECK: and <16 x i1> [[CMP]], {{.*}} + // CHECK: call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %{{.*}}, <16 x float> %{{.*}}, i32 11, <16 x i1> {{.*}}, i32 4) #2 return _mm512_mask_cmp_ps_mask(m, a, b, _CMP_FALSE_OQ); } __mmask16 test_mm512_mask_cmp_ps_mask_neq_oq(__mmask16 m, __m512 a, __m512 b) { // CHECK-LABEL: test_mm512_mask_cmp_ps_mask_neq_oq - // CHECK: [[CMP:%.*]] = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, metadata !"one", metadata !"fpexcept.strict") - // CHECK: and <16 x i1> [[CMP]], {{.*}} + // CHECK: call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %{{.*}}, <16 x float> %{{.*}}, i32 12, <16 x i1> {{.*}}, i32 4) #2 return _mm512_mask_cmp_ps_mask(m, a, b, _CMP_NEQ_OQ); } __mmask16 test_mm512_mask_cmp_ps_mask_ge_os(__mmask16 m, __m512 a, __m512 b) { // CHECK-LABEL: test_mm512_mask_cmp_ps_mask_ge_os - // CHECK: [[CMP:%.*]] = call <16 x i1> @llvm.experimental.constrained.fcmps.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, metadata !"oge", metadata !"fpexcept.strict") - // CHECK: and <16 x i1> [[CMP]], {{.*}} + // CHECK: call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %{{.*}}, <16 x float> %{{.*}}, i32 13, <16 x i1> {{.*}}, i32 4) #2 return _mm512_mask_cmp_ps_mask(m, a, b, _CMP_GE_OS); } __mmask16 test_mm512_mask_cmp_ps_mask_gt_os(__mmask16 m, __m512 a, __m512 b) { // CHECK-LABEL: test_mm512_mask_cmp_ps_mask_gt_os - // CHECK: [[CMP:%.*]] = call <16 x i1> @llvm.experimental.constrained.fcmps.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, metadata !"ogt", metadata !"fpexcept.strict") - // CHECK: and <16 x i1> [[CMP]], {{.*}} + // CHECK: call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %{{.*}}, <16 x float> %{{.*}}, i32 14, <16 x i1> {{.*}}, i32 4) #2 return _mm512_mask_cmp_ps_mask(m, a, b, _CMP_GT_OS); } __mmask16 test_mm512_mask_cmp_ps_mask_true_uq(__mmask16 m, __m512 a, __m512 b) { // CHECK-LABEL: test_mm512_mask_cmp_ps_mask_true_uq - // CHECK: [[CMP:%.*]] = call <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float> %{{.*}}, <16 x float> %{{.*}}, i32 15, i32 4) - // CHECK: and <16 x i1> [[CMP]], {{.*}} + // CHECK: call <16 x i1> 
@llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %{{.*}}, <16 x float> %{{.*}}, i32 15, <16 x i1> {{.*}}, i32 4) #2 return _mm512_mask_cmp_ps_mask(m, a, b, _CMP_TRUE_UQ); } __mmask16 test_mm512_mask_cmp_ps_mask_eq_os(__mmask16 m, __m512 a, __m512 b) { // CHECK-LABEL: test_mm512_mask_cmp_ps_mask_eq_os - // CHECK: [[CMP:%.*]] = call <16 x i1> @llvm.experimental.constrained.fcmps.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, metadata !"oeq", metadata !"fpexcept.strict") - // CHECK: and <16 x i1> [[CMP]], {{.*}} + // CHECK: call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %{{.*}}, <16 x float> %{{.*}}, i32 16, <16 x i1> {{.*}}, i32 4) #2 return _mm512_mask_cmp_ps_mask(m, a, b, _CMP_EQ_OS); } __mmask16 test_mm512_mask_cmp_ps_mask_lt_oq(__mmask16 m, __m512 a, __m512 b) { // CHECK-LABEL: test_mm512_mask_cmp_ps_mask_lt_oq - // CHECK: [[CMP:%.*]] = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, metadata !"olt", metadata !"fpexcept.strict") - // CHECK: and <16 x i1> [[CMP]], {{.*}} + // CHECK: call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %{{.*}}, <16 x float> %{{.*}}, i32 17, <16 x i1> {{.*}}, i32 4) #2 return _mm512_mask_cmp_ps_mask(m, a, b, _CMP_LT_OQ); } __mmask16 test_mm512_mask_cmp_ps_mask_le_oq(__mmask16 m, __m512 a, __m512 b) { // CHECK-LABEL: test_mm512_mask_cmp_ps_mask_le_oq - // CHECK: [[CMP:%.*]] = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, metadata !"ole", metadata !"fpexcept.strict") - // CHECK: and <16 x i1> [[CMP]], {{.*}} + // CHECK: call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %{{.*}}, <16 x float> %{{.*}}, i32 18, <16 x i1> {{.*}}, i32 4) #2 return _mm512_mask_cmp_ps_mask(m, a, b, _CMP_LE_OQ); } __mmask16 test_mm512_mask_cmp_ps_mask_unord_s(__mmask16 m, __m512 a, __m512 b) { // CHECK-LABEL: test_mm512_mask_cmp_ps_mask_unord_s - // CHECK: [[CMP:%.*]] = call <16 x i1> @llvm.experimental.constrained.fcmps.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, metadata !"uno", metadata !"fpexcept.strict") - // CHECK: and <16 x i1> [[CMP]], {{.*}} + // CHECK: call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %{{.*}}, <16 x float> %{{.*}}, i32 19, <16 x i1> {{.*}}, i32 4) #2 return _mm512_mask_cmp_ps_mask(m, a, b, _CMP_UNORD_S); } __mmask16 test_mm512_mask_cmp_ps_mask_neq_us(__mmask16 m, __m512 a, __m512 b) { // CHECK-LABEL: test_mm512_mask_cmp_ps_mask_neq_us - // CHECK: [[CMP:%.*]] = call <16 x i1> @llvm.experimental.constrained.fcmps.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, metadata !"une", metadata !"fpexcept.strict") - // CHECK: and <16 x i1> [[CMP]], {{.*}} + // CHECK: call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %{{.*}}, <16 x float> %{{.*}}, i32 20, <16 x i1> {{.*}}, i32 4) #2 return _mm512_mask_cmp_ps_mask(m, a, b, _CMP_NEQ_US); } __mmask16 test_mm512_mask_cmp_ps_mask_nlt_uq(__mmask16 m, __m512 a, __m512 b) { // CHECK-LABEL: test_mm512_mask_cmp_ps_mask_nlt_uq - // CHECK: [[CMP:%.*]] = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, metadata !"uge", metadata !"fpexcept.strict") - // CHECK: and <16 x i1> [[CMP]], {{.*}} + // CHECK: call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %{{.*}}, <16 x float> %{{.*}}, i32 21, <16 x i1> {{.*}}, i32 4) #2 return _mm512_mask_cmp_ps_mask(m, a, b, _CMP_NLT_UQ); } __mmask16 test_mm512_mask_cmp_ps_mask_nle_uq(__mmask16 m, __m512 a, __m512 b) { // CHECK-LABEL: 
test_mm512_mask_cmp_ps_mask_nle_uq - // CHECK: [[CMP:%.*]] = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, metadata !"ugt", metadata !"fpexcept.strict") - // CHECK: and <16 x i1> [[CMP]], {{.*}} + // CHECK: call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %{{.*}}, <16 x float> %{{.*}}, i32 22, <16 x i1> {{.*}}, i32 4) #2 return _mm512_mask_cmp_ps_mask(m, a, b, _CMP_NLE_UQ); } __mmask16 test_mm512_mask_cmp_ps_mask_ord_s(__mmask16 m, __m512 a, __m512 b) { // CHECK-LABEL: test_mm512_mask_cmp_ps_mask_ord_s - // CHECK: [[CMP:%.*]] = call <16 x i1> @llvm.experimental.constrained.fcmps.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, metadata !"ord", metadata !"fpexcept.strict") - // CHECK: and <16 x i1> [[CMP]], {{.*}} + // CHECK: call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %{{.*}}, <16 x float> %{{.*}}, i32 23, <16 x i1> {{.*}}, i32 4) #2 return _mm512_mask_cmp_ps_mask(m, a, b, _CMP_ORD_S); } __mmask16 test_mm512_mask_cmp_ps_mask_eq_us(__mmask16 m, __m512 a, __m512 b) { // CHECK-LABEL: test_mm512_mask_cmp_ps_mask_eq_us - // CHECK: [[CMP:%.*]] = call <16 x i1> @llvm.experimental.constrained.fcmps.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, metadata !"ueq", metadata !"fpexcept.strict") - // CHECK: and <16 x i1> [[CMP]], {{.*}} + // CHECK: call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %{{.*}}, <16 x float> %{{.*}}, i32 24, <16 x i1> {{.*}}, i32 4) #2 return _mm512_mask_cmp_ps_mask(m, a, b, _CMP_EQ_US); } __mmask16 test_mm512_mask_cmp_ps_mask_nge_uq(__mmask16 m, __m512 a, __m512 b) { // CHECK-LABEL: test_mm512_mask_cmp_ps_mask_nge_uq - // CHECK: [[CMP:%.*]] = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, metadata !"ult", metadata !"fpexcept.strict") - // CHECK: and <16 x i1> [[CMP]], {{.*}} + // CHECK: call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %{{.*}}, <16 x float> %{{.*}}, i32 25, <16 x i1> {{.*}}, i32 4) #2 return _mm512_mask_cmp_ps_mask(m, a, b, _CMP_NGE_UQ); } __mmask16 test_mm512_mask_cmp_ps_mask_ngt_uq(__mmask16 m, __m512 a, __m512 b) { // CHECK-LABEL: test_mm512_mask_cmp_ps_mask_ngt_uq - // CHECK: [[CMP:%.*]] = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, metadata !"ule", metadata !"fpexcept.strict") - // CHECK: and <16 x i1> [[CMP]], {{.*}} + // CHECK: call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %{{.*}}, <16 x float> %{{.*}}, i32 26, <16 x i1> {{.*}}, i32 4) #2 return _mm512_mask_cmp_ps_mask(m, a, b, _CMP_NGT_UQ); } __mmask16 test_mm512_mask_cmp_ps_mask_false_os(__mmask16 m, __m512 a, __m512 b) { // CHECK-LABEL: test_mm512_mask_cmp_ps_mask_false_os - // CHECK: [[CMP:%.*]] = call <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float> %{{.*}}, <16 x float> %{{.*}}, i32 27, i32 4) - // CHECK: and <16 x i1> [[CMP]], {{.*}} + // CHECK: call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %{{.*}}, <16 x float> %{{.*}}, i32 27, <16 x i1> {{.*}}, i32 4) #2 return _mm512_mask_cmp_ps_mask(m, a, b, _CMP_FALSE_OS); } __mmask16 test_mm512_mask_cmp_ps_mask_neq_os(__mmask16 m, __m512 a, __m512 b) { // CHECK-LABEL: test_mm512_mask_cmp_ps_mask_neq_os - // CHECK: [[CMP:%.*]] = call <16 x i1> @llvm.experimental.constrained.fcmps.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, metadata !"one", metadata !"fpexcept.strict") - // CHECK: and <16 x i1> [[CMP]], {{.*}} + // CHECK: call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %{{.*}}, <16 x 
float> %{{.*}}, i32 28, <16 x i1> {{.*}}, i32 4) #2 return _mm512_mask_cmp_ps_mask(m, a, b, _CMP_NEQ_OS); } __mmask16 test_mm512_mask_cmp_ps_mask_ge_oq(__mmask16 m, __m512 a, __m512 b) { // CHECK-LABEL: test_mm512_mask_cmp_ps_mask_ge_oq - // CHECK: [[CMP:%.*]] = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, metadata !"oge", metadata !"fpexcept.strict") - // CHECK: and <16 x i1> [[CMP]], {{.*}} + // CHECK: call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %{{.*}}, <16 x float> %{{.*}}, i32 29, <16 x i1> {{.*}}, i32 4) #2 return _mm512_mask_cmp_ps_mask(m, a, b, _CMP_GE_OQ); } __mmask16 test_mm512_mask_cmp_ps_mask_gt_oq(__mmask16 m, __m512 a, __m512 b) { // CHECK-LABEL: test_mm512_mask_cmp_ps_mask_gt_oq - // CHECK: [[CMP:%.*]] = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, metadata !"ogt", metadata !"fpexcept.strict") - // CHECK: and <16 x i1> [[CMP]], {{.*}} + // CHECK: call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %{{.*}}, <16 x float> %{{.*}}, i32 30, <16 x i1> {{.*}}, i32 4) #2 return _mm512_mask_cmp_ps_mask(m, a, b, _CMP_GT_OQ); } __mmask16 test_mm512_mask_cmp_ps_mask_true_us(__mmask16 m, __m512 a, __m512 b) { // CHECK-LABEL: test_mm512_mask_cmp_ps_mask_true_us - // CHECK: [[CMP:%.*]] = call <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float> %{{.*}}, <16 x float> %{{.*}}, i32 31, i32 4) - // CHECK: and <16 x i1> [[CMP]], {{.*}} + // CHECK: call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %{{.*}}, <16 x float> %{{.*}}, i32 31, <16 x i1> {{.*}}, i32 4) #2 return _mm512_mask_cmp_ps_mask(m, a, b, _CMP_TRUE_US); } __mmask8 test_mm512_cmp_round_pd_mask(__m512d a, __m512d b) { // CHECK-LABEL: @test_mm512_cmp_round_pd_mask - // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, metadata !"oeq", metadata !"fpexcept.strict") + // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %{{.*}}, <8 x double> %{{.*}}, i32 0, <8 x i1> {{.*}}, i32 8) #2 return _mm512_cmp_round_pd_mask(a, b, _CMP_EQ_OQ, _MM_FROUND_NO_EXC); } __mmask8 test_mm512_mask_cmp_round_pd_mask(__mmask8 m, __m512d a, __m512d b) { // CHECK-LABEL: @test_mm512_mask_cmp_round_pd_mask - // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, metadata !"oeq", metadata !"fpexcept.strict") - // CHECK: and <8 x i1> [[CMP]], {{.*}} + // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %{{.*}}, <8 x double> %{{.*}}, i32 0, <8 x i1> {{.*}}, i32 8) #2 return _mm512_mask_cmp_round_pd_mask(m, a, b, _CMP_EQ_OQ, _MM_FROUND_NO_EXC); } __mmask8 test_mm512_cmp_pd_mask_eq_oq(__m512d a, __m512d b) { // CHECK-LABEL: @test_mm512_cmp_pd_mask_eq_oq - // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmp.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, metadata !"oeq", metadata !"fpexcept.strict") + // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %{{.*}}, <8 x double> %{{.*}}, i32 0, <8 x i1> {{.*}}, i32 4) #2 return _mm512_cmp_pd_mask(a, b, _CMP_EQ_OQ); } __mmask8 test_mm512_cmp_pd_mask_lt_os(__m512d a, __m512d b) { // CHECK-LABEL: test_mm512_cmp_pd_mask_lt_os - // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmps.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, metadata !"olt", metadata !"fpexcept.strict") + // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %{{.*}}, <8 x double> 
%{{.*}}, i32 1, <8 x i1> {{.*}}, i32 4) #2 return _mm512_cmp_pd_mask(a, b, _CMP_LT_OS); } __mmask8 test_mm512_cmp_pd_mask_le_os(__m512d a, __m512d b) { // CHECK-LABEL: test_mm512_cmp_pd_mask_le_os - // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmps.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, metadata !"ole", metadata !"fpexcept.strict") + // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %{{.*}}, <8 x double> %{{.*}}, i32 2, <8 x i1> {{.*}}, i32 4) #2 return _mm512_cmp_pd_mask(a, b, _CMP_LE_OS); } __mmask8 test_mm512_cmp_pd_mask_unord_q(__m512d a, __m512d b) { // CHECK-LABEL: test_mm512_cmp_pd_mask_unord_q - // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmp.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, metadata !"uno", metadata !"fpexcept.strict") + // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %{{.*}}, <8 x double> %{{.*}}, i32 3, <8 x i1> {{.*}}, i32 4) #2 return _mm512_cmp_pd_mask(a, b, _CMP_UNORD_Q); } __mmask8 test_mm512_cmp_pd_mask_neq_uq(__m512d a, __m512d b) { // CHECK-LABEL: test_mm512_cmp_pd_mask_neq_uq - // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmp.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, metadata !"une", metadata !"fpexcept.strict") + // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %{{.*}}, <8 x double> %{{.*}}, i32 4, <8 x i1> {{.*}}, i32 4) #2 return _mm512_cmp_pd_mask(a, b, _CMP_NEQ_UQ); } __mmask8 test_mm512_cmp_pd_mask_nlt_us(__m512d a, __m512d b) { // CHECK-LABEL: test_mm512_cmp_pd_mask_nlt_us - // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmps.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, metadata !"uge", metadata !"fpexcept.strict") + // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %{{.*}}, <8 x double> %{{.*}}, i32 5, <8 x i1> {{.*}}, i32 4) #2 return _mm512_cmp_pd_mask(a, b, _CMP_NLT_US); } __mmask8 test_mm512_cmp_pd_mask_nle_us(__m512d a, __m512d b) { // CHECK-LABEL: test_mm512_cmp_pd_mask_nle_us - // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmps.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, metadata !"ugt", metadata !"fpexcept.strict") + // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %{{.*}}, <8 x double> %{{.*}}, i32 6, <8 x i1> {{.*}}, i32 4) #2 return _mm512_cmp_pd_mask(a, b, _CMP_NLE_US); } __mmask8 test_mm512_cmp_pd_mask_ord_q(__m512d a, __m512d b) { // CHECK-LABEL: test_mm512_cmp_pd_mask_ord_q - // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmp.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, metadata !"ord", metadata !"fpexcept.strict") + // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %{{.*}}, <8 x double> %{{.*}}, i32 7, <8 x i1> {{.*}}, i32 4) #2 return _mm512_cmp_pd_mask(a, b, _CMP_ORD_Q); } __mmask8 test_mm512_cmp_pd_mask_eq_uq(__m512d a, __m512d b) { // CHECK-LABEL: test_mm512_cmp_pd_mask_eq_uq - // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmp.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, metadata !"ueq", metadata !"fpexcept.strict") + // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %{{.*}}, <8 x double> %{{.*}}, i32 8, <8 x i1> {{.*}}, i32 4) #2 return _mm512_cmp_pd_mask(a, b, _CMP_EQ_UQ); } __mmask8 test_mm512_cmp_pd_mask_nge_us(__m512d a, __m512d b) { // CHECK-LABEL: test_mm512_cmp_pd_mask_nge_us - // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmps.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, metadata !"ult", metadata !"fpexcept.strict") + // CHECK: call <8 x i1> 
@llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %{{.*}}, <8 x double> %{{.*}}, i32 9, <8 x i1> {{.*}}, i32 4) #2 return _mm512_cmp_pd_mask(a, b, _CMP_NGE_US); } __mmask8 test_mm512_cmp_pd_mask_ngt_us(__m512d a, __m512d b) { // CHECK-LABEL: test_mm512_cmp_pd_mask_ngt_us - // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmps.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, metadata !"ule", metadata !"fpexcept.strict") + // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %{{.*}}, <8 x double> %{{.*}}, i32 10, <8 x i1> {{.*}}, i32 4) #2 return _mm512_cmp_pd_mask(a, b, _CMP_NGT_US); } __mmask8 test_mm512_cmp_pd_mask_false_oq(__m512d a, __m512d b) { // CHECK-LABEL: test_mm512_cmp_pd_mask_false_oq - // CHECK: call <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double> %{{.*}}, <8 x double> %{{.*}}, i32 11, i32 4) + // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %{{.*}}, <8 x double> %{{.*}}, i32 11, <8 x i1> {{.*}}, i32 4) #2 return _mm512_cmp_pd_mask(a, b, _CMP_FALSE_OQ); } __mmask8 test_mm512_cmp_pd_mask_neq_oq(__m512d a, __m512d b) { // CHECK-LABEL: test_mm512_cmp_pd_mask_neq_oq - // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmp.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, metadata !"one", metadata !"fpexcept.strict") + // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %{{.*}}, <8 x double> %{{.*}}, i32 12, <8 x i1> {{.*}}, i32 4) #2 return _mm512_cmp_pd_mask(a, b, _CMP_NEQ_OQ); } __mmask8 test_mm512_cmp_pd_mask_ge_os(__m512d a, __m512d b) { // CHECK-LABEL: test_mm512_cmp_pd_mask_ge_os - // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmps.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, metadata !"oge", metadata !"fpexcept.strict") + // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %{{.*}}, <8 x double> %{{.*}}, i32 13, <8 x i1> {{.*}}, i32 4) #2 return _mm512_cmp_pd_mask(a, b, _CMP_GE_OS); } __mmask8 test_mm512_cmp_pd_mask_gt_os(__m512d a, __m512d b) { // CHECK-LABEL: test_mm512_cmp_pd_mask_gt_os - // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmps.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, metadata !"ogt", metadata !"fpexcept.strict") + // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %{{.*}}, <8 x double> %{{.*}}, i32 14, <8 x i1> {{.*}}, i32 4) #2 return _mm512_cmp_pd_mask(a, b, _CMP_GT_OS); } __mmask8 test_mm512_cmp_pd_mask_true_uq(__m512d a, __m512d b) { // CHECK-LABEL: test_mm512_cmp_pd_mask_true_uq - // CHECK: call <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double> %{{.*}}, <8 x double> %{{.*}}, i32 15, i32 4) + // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %{{.*}}, <8 x double> %{{.*}}, i32 15, <8 x i1> {{.*}}, i32 4) #2 return _mm512_cmp_pd_mask(a, b, _CMP_TRUE_UQ); } __mmask8 test_mm512_cmp_pd_mask_eq_os(__m512d a, __m512d b) { // CHECK-LABEL: test_mm512_cmp_pd_mask_eq_os - // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmps.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, metadata !"oeq", metadata !"fpexcept.strict") + // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %{{.*}}, <8 x double> %{{.*}}, i32 16, <8 x i1> {{.*}}, i32 4) #2 return _mm512_cmp_pd_mask(a, b, _CMP_EQ_OS); } __mmask8 test_mm512_cmp_pd_mask_lt_oq(__m512d a, __m512d b) { // CHECK-LABEL: test_mm512_cmp_pd_mask_lt_oq - // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmp.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, metadata !"olt", metadata !"fpexcept.strict") + // CHECK: call <8 x i1> 
@llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %{{.*}}, <8 x double> %{{.*}}, i32 17, <8 x i1> {{.*}}, i32 4) #2 return _mm512_cmp_pd_mask(a, b, _CMP_LT_OQ); } __mmask8 test_mm512_cmp_pd_mask_le_oq(__m512d a, __m512d b) { // CHECK-LABEL: test_mm512_cmp_pd_mask_le_oq - // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmp.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, metadata !"ole", metadata !"fpexcept.strict") + // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %{{.*}}, <8 x double> %{{.*}}, i32 18, <8 x i1> {{.*}}, i32 4) #2 return _mm512_cmp_pd_mask(a, b, _CMP_LE_OQ); } __mmask8 test_mm512_cmp_pd_mask_unord_s(__m512d a, __m512d b) { // CHECK-LABEL: test_mm512_cmp_pd_mask_unord_s - // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmps.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, metadata !"uno", metadata !"fpexcept.strict") + // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %{{.*}}, <8 x double> %{{.*}}, i32 19, <8 x i1> {{.*}}, i32 4) #2 return _mm512_cmp_pd_mask(a, b, _CMP_UNORD_S); } __mmask8 test_mm512_cmp_pd_mask_neq_us(__m512d a, __m512d b) { // CHECK-LABEL: test_mm512_cmp_pd_mask_neq_us - // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmps.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, metadata !"une", metadata !"fpexcept.strict") + // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %{{.*}}, <8 x double> %{{.*}}, i32 20, <8 x i1> {{.*}}, i32 4) #2 return _mm512_cmp_pd_mask(a, b, _CMP_NEQ_US); } __mmask8 test_mm512_cmp_pd_mask_nlt_uq(__m512d a, __m512d b) { // CHECK-LABEL: test_mm512_cmp_pd_mask_nlt_uq - // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmp.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, metadata !"uge", metadata !"fpexcept.strict") + // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %{{.*}}, <8 x double> %{{.*}}, i32 21, <8 x i1> {{.*}}, i32 4) #2 return _mm512_cmp_pd_mask(a, b, _CMP_NLT_UQ); } __mmask8 test_mm512_cmp_pd_mask_nle_uq(__m512d a, __m512d b) { // CHECK-LABEL: test_mm512_cmp_pd_mask_nle_uq - // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmp.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, metadata !"ugt", metadata !"fpexcept.strict") + // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %{{.*}}, <8 x double> %{{.*}}, i32 22, <8 x i1> {{.*}}, i32 4) #2 return _mm512_cmp_pd_mask(a, b, _CMP_NLE_UQ); } __mmask8 test_mm512_cmp_pd_mask_ord_s(__m512d a, __m512d b) { // CHECK-LABEL: test_mm512_cmp_pd_mask_ord_s - // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmps.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, metadata !"ord", metadata !"fpexcept.strict") + // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %{{.*}}, <8 x double> %{{.*}}, i32 23, <8 x i1> {{.*}}, i32 4) #2 return _mm512_cmp_pd_mask(a, b, _CMP_ORD_S); } __mmask8 test_mm512_cmp_pd_mask_eq_us(__m512d a, __m512d b) { // CHECK-LABEL: test_mm512_cmp_pd_mask_eq_us - // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmps.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, metadata !"ueq", metadata !"fpexcept.strict") + // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %{{.*}}, <8 x double> %{{.*}}, i32 24, <8 x i1> {{.*}}, i32 4) #2 return _mm512_cmp_pd_mask(a, b, _CMP_EQ_US); } __mmask8 test_mm512_cmp_pd_mask_nge_uq(__m512d a, __m512d b) { // CHECK-LABEL: test_mm512_cmp_pd_mask_nge_uq - // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmp.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, 
metadata !"ult", metadata !"fpexcept.strict") + // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %{{.*}}, <8 x double> %{{.*}}, i32 25, <8 x i1> {{.*}}, i32 4) #2 return _mm512_cmp_pd_mask(a, b, _CMP_NGE_UQ); } __mmask8 test_mm512_cmp_pd_mask_ngt_uq(__m512d a, __m512d b) { // CHECK-LABEL: test_mm512_cmp_pd_mask_ngt_uq - // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmp.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, metadata !"ule", metadata !"fpexcept.strict") + // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %{{.*}}, <8 x double> %{{.*}}, i32 26, <8 x i1> {{.*}}, i32 4) #2 return _mm512_cmp_pd_mask(a, b, _CMP_NGT_UQ); } __mmask8 test_mm512_cmp_pd_mask_false_os(__m512d a, __m512d b) { // CHECK-LABEL: test_mm512_cmp_pd_mask_false_os - // CHECK: call <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double> %{{.*}}, <8 x double> %{{.*}}, i32 27, i32 4) + // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %{{.*}}, <8 x double> %{{.*}}, i32 27, <8 x i1> {{.*}}, i32 4) #2 return _mm512_cmp_pd_mask(a, b, _CMP_FALSE_OS); } __mmask8 test_mm512_cmp_pd_mask_neq_os(__m512d a, __m512d b) { // CHECK-LABEL: test_mm512_cmp_pd_mask_neq_os - // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmps.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, metadata !"one", metadata !"fpexcept.strict") + // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %{{.*}}, <8 x double> %{{.*}}, i32 28, <8 x i1> {{.*}}, i32 4) #2 return _mm512_cmp_pd_mask(a, b, _CMP_NEQ_OS); } __mmask8 test_mm512_cmp_pd_mask_ge_oq(__m512d a, __m512d b) { // CHECK-LABEL: test_mm512_cmp_pd_mask_ge_oq - // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmp.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, metadata !"oge", metadata !"fpexcept.strict") + // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %{{.*}}, <8 x double> %{{.*}}, i32 29, <8 x i1> {{.*}}, i32 4) #2 return _mm512_cmp_pd_mask(a, b, _CMP_GE_OQ); } __mmask8 test_mm512_cmp_pd_mask_gt_oq(__m512d a, __m512d b) { // CHECK-LABEL: test_mm512_cmp_pd_mask_gt_oq - // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmp.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, metadata !"ogt", metadata !"fpexcept.strict") + // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %{{.*}}, <8 x double> %{{.*}}, i32 30, <8 x i1> {{.*}}, i32 4) #2 return _mm512_cmp_pd_mask(a, b, _CMP_GT_OQ); } __mmask8 test_mm512_cmp_pd_mask_true_us(__m512d a, __m512d b) { // CHECK-LABEL: test_mm512_cmp_pd_mask_true_us - // CHECK: call <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double> %{{.*}}, <8 x double> %{{.*}}, i32 31, i32 4) + // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %{{.*}}, <8 x double> %{{.*}}, i32 31, <8 x i1> {{.*}}, i32 4) #2 return _mm512_cmp_pd_mask(a, b, _CMP_TRUE_US); } __mmask8 test_mm512_mask_cmp_pd_mask_eq_oq(__mmask8 m, __m512d a, __m512d b) { // CHECK-LABEL: @test_mm512_mask_cmp_pd_mask_eq_oq - // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, metadata !"oeq", metadata !"fpexcept.strict") - // CHECK: and <8 x i1> [[CMP]], {{.*}} + // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %{{.*}}, <8 x double> %{{.*}}, i32 0, <8 x i1> {{.*}}, i32 4) #2 return _mm512_mask_cmp_pd_mask(m, a, b, _CMP_EQ_OQ); } __mmask8 test_mm512_mask_cmp_pd_mask_lt_os(__mmask8 m, __m512d a, __m512d b) { // CHECK-LABEL: test_mm512_mask_cmp_pd_mask_lt_os - // CHECK: [[CMP:%.*]] = 
call <8 x i1> @llvm.experimental.constrained.fcmps.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, metadata !"olt", metadata !"fpexcept.strict") - // CHECK: and <8 x i1> [[CMP]], {{.*}} + // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %{{.*}}, <8 x double> %{{.*}}, i32 1, <8 x i1> {{.*}}, i32 4) #2 return _mm512_mask_cmp_pd_mask(m, a, b, _CMP_LT_OS); } __mmask8 test_mm512_mask_cmp_pd_mask_le_os(__mmask8 m, __m512d a, __m512d b) { // CHECK-LABEL: test_mm512_mask_cmp_pd_mask_le_os - // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, metadata !"ole", metadata !"fpexcept.strict") - // CHECK: and <8 x i1> [[CMP]], {{.*}} + // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %{{.*}}, <8 x double> %{{.*}}, i32 2, <8 x i1> {{.*}}, i32 4) #2 return _mm512_mask_cmp_pd_mask(m, a, b, _CMP_LE_OS); } __mmask8 test_mm512_mask_cmp_pd_mask_unord_q(__mmask8 m, __m512d a, __m512d b) { // CHECK-LABEL: test_mm512_mask_cmp_pd_mask_unord_q - // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, metadata !"uno", metadata !"fpexcept.strict") - // CHECK: and <8 x i1> [[CMP]], {{.*}} + // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %{{.*}}, <8 x double> %{{.*}}, i32 3, <8 x i1> {{.*}}, i32 4) #2 return _mm512_mask_cmp_pd_mask(m, a, b, _CMP_UNORD_Q); } __mmask8 test_mm512_mask_cmp_pd_mask_neq_uq(__mmask8 m, __m512d a, __m512d b) { // CHECK-LABEL: test_mm512_mask_cmp_pd_mask_neq_uq - // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, metadata !"une", metadata !"fpexcept.strict") - // CHECK: and <8 x i1> [[CMP]], {{.*}} + // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %{{.*}}, <8 x double> %{{.*}}, i32 4, <8 x i1> {{.*}}, i32 4) #2 return _mm512_mask_cmp_pd_mask(m, a, b, _CMP_NEQ_UQ); } __mmask8 test_mm512_mask_cmp_pd_mask_nlt_us(__mmask8 m, __m512d a, __m512d b) { // CHECK-LABEL: test_mm512_mask_cmp_pd_mask_nlt_us - // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, metadata !"uge", metadata !"fpexcept.strict") - // CHECK: and <8 x i1> [[CMP]], {{.*}} + // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %{{.*}}, <8 x double> %{{.*}}, i32 5, <8 x i1> {{.*}}, i32 4) #2 return _mm512_mask_cmp_pd_mask(m, a, b, _CMP_NLT_US); } __mmask8 test_mm512_mask_cmp_pd_mask_nle_us(__mmask8 m, __m512d a, __m512d b) { // CHECK-LABEL: test_mm512_mask_cmp_pd_mask_nle_us - // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, metadata !"ugt", metadata !"fpexcept.strict") - // CHECK: and <8 x i1> [[CMP]], {{.*}} + // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %{{.*}}, <8 x double> %{{.*}}, i32 6, <8 x i1> {{.*}}, i32 4) #2 return _mm512_mask_cmp_pd_mask(m, a, b, _CMP_NLE_US); } __mmask8 test_mm512_mask_cmp_pd_mask_ord_q(__mmask8 m, __m512d a, __m512d b) { // CHECK-LABEL: test_mm512_mask_cmp_pd_mask_ord_q - // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, metadata !"ord", metadata !"fpexcept.strict") - // CHECK: and <8 x i1> [[CMP]], {{.*}} + // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %{{.*}}, <8 x double> %{{.*}}, i32 7, <8 x i1> {{.*}}, i32 4) #2 return 
_mm512_mask_cmp_pd_mask(m, a, b, _CMP_ORD_Q); } __mmask8 test_mm512_mask_cmp_pd_mask_eq_uq(__mmask8 m, __m512d a, __m512d b) { // CHECK-LABEL: test_mm512_mask_cmp_pd_mask_eq_uq - // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, metadata !"ueq", metadata !"fpexcept.strict") - // CHECK: and <8 x i1> [[CMP]], {{.*}} + // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %{{.*}}, <8 x double> %{{.*}}, i32 8, <8 x i1> {{.*}}, i32 4) #2 return _mm512_mask_cmp_pd_mask(m, a, b, _CMP_EQ_UQ); } __mmask8 test_mm512_mask_cmp_pd_mask_nge_us(__mmask8 m, __m512d a, __m512d b) { // CHECK-LABEL: test_mm512_mask_cmp_pd_mask_nge_us - // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, metadata !"ult", metadata !"fpexcept.strict") - // CHECK: and <8 x i1> [[CMP]], {{.*}} + // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %{{.*}}, <8 x double> %{{.*}}, i32 9, <8 x i1> {{.*}}, i32 4) #2 return _mm512_mask_cmp_pd_mask(m, a, b, _CMP_NGE_US); } __mmask8 test_mm512_mask_cmp_pd_mask_ngt_us(__mmask8 m, __m512d a, __m512d b) { // CHECK-LABEL: test_mm512_mask_cmp_pd_mask_ngt_us - // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, metadata !"ule", metadata !"fpexcept.strict") - // CHECK: and <8 x i1> [[CMP]], {{.*}} + // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %{{.*}}, <8 x double> %{{.*}}, i32 10, <8 x i1> {{.*}}, i32 4) #2 return _mm512_mask_cmp_pd_mask(m, a, b, _CMP_NGT_US); } __mmask8 test_mm512_mask_cmp_pd_mask_false_oq(__mmask8 m, __m512d a, __m512d b) { // CHECK-LABEL: test_mm512_mask_cmp_pd_mask_false_oq - // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double> %{{.*}}, <8 x double> %{{.*}}, i32 11, i32 4) - // CHECK: and <8 x i1> [[CMP]], {{.*}} + // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %{{.*}}, <8 x double> %{{.*}}, i32 11, <8 x i1> {{.*}}, i32 4) #2 return _mm512_mask_cmp_pd_mask(m, a, b, _CMP_FALSE_OQ); } __mmask8 test_mm512_mask_cmp_pd_mask_neq_oq(__mmask8 m, __m512d a, __m512d b) { // CHECK-LABEL: test_mm512_mask_cmp_pd_mask_neq_oq - // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, metadata !"one", metadata !"fpexcept.strict") - // CHECK: and <8 x i1> [[CMP]], {{.*}} + // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %{{.*}}, <8 x double> %{{.*}}, i32 12, <8 x i1> {{.*}}, i32 4) #2 return _mm512_mask_cmp_pd_mask(m, a, b, _CMP_NEQ_OQ); } __mmask8 test_mm512_mask_cmp_pd_mask_ge_os(__mmask8 m, __m512d a, __m512d b) { // CHECK-LABEL: test_mm512_mask_cmp_pd_mask_ge_os - // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, metadata !"oge", metadata !"fpexcept.strict") - // CHECK: and <8 x i1> [[CMP]], {{.*}} + // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %{{.*}}, <8 x double> %{{.*}}, i32 13, <8 x i1> {{.*}}, i32 4) #2 return _mm512_mask_cmp_pd_mask(m, a, b, _CMP_GE_OS); } __mmask8 test_mm512_mask_cmp_pd_mask_gt_os(__mmask8 m, __m512d a, __m512d b) { // CHECK-LABEL: test_mm512_mask_cmp_pd_mask_gt_os - // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, metadata !"ogt", metadata !"fpexcept.strict") - // CHECK: and <8 x 
i1> [[CMP]], {{.*}} + // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %{{.*}}, <8 x double> %{{.*}}, i32 14, <8 x i1> {{.*}}, i32 4) #2 return _mm512_mask_cmp_pd_mask(m, a, b, _CMP_GT_OS); } __mmask8 test_mm512_mask_cmp_pd_mask_true_uq(__mmask8 m, __m512d a, __m512d b) { // CHECK-LABEL: test_mm512_mask_cmp_pd_mask_true_uq - // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double> %{{.*}}, <8 x double> %{{.*}}, i32 15, i32 4) - // CHECK: and <8 x i1> [[CMP]], {{.*}} + // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %{{.*}}, <8 x double> %{{.*}}, i32 15, <8 x i1> {{.*}}, i32 4) #2 return _mm512_mask_cmp_pd_mask(m, a, b, _CMP_TRUE_UQ); } __mmask8 test_mm512_mask_cmp_pd_mask_eq_os(__mmask8 m, __m512d a, __m512d b) { // CHECK-LABEL: test_mm512_mask_cmp_pd_mask_eq_os - // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, metadata !"oeq", metadata !"fpexcept.strict") - // CHECK: and <8 x i1> [[CMP]], {{.*}} + // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %{{.*}}, <8 x double> %{{.*}}, i32 16, <8 x i1> {{.*}}, i32 4) #2 return _mm512_mask_cmp_pd_mask(m, a, b, _CMP_EQ_OS); } __mmask8 test_mm512_mask_cmp_pd_mask_lt_oq(__mmask8 m, __m512d a, __m512d b) { // CHECK-LABEL: test_mm512_mask_cmp_pd_mask_lt_oq - // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, metadata !"olt", metadata !"fpexcept.strict") - // CHECK: and <8 x i1> [[CMP]], {{.*}} + // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %{{.*}}, <8 x double> %{{.*}}, i32 17, <8 x i1> {{.*}}, i32 4) #2 return _mm512_mask_cmp_pd_mask(m, a, b, _CMP_LT_OQ); } __mmask8 test_mm512_mask_cmp_pd_mask_le_oq(__mmask8 m, __m512d a, __m512d b) { // CHECK-LABEL: test_mm512_mask_cmp_pd_mask_le_oq - // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, metadata !"ole", metadata !"fpexcept.strict") - // CHECK: and <8 x i1> [[CMP]], {{.*}} + // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %{{.*}}, <8 x double> %{{.*}}, i32 18, <8 x i1> {{.*}}, i32 4) #2 return _mm512_mask_cmp_pd_mask(m, a, b, _CMP_LE_OQ); } __mmask8 test_mm512_mask_cmp_pd_mask_unord_s(__mmask8 m, __m512d a, __m512d b) { // CHECK-LABEL: test_mm512_mask_cmp_pd_mask_unord_s - // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, metadata !"uno", metadata !"fpexcept.strict") - // CHECK: and <8 x i1> [[CMP]], {{.*}} + // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %{{.*}}, <8 x double> %{{.*}}, i32 19, <8 x i1> {{.*}}, i32 4) #2 return _mm512_mask_cmp_pd_mask(m, a, b, _CMP_UNORD_S); } __mmask8 test_mm512_mask_cmp_pd_mask_neq_us(__mmask8 m, __m512d a, __m512d b) { // CHECK-LABEL: test_mm512_mask_cmp_pd_mask_neq_us - // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, metadata !"une", metadata !"fpexcept.strict") - // CHECK: and <8 x i1> [[CMP]], {{.*}} + // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %{{.*}}, <8 x double> %{{.*}}, i32 20, <8 x i1> {{.*}}, i32 4) #2 return _mm512_mask_cmp_pd_mask(m, a, b, _CMP_NEQ_US); } __mmask8 test_mm512_mask_cmp_pd_mask_nlt_uq(__mmask8 m, __m512d a, __m512d b) { // CHECK-LABEL: test_mm512_mask_cmp_pd_mask_nlt_uq - // CHECK: [[CMP:%.*]] = 
call <8 x i1> @llvm.experimental.constrained.fcmp.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, metadata !"uge", metadata !"fpexcept.strict") - // CHECK: and <8 x i1> [[CMP]], {{.*}} + // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %{{.*}}, <8 x double> %{{.*}}, i32 21, <8 x i1> {{.*}}, i32 4) #2 return _mm512_mask_cmp_pd_mask(m, a, b, _CMP_NLT_UQ); } __mmask8 test_mm512_mask_cmp_pd_mask_nle_uq(__mmask8 m, __m512d a, __m512d b) { // CHECK-LABEL: test_mm512_mask_cmp_pd_mask_nle_uq - // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, metadata !"ugt", metadata !"fpexcept.strict") - // CHECK: and <8 x i1> [[CMP]], {{.*}} + // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %{{.*}}, <8 x double> %{{.*}}, i32 22, <8 x i1> {{.*}}, i32 4) #2 return _mm512_mask_cmp_pd_mask(m, a, b, _CMP_NLE_UQ); } __mmask8 test_mm512_mask_cmp_pd_mask_ord_s(__mmask8 m, __m512d a, __m512d b) { // CHECK-LABEL: test_mm512_mask_cmp_pd_mask_ord_s - // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, metadata !"ord", metadata !"fpexcept.strict") - // CHECK: and <8 x i1> [[CMP]], {{.*}} + // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %{{.*}}, <8 x double> %{{.*}}, i32 23, <8 x i1> {{.*}}, i32 4) #2 return _mm512_mask_cmp_pd_mask(m, a, b, _CMP_ORD_S); } __mmask8 test_mm512_mask_cmp_pd_mask_eq_us(__mmask8 m, __m512d a, __m512d b) { // CHECK-LABEL: test_mm512_mask_cmp_pd_mask_eq_us - // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, metadata !"ueq", metadata !"fpexcept.strict") - // CHECK: and <8 x i1> [[CMP]], {{.*}} + // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %{{.*}}, <8 x double> %{{.*}}, i32 24, <8 x i1> {{.*}}, i32 4) #2 return _mm512_mask_cmp_pd_mask(m, a, b, _CMP_EQ_US); } __mmask8 test_mm512_mask_cmp_pd_mask_nge_uq(__mmask8 m, __m512d a, __m512d b) { // CHECK-LABEL: test_mm512_mask_cmp_pd_mask_nge_uq - // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, metadata !"ult", metadata !"fpexcept.strict") - // CHECK: and <8 x i1> [[CMP]], {{.*}} + // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %{{.*}}, <8 x double> %{{.*}}, i32 25, <8 x i1> {{.*}}, i32 4) #2 return _mm512_mask_cmp_pd_mask(m, a, b, _CMP_NGE_UQ); } __mmask8 test_mm512_mask_cmp_pd_mask_ngt_uq(__mmask8 m, __m512d a, __m512d b) { // CHECK-LABEL: test_mm512_mask_cmp_pd_mask_ngt_uq - // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, metadata !"ule", metadata !"fpexcept.strict") - // CHECK: and <8 x i1> [[CMP]], {{.*}} + // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %{{.*}}, <8 x double> %{{.*}}, i32 26, <8 x i1> {{.*}}, i32 4) #2 return _mm512_mask_cmp_pd_mask(m, a, b, _CMP_NGT_UQ); } __mmask8 test_mm512_mask_cmp_pd_mask_false_os(__mmask8 m, __m512d a, __m512d b) { // CHECK-LABEL: test_mm512_mask_cmp_pd_mask_false_os - // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double> %{{.*}}, <8 x double> %{{.*}}, i32 27, i32 4) - // CHECK: and <8 x i1> [[CMP]], {{.*}} + // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %{{.*}}, <8 x double> %{{.*}}, i32 27, <8 x i1> {{.*}}, i32 4) #2 return _mm512_mask_cmp_pd_mask(m, a, b, 
_CMP_FALSE_OS);
}

__mmask8 test_mm512_mask_cmp_pd_mask_neq_os(__mmask8 m, __m512d a, __m512d b) {
// CHECK-LABEL: test_mm512_mask_cmp_pd_mask_neq_os
- // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, metadata !"one", metadata !"fpexcept.strict")
- // CHECK: and <8 x i1> [[CMP]], {{.*}}
+ // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %{{.*}}, <8 x double> %{{.*}}, i32 28, <8 x i1> {{.*}}, i32 4) #2
return _mm512_mask_cmp_pd_mask(m, a, b, _CMP_NEQ_OS);
}

__mmask8 test_mm512_mask_cmp_pd_mask_ge_oq(__mmask8 m, __m512d a, __m512d b) {
// CHECK-LABEL: test_mm512_mask_cmp_pd_mask_ge_oq
- // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, metadata !"oge", metadata !"fpexcept.strict")
- // CHECK: and <8 x i1> [[CMP]], {{.*}}
+ // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %{{.*}}, <8 x double> %{{.*}}, i32 29, <8 x i1> {{.*}}, i32 4) #2
return _mm512_mask_cmp_pd_mask(m, a, b, _CMP_GE_OQ);
}

__mmask8 test_mm512_mask_cmp_pd_mask_gt_oq(__mmask8 m, __m512d a, __m512d b) {
// CHECK-LABEL: test_mm512_mask_cmp_pd_mask_gt_oq
- // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, metadata !"ogt", metadata !"fpexcept.strict")
- // CHECK: and <8 x i1> [[CMP]], {{.*}}
+ // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %{{.*}}, <8 x double> %{{.*}}, i32 30, <8 x i1> {{.*}}, i32 4) #2
return _mm512_mask_cmp_pd_mask(m, a, b, _CMP_GT_OQ);
}

__mmask8 test_mm512_mask_cmp_pd_mask_true_us(__mmask8 m, __m512d a, __m512d b) {
// CHECK-LABEL: test_mm512_mask_cmp_pd_mask_true_us
- // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double> %{{.*}}, <8 x double> %{{.*}}, i32 31, i32 4)
- // CHECK: and <8 x i1> [[CMP]], {{.*}}
+ // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %{{.*}}, <8 x double> %{{.*}}, i32 31, <8 x i1> {{.*}}, i32 4) #2
return _mm512_mask_cmp_pd_mask(m, a, b, _CMP_TRUE_US);
}

diff --git a/clang/test/CodeGen/avx512vl-builtins-constrained-cmp.c b/clang/test/CodeGen/avx512vl-builtins-constrained-cmp.c
--- a/clang/test/CodeGen/avx512vl-builtins-constrained-cmp.c
+++ b/clang/test/CodeGen/avx512vl-builtins-constrained-cmp.c
@@ -4,1664 +4,1536 @@
__mmask8 test_mm256_cmp_ps_mask_eq_oq(__m256 a, __m256 b) {
// CHECK-LABEL: @test_mm256_cmp_ps_mask_eq_oq
- // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmp.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"oeq", metadata !"fpexcept.strict")
+ // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, i32 0, <8 x i1> {{.*}})
return _mm256_cmp_ps_mask(a, b, _CMP_EQ_OQ);
}

__mmask8 test_mm256_cmp_ps_mask_lt_os(__m256 a, __m256 b) {
// CHECK-LABEL: test_mm256_cmp_ps_mask_lt_os
- // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmps.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"olt", metadata !"fpexcept.strict")
+ // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, i32 1, <8 x i1> {{.*}})
return _mm256_cmp_ps_mask(a, b, _CMP_LT_OS);
}

__mmask8 test_mm256_cmp_ps_mask_le_os(__m256 a, __m256 b) {
// CHECK-LABEL: test_mm256_cmp_ps_mask_le_os
- // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmps.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"ole", metadata !"fpexcept.strict")
+ // CHECK: call <8 x i1>
@llvm.x86.avx512.mask.cmp.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, i32 2, <8 x i1> {{.*}}) return _mm256_cmp_ps_mask(a, b, _CMP_LE_OS); } __mmask8 test_mm256_cmp_ps_mask_unord_q(__m256 a, __m256 b) { // CHECK-LABEL: test_mm256_cmp_ps_mask_unord_q - // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmp.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"uno", metadata !"fpexcept.strict") + // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, i32 3, <8 x i1> {{.*}}) return _mm256_cmp_ps_mask(a, b, _CMP_UNORD_Q); } __mmask8 test_mm256_cmp_ps_mask_neq_uq(__m256 a, __m256 b) { // CHECK-LABEL: test_mm256_cmp_ps_mask_neq_uq - // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmp.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"une", metadata !"fpexcept.strict") + // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, i32 4, <8 x i1> {{.*}}) return _mm256_cmp_ps_mask(a, b, _CMP_NEQ_UQ); } __mmask8 test_mm256_cmp_ps_mask_nlt_us(__m256 a, __m256 b) { // CHECK-LABEL: test_mm256_cmp_ps_mask_nlt_us - // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmps.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"uge", metadata !"fpexcept.strict") + // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, i32 5, <8 x i1> {{.*}}) return _mm256_cmp_ps_mask(a, b, _CMP_NLT_US); } __mmask8 test_mm256_cmp_ps_mask_nle_us(__m256 a, __m256 b) { // CHECK-LABEL: test_mm256_cmp_ps_mask_nle_us - // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmps.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"ugt", metadata !"fpexcept.strict") + // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, i32 6, <8 x i1> {{.*}}) return _mm256_cmp_ps_mask(a, b, _CMP_NLE_US); } __mmask8 test_mm256_cmp_ps_mask_ord_q(__m256 a, __m256 b) { // CHECK-LABEL: test_mm256_cmp_ps_mask_ord_q - // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmp.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"ord", metadata !"fpexcept.strict") + // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, i32 7, <8 x i1> {{.*}}) return _mm256_cmp_ps_mask(a, b, _CMP_ORD_Q); } __mmask8 test_mm256_cmp_ps_mask_eq_uq(__m256 a, __m256 b) { // CHECK-LABEL: test_mm256_cmp_ps_mask_eq_uq - // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmp.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"ueq", metadata !"fpexcept.strict") + // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, i32 8, <8 x i1> {{.*}}) return _mm256_cmp_ps_mask(a, b, _CMP_EQ_UQ); } __mmask8 test_mm256_cmp_ps_mask_nge_us(__m256 a, __m256 b) { // CHECK-LABEL: test_mm256_cmp_ps_mask_nge_us - // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmps.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"ult", metadata !"fpexcept.strict") + // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, i32 9, <8 x i1> {{.*}}) return _mm256_cmp_ps_mask(a, b, _CMP_NGE_US); } __mmask8 test_mm256_cmp_ps_mask_ngt_us(__m256 a, __m256 b) { // CHECK-LABEL: test_mm256_cmp_ps_mask_ngt_us - // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmps.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"ule", metadata !"fpexcept.strict") + // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> %{{.*}}, <8 x float> 
%{{.*}}, i32 10, <8 x i1> {{.*}}) return _mm256_cmp_ps_mask(a, b, _CMP_NGT_US); } __mmask8 test_mm256_cmp_ps_mask_false_oq(__m256 a, __m256 b) { // CHECK-LABEL: test_mm256_cmp_ps_mask_false_oq - // CHECK: call <8 x i1> @llvm.x86.avx512.cmp.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, i32 11) + // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, i32 11, <8 x i1> {{.*}}) return _mm256_cmp_ps_mask(a, b, _CMP_FALSE_OQ); } __mmask8 test_mm256_cmp_ps_mask_neq_oq(__m256 a, __m256 b) { // CHECK-LABEL: test_mm256_cmp_ps_mask_neq_oq - // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmp.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"one", metadata !"fpexcept.strict") + // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, i32 12, <8 x i1> {{.*}}) return _mm256_cmp_ps_mask(a, b, _CMP_NEQ_OQ); } __mmask8 test_mm256_cmp_ps_mask_ge_os(__m256 a, __m256 b) { // CHECK-LABEL: test_mm256_cmp_ps_mask_ge_os - // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmps.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"oge", metadata !"fpexcept.strict") + // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, i32 13, <8 x i1> {{.*}}) return _mm256_cmp_ps_mask(a, b, _CMP_GE_OS); } __mmask8 test_mm256_cmp_ps_mask_gt_os(__m256 a, __m256 b) { // CHECK-LABEL: test_mm256_cmp_ps_mask_gt_os - // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmps.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"ogt", metadata !"fpexcept.strict") + // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, i32 14, <8 x i1> {{.*}}) return _mm256_cmp_ps_mask(a, b, _CMP_GT_OS); } __mmask8 test_mm256_cmp_ps_mask_true_uq(__m256 a, __m256 b) { // CHECK-LABEL: test_mm256_cmp_ps_mask_true_uq - // CHECK: call <8 x i1> @llvm.x86.avx512.cmp.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, i32 15) + // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, i32 15, <8 x i1> {{.*}}) return _mm256_cmp_ps_mask(a, b, _CMP_TRUE_UQ); } __mmask8 test_mm256_cmp_ps_mask_eq_os(__m256 a, __m256 b) { // CHECK-LABEL: test_mm256_cmp_ps_mask_eq_os - // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmps.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"oeq", metadata !"fpexcept.strict") + // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, i32 16, <8 x i1> {{.*}}) return _mm256_cmp_ps_mask(a, b, _CMP_EQ_OS); } __mmask8 test_mm256_cmp_ps_mask_lt_oq(__m256 a, __m256 b) { // CHECK-LABEL: test_mm256_cmp_ps_mask_lt_oq - // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmp.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"olt", metadata !"fpexcept.strict") + // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, i32 17, <8 x i1> {{.*}}) return _mm256_cmp_ps_mask(a, b, _CMP_LT_OQ); } __mmask8 test_mm256_cmp_ps_mask_le_oq(__m256 a, __m256 b) { // CHECK-LABEL: test_mm256_cmp_ps_mask_le_oq - // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmp.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"ole", metadata !"fpexcept.strict") + // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, i32 18, <8 x i1> {{.*}}) return _mm256_cmp_ps_mask(a, b, _CMP_LE_OQ); } __mmask8 test_mm256_cmp_ps_mask_unord_s(__m256 a, __m256 b) { // CHECK-LABEL: 
test_mm256_cmp_ps_mask_unord_s - // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmps.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"uno", metadata !"fpexcept.strict") + // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, i32 19, <8 x i1> {{.*}}) return _mm256_cmp_ps_mask(a, b, _CMP_UNORD_S); } __mmask8 test_mm256_cmp_ps_mask_neq_us(__m256 a, __m256 b) { // CHECK-LABEL: test_mm256_cmp_ps_mask_neq_us - // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmps.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"une", metadata !"fpexcept.strict") + // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, i32 20, <8 x i1> {{.*}}) return _mm256_cmp_ps_mask(a, b, _CMP_NEQ_US); } __mmask8 test_mm256_cmp_ps_mask_nlt_uq(__m256 a, __m256 b) { // CHECK-LABEL: test_mm256_cmp_ps_mask_nlt_uq - // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmp.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"uge", metadata !"fpexcept.strict") + // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, i32 21, <8 x i1> {{.*}}) return _mm256_cmp_ps_mask(a, b, _CMP_NLT_UQ); } __mmask8 test_mm256_cmp_ps_mask_nle_uq(__m256 a, __m256 b) { // CHECK-LABEL: test_mm256_cmp_ps_mask_nle_uq - // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmp.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"ugt", metadata !"fpexcept.strict") + // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, i32 22, <8 x i1> {{.*}}) return _mm256_cmp_ps_mask(a, b, _CMP_NLE_UQ); } __mmask8 test_mm256_cmp_ps_mask_ord_s(__m256 a, __m256 b) { // CHECK-LABEL: test_mm256_cmp_ps_mask_ord_s - // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmps.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"ord", metadata !"fpexcept.strict") + // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, i32 23, <8 x i1> {{.*}}) return _mm256_cmp_ps_mask(a, b, _CMP_ORD_S); } __mmask8 test_mm256_cmp_ps_mask_eq_us(__m256 a, __m256 b) { // CHECK-LABEL: test_mm256_cmp_ps_mask_eq_us - // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmps.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"ueq", metadata !"fpexcept.strict") + // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, i32 24, <8 x i1> {{.*}}) return _mm256_cmp_ps_mask(a, b, _CMP_EQ_US); } __mmask8 test_mm256_cmp_ps_mask_nge_uq(__m256 a, __m256 b) { // CHECK-LABEL: test_mm256_cmp_ps_mask_nge_uq - // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmp.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"ult", metadata !"fpexcept.strict") + // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, i32 25, <8 x i1> {{.*}}) return _mm256_cmp_ps_mask(a, b, _CMP_NGE_UQ); } __mmask8 test_mm256_cmp_ps_mask_ngt_uq(__m256 a, __m256 b) { // CHECK-LABEL: test_mm256_cmp_ps_mask_ngt_uq - // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmp.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"ule", metadata !"fpexcept.strict") + // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, i32 26, <8 x i1> {{.*}}) return _mm256_cmp_ps_mask(a, b, _CMP_NGT_UQ); } __mmask8 test_mm256_cmp_ps_mask_false_os(__m256 a, __m256 b) { // CHECK-LABEL: test_mm256_cmp_ps_mask_false_os - // CHECK: call <8 x 
i1> @llvm.x86.avx512.cmp.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, i32 27) + // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, i32 27, <8 x i1> {{.*}}) return _mm256_cmp_ps_mask(a, b, _CMP_FALSE_OS); } __mmask8 test_mm256_cmp_ps_mask_neq_os(__m256 a, __m256 b) { // CHECK-LABEL: test_mm256_cmp_ps_mask_neq_os - // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmps.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"one", metadata !"fpexcept.strict") + // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, i32 28, <8 x i1> {{.*}}) return _mm256_cmp_ps_mask(a, b, _CMP_NEQ_OS); } __mmask8 test_mm256_cmp_ps_mask_ge_oq(__m256 a, __m256 b) { // CHECK-LABEL: test_mm256_cmp_ps_mask_ge_oq - // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmp.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"oge", metadata !"fpexcept.strict") + // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, i32 29, <8 x i1> {{.*}}) return _mm256_cmp_ps_mask(a, b, _CMP_GE_OQ); } __mmask8 test_mm256_cmp_ps_mask_gt_oq(__m256 a, __m256 b) { // CHECK-LABEL: test_mm256_cmp_ps_mask_gt_oq - // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmp.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"ogt", metadata !"fpexcept.strict") + // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, i32 30, <8 x i1> {{.*}}) return _mm256_cmp_ps_mask(a, b, _CMP_GT_OQ); } __mmask8 test_mm256_cmp_ps_mask_true_us(__m256 a, __m256 b) { // CHECK-LABEL: test_mm256_cmp_ps_mask_true_us - // CHECK: call <8 x i1> @llvm.x86.avx512.cmp.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, i32 31) + // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, i32 31, <8 x i1> {{.*}}) return _mm256_cmp_ps_mask(a, b, _CMP_TRUE_US); } __mmask8 test_mm256_mask_cmp_ps_mask_eq_oq(__mmask8 m, __m256 a, __m256 b) { // CHECK-LABEL: @test_mm256_mask_cmp_ps_mask_eq_oq - // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"oeq", metadata !"fpexcept.strict") - // CHECK: and <8 x i1> [[CMP]], {{.*}} + // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, i32 0, <8 x i1> {{.*}}) return _mm256_mask_cmp_ps_mask(m, a, b, _CMP_EQ_OQ); } __mmask8 test_mm256_mask_cmp_ps_mask_lt_os(__mmask8 m, __m256 a, __m256 b) { // CHECK-LABEL: test_mm256_mask_cmp_ps_mask_lt_os - // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"olt", metadata !"fpexcept.strict") - // CHECK: and <8 x i1> [[CMP]], {{.*}} + // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, i32 1, <8 x i1> {{.*}}) return _mm256_mask_cmp_ps_mask(m, a, b, _CMP_LT_OS); } __mmask8 test_mm256_mask_cmp_ps_mask_le_os(__mmask8 m, __m256 a, __m256 b) { // CHECK-LABEL: test_mm256_mask_cmp_ps_mask_le_os - // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"ole", metadata !"fpexcept.strict") - // CHECK: and <8 x i1> [[CMP]], {{.*}} + // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, i32 2, <8 x i1> {{.*}}) return _mm256_mask_cmp_ps_mask(m, a, b, _CMP_LE_OS); } __mmask8 
test_mm256_mask_cmp_ps_mask_unord_q(__mmask8 m, __m256 a, __m256 b) { // CHECK-LABEL: test_mm256_mask_cmp_ps_mask_unord_q - // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"uno", metadata !"fpexcept.strict") - // CHECK: and <8 x i1> [[CMP]], {{.*}} + // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, i32 3, <8 x i1> {{.*}}) return _mm256_mask_cmp_ps_mask(m, a, b, _CMP_UNORD_Q); } __mmask8 test_mm256_mask_cmp_ps_mask_neq_uq(__mmask8 m, __m256 a, __m256 b) { // CHECK-LABEL: test_mm256_mask_cmp_ps_mask_neq_uq - // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"une", metadata !"fpexcept.strict") - // CHECK: and <8 x i1> [[CMP]], {{.*}} + // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, i32 4, <8 x i1> {{.*}}) return _mm256_mask_cmp_ps_mask(m, a, b, _CMP_NEQ_UQ); } __mmask8 test_mm256_mask_cmp_ps_mask_nlt_us(__mmask8 m, __m256 a, __m256 b) { // CHECK-LABEL: test_mm256_mask_cmp_ps_mask_nlt_us - // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"uge", metadata !"fpexcept.strict") - // CHECK: and <8 x i1> [[CMP]], {{.*}} + // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, i32 5, <8 x i1> {{.*}}) return _mm256_mask_cmp_ps_mask(m, a, b, _CMP_NLT_US); } __mmask8 test_mm256_mask_cmp_ps_mask_nle_us(__mmask8 m, __m256 a, __m256 b) { // CHECK-LABEL: test_mm256_mask_cmp_ps_mask_nle_us - // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"ugt", metadata !"fpexcept.strict") - // CHECK: and <8 x i1> [[CMP]], {{.*}} + // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, i32 6, <8 x i1> {{.*}}) return _mm256_mask_cmp_ps_mask(m, a, b, _CMP_NLE_US); } __mmask8 test_mm256_mask_cmp_ps_mask_ord_q(__mmask8 m, __m256 a, __m256 b) { // CHECK-LABEL: test_mm256_mask_cmp_ps_mask_ord_q - // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"ord", metadata !"fpexcept.strict") - // CHECK: and <8 x i1> [[CMP]], {{.*}} + // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, i32 7, <8 x i1> {{.*}}) return _mm256_mask_cmp_ps_mask(m, a, b, _CMP_ORD_Q); } __mmask8 test_mm256_mask_cmp_ps_mask_eq_uq(__mmask8 m, __m256 a, __m256 b) { // CHECK-LABEL: test_mm256_mask_cmp_ps_mask_eq_uq - // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"ueq", metadata !"fpexcept.strict") - // CHECK: and <8 x i1> [[CMP]], {{.*}} + // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, i32 8, <8 x i1> {{.*}}) return _mm256_mask_cmp_ps_mask(m, a, b, _CMP_EQ_UQ); } __mmask8 test_mm256_mask_cmp_ps_mask_nge_us(__mmask8 m, __m256 a, __m256 b) { // CHECK-LABEL: test_mm256_mask_cmp_ps_mask_nge_us - // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"ult", metadata !"fpexcept.strict") - // CHECK: and <8 x i1> [[CMP]], {{.*}} + // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> %{{.*}}, <8 x float> 
%{{.*}}, i32 9, <8 x i1> {{.*}}) return _mm256_mask_cmp_ps_mask(m, a, b, _CMP_NGE_US); } __mmask8 test_mm256_mask_cmp_ps_mask_ngt_us(__mmask8 m, __m256 a, __m256 b) { // CHECK-LABEL: test_mm256_mask_cmp_ps_mask_ngt_us - // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"ule", metadata !"fpexcept.strict") - // CHECK: and <8 x i1> [[CMP]], {{.*}} + // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, i32 10, <8 x i1> {{.*}}) return _mm256_mask_cmp_ps_mask(m, a, b, _CMP_NGT_US); } __mmask8 test_mm256_mask_cmp_ps_mask_false_oq(__mmask8 m, __m256 a, __m256 b) { // CHECK-LABEL: test_mm256_mask_cmp_ps_mask_false_oq - // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.x86.avx512.cmp.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, i32 11) - // CHECK: and <8 x i1> [[CMP]], {{.*}} + // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, i32 11, <8 x i1> {{.*}}) return _mm256_mask_cmp_ps_mask(m, a, b, _CMP_FALSE_OQ); } __mmask8 test_mm256_mask_cmp_ps_mask_neq_oq(__mmask8 m, __m256 a, __m256 b) { // CHECK-LABEL: test_mm256_mask_cmp_ps_mask_neq_oq - // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"one", metadata !"fpexcept.strict") - // CHECK: and <8 x i1> [[CMP]], {{.*}} + // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, i32 12, <8 x i1> {{.*}}) return _mm256_mask_cmp_ps_mask(m, a, b, _CMP_NEQ_OQ); } __mmask8 test_mm256_mask_cmp_ps_mask_ge_os(__mmask8 m, __m256 a, __m256 b) { // CHECK-LABEL: test_mm256_mask_cmp_ps_mask_ge_os - // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"oge", metadata !"fpexcept.strict") - // CHECK: and <8 x i1> [[CMP]], {{.*}} + // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, i32 13, <8 x i1> {{.*}}) return _mm256_mask_cmp_ps_mask(m, a, b, _CMP_GE_OS); } __mmask8 test_mm256_mask_cmp_ps_mask_gt_os(__mmask8 m, __m256 a, __m256 b) { // CHECK-LABEL: test_mm256_mask_cmp_ps_mask_gt_os - // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"ogt", metadata !"fpexcept.strict") - // CHECK: and <8 x i1> [[CMP]], {{.*}} + // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, i32 14, <8 x i1> {{.*}}) return _mm256_mask_cmp_ps_mask(m, a, b, _CMP_GT_OS); } __mmask8 test_mm256_mask_cmp_ps_mask_true_uq(__mmask8 m, __m256 a, __m256 b) { // CHECK-LABEL: test_mm256_mask_cmp_ps_mask_true_uq - // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.x86.avx512.cmp.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, i32 15) - // CHECK: and <8 x i1> [[CMP]], {{.*}} + // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, i32 15, <8 x i1> {{.*}}) return _mm256_mask_cmp_ps_mask(m, a, b, _CMP_TRUE_UQ); } __mmask8 test_mm256_mask_cmp_ps_mask_eq_os(__mmask8 m, __m256 a, __m256 b) { // CHECK-LABEL: test_mm256_mask_cmp_ps_mask_eq_os - // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"oeq", metadata !"fpexcept.strict") - // CHECK: and <8 x i1> [[CMP]], {{.*}} + // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> %{{.*}}, <8 x float> 
%{{.*}}, i32 16, <8 x i1> {{.*}}) return _mm256_mask_cmp_ps_mask(m, a, b, _CMP_EQ_OS); } __mmask8 test_mm256_mask_cmp_ps_mask_lt_oq(__mmask8 m, __m256 a, __m256 b) { // CHECK-LABEL: test_mm256_mask_cmp_ps_mask_lt_oq - // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"olt", metadata !"fpexcept.strict") - // CHECK: and <8 x i1> [[CMP]], {{.*}} + // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, i32 17, <8 x i1> {{.*}}) return _mm256_mask_cmp_ps_mask(m, a, b, _CMP_LT_OQ); } __mmask8 test_mm256_mask_cmp_ps_mask_le_oq(__mmask8 m, __m256 a, __m256 b) { // CHECK-LABEL: test_mm256_mask_cmp_ps_mask_le_oq - // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"ole", metadata !"fpexcept.strict") - // CHECK: and <8 x i1> [[CMP]], {{.*}} + // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, i32 18, <8 x i1> {{.*}}) return _mm256_mask_cmp_ps_mask(m, a, b, _CMP_LE_OQ); } __mmask8 test_mm256_mask_cmp_ps_mask_unord_s(__mmask8 m, __m256 a, __m256 b) { // CHECK-LABEL: test_mm256_mask_cmp_ps_mask_unord_s - // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"uno", metadata !"fpexcept.strict") - // CHECK: and <8 x i1> [[CMP]], {{.*}} + // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, i32 19, <8 x i1> {{.*}}) return _mm256_mask_cmp_ps_mask(m, a, b, _CMP_UNORD_S); } __mmask8 test_mm256_mask_cmp_ps_mask_neq_us(__mmask8 m, __m256 a, __m256 b) { // CHECK-LABEL: test_mm256_mask_cmp_ps_mask_neq_us - // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"une", metadata !"fpexcept.strict") - // CHECK: and <8 x i1> [[CMP]], {{.*}} + // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, i32 20, <8 x i1> {{.*}}) return _mm256_mask_cmp_ps_mask(m, a, b, _CMP_NEQ_US); } __mmask8 test_mm256_mask_cmp_ps_mask_nlt_uq(__mmask8 m, __m256 a, __m256 b) { // CHECK-LABEL: test_mm256_mask_cmp_ps_mask_nlt_uq - // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"uge", metadata !"fpexcept.strict") - // CHECK: and <8 x i1> [[CMP]], {{.*}} + // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, i32 21, <8 x i1> {{.*}}) return _mm256_mask_cmp_ps_mask(m, a, b, _CMP_NLT_UQ); } __mmask8 test_mm256_mask_cmp_ps_mask_nle_uq(__mmask8 m, __m256 a, __m256 b) { // CHECK-LABEL: test_mm256_mask_cmp_ps_mask_nle_uq - // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"ugt", metadata !"fpexcept.strict") - // CHECK: and <8 x i1> [[CMP]], {{.*}} + // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, i32 22, <8 x i1> {{.*}}) return _mm256_mask_cmp_ps_mask(m, a, b, _CMP_NLE_UQ); } __mmask8 test_mm256_mask_cmp_ps_mask_ord_s(__mmask8 m, __m256 a, __m256 b) { // CHECK-LABEL: test_mm256_mask_cmp_ps_mask_ord_s - // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"ord", metadata !"fpexcept.strict") - // CHECK: and <8 x i1> [[CMP]], 
{{.*}} + // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, i32 23, <8 x i1> {{.*}}) return _mm256_mask_cmp_ps_mask(m, a, b, _CMP_ORD_S); } __mmask8 test_mm256_mask_cmp_ps_mask_eq_us(__mmask8 m, __m256 a, __m256 b) { // CHECK-LABEL: test_mm256_mask_cmp_ps_mask_eq_us - // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"ueq", metadata !"fpexcept.strict") - // CHECK: and <8 x i1> [[CMP]], {{.*}} + // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, i32 24, <8 x i1> {{.*}}) return _mm256_mask_cmp_ps_mask(m, a, b, _CMP_EQ_US); } __mmask8 test_mm256_mask_cmp_ps_mask_nge_uq(__mmask8 m, __m256 a, __m256 b) { // CHECK-LABEL: test_mm256_mask_cmp_ps_mask_nge_uq - // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"ult", metadata !"fpexcept.strict") - // CHECK: and <8 x i1> [[CMP]], {{.*}} + // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, i32 25, <8 x i1> {{.*}}) return _mm256_mask_cmp_ps_mask(m, a, b, _CMP_NGE_UQ); } __mmask8 test_mm256_mask_cmp_ps_mask_ngt_uq(__mmask8 m, __m256 a, __m256 b) { // CHECK-LABEL: test_mm256_mask_cmp_ps_mask_ngt_uq - // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"ule", metadata !"fpexcept.strict") - // CHECK: and <8 x i1> [[CMP]], {{.*}} + // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, i32 26, <8 x i1> {{.*}}) return _mm256_mask_cmp_ps_mask(m, a, b, _CMP_NGT_UQ); } __mmask8 test_mm256_mask_cmp_ps_mask_false_os(__mmask8 m, __m256 a, __m256 b) { // CHECK-LABEL: test_mm256_mask_cmp_ps_mask_false_os - // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.x86.avx512.cmp.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, i32 27) - // CHECK: and <8 x i1> [[CMP]], {{.*}} + // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, i32 27, <8 x i1> {{.*}}) return _mm256_mask_cmp_ps_mask(m, a, b, _CMP_FALSE_OS); } __mmask8 test_mm256_mask_cmp_ps_mask_neq_os(__mmask8 m, __m256 a, __m256 b) { // CHECK-LABEL: test_mm256_mask_cmp_ps_mask_neq_os - // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"one", metadata !"fpexcept.strict") - // CHECK: and <8 x i1> [[CMP]], {{.*}} + // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, i32 28, <8 x i1> {{.*}}) return _mm256_mask_cmp_ps_mask(m, a, b, _CMP_NEQ_OS); } __mmask8 test_mm256_mask_cmp_ps_mask_ge_oq(__mmask8 m, __m256 a, __m256 b) { // CHECK-LABEL: test_mm256_mask_cmp_ps_mask_ge_oq - // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"oge", metadata !"fpexcept.strict") - // CHECK: and <8 x i1> [[CMP]], {{.*}} + // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, i32 29, <8 x i1> {{.*}}) return _mm256_mask_cmp_ps_mask(m, a, b, _CMP_GE_OQ); } __mmask8 test_mm256_mask_cmp_ps_mask_gt_oq(__mmask8 m, __m256 a, __m256 b) { // CHECK-LABEL: test_mm256_mask_cmp_ps_mask_gt_oq - // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"ogt", metadata 
!"fpexcept.strict") - // CHECK: and <8 x i1> [[CMP]], {{.*}} + // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, i32 30, <8 x i1> {{.*}}) return _mm256_mask_cmp_ps_mask(m, a, b, _CMP_GT_OQ); } __mmask8 test_mm256_mask_cmp_ps_mask_true_us(__mmask8 m, __m256 a, __m256 b) { // CHECK-LABEL: test_mm256_mask_cmp_ps_mask_true_us - // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.x86.avx512.cmp.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, i32 31) - // CHECK: and <8 x i1> [[CMP]], {{.*}} + // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, i32 31, <8 x i1> {{.*}}) return _mm256_mask_cmp_ps_mask(m, a, b, _CMP_TRUE_US); } __mmask8 test_mm256_cmp_pd_mask_eq_oq(__m256d a, __m256d b) { // CHECK-LABEL: @test_mm256_cmp_pd_mask_eq_oq - // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmp.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"oeq", metadata !"fpexcept.strict") + // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, i32 0, <4 x i1> {{.*}}) return _mm256_cmp_pd_mask(a, b, _CMP_EQ_OQ); } __mmask8 test_mm256_cmp_pd_mask_lt_os(__m256d a, __m256d b) { // CHECK-LABEL: test_mm256_cmp_pd_mask_lt_os - // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmps.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"olt", metadata !"fpexcept.strict") + // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, i32 1, <4 x i1> {{.*}}) return _mm256_cmp_pd_mask(a, b, _CMP_LT_OS); } __mmask8 test_mm256_cmp_pd_mask_le_os(__m256d a, __m256d b) { // CHECK-LABEL: test_mm256_cmp_pd_mask_le_os - // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmps.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"ole", metadata !"fpexcept.strict") + // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, i32 2, <4 x i1> {{.*}}) return _mm256_cmp_pd_mask(a, b, _CMP_LE_OS); } __mmask8 test_mm256_cmp_pd_mask_unord_q(__m256d a, __m256d b) { // CHECK-LABEL: test_mm256_cmp_pd_mask_unord_q - // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmp.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"uno", metadata !"fpexcept.strict") + // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, i32 3, <4 x i1> {{.*}}) return _mm256_cmp_pd_mask(a, b, _CMP_UNORD_Q); } __mmask8 test_mm256_cmp_pd_mask_neq_uq(__m256d a, __m256d b) { // CHECK-LABEL: test_mm256_cmp_pd_mask_neq_uq - // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmp.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"une", metadata !"fpexcept.strict") + // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, i32 4, <4 x i1> {{.*}}) return _mm256_cmp_pd_mask(a, b, _CMP_NEQ_UQ); } __mmask8 test_mm256_cmp_pd_mask_nlt_us(__m256d a, __m256d b) { // CHECK-LABEL: test_mm256_cmp_pd_mask_nlt_us - // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmps.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"uge", metadata !"fpexcept.strict") + // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, i32 5, <4 x i1> {{.*}}) return _mm256_cmp_pd_mask(a, b, _CMP_NLT_US); } __mmask8 test_mm256_cmp_pd_mask_nle_us(__m256d a, __m256d b) { // CHECK-LABEL: test_mm256_cmp_pd_mask_nle_us - // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmps.v4f64(<4 x 
double> %{{.*}}, <4 x double> %{{.*}}, metadata !"ugt", metadata !"fpexcept.strict") + // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, i32 6, <4 x i1> {{.*}}) return _mm256_cmp_pd_mask(a, b, _CMP_NLE_US); } __mmask8 test_mm256_cmp_pd_mask_ord_q(__m256d a, __m256d b) { // CHECK-LABEL: test_mm256_cmp_pd_mask_ord_q - // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmp.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"ord", metadata !"fpexcept.strict") + // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, i32 7, <4 x i1> {{.*}}) return _mm256_cmp_pd_mask(a, b, _CMP_ORD_Q); } __mmask8 test_mm256_cmp_pd_mask_eq_uq(__m256d a, __m256d b) { // CHECK-LABEL: test_mm256_cmp_pd_mask_eq_uq - // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmp.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"ueq", metadata !"fpexcept.strict") + // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, i32 8, <4 x i1> {{.*}}) return _mm256_cmp_pd_mask(a, b, _CMP_EQ_UQ); } __mmask8 test_mm256_cmp_pd_mask_nge_us(__m256d a, __m256d b) { // CHECK-LABEL: test_mm256_cmp_pd_mask_nge_us - // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmps.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"ult", metadata !"fpexcept.strict") + // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, i32 9, <4 x i1> {{.*}}) return _mm256_cmp_pd_mask(a, b, _CMP_NGE_US); } __mmask8 test_mm256_cmp_pd_mask_ngt_us(__m256d a, __m256d b) { // CHECK-LABEL: test_mm256_cmp_pd_mask_ngt_us - // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmps.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"ule", metadata !"fpexcept.strict") + // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, i32 10, <4 x i1> {{.*}}) return _mm256_cmp_pd_mask(a, b, _CMP_NGT_US); } __mmask8 test_mm256_cmp_pd_mask_false_oq(__m256d a, __m256d b) { // CHECK-LABEL: test_mm256_cmp_pd_mask_false_oq - // CHECK: call <4 x i1> @llvm.x86.avx512.cmp.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, i32 11) + // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, i32 11, <4 x i1> {{.*}}) return _mm256_cmp_pd_mask(a, b, _CMP_FALSE_OQ); } __mmask8 test_mm256_cmp_pd_mask_neq_oq(__m256d a, __m256d b) { // CHECK-LABEL: test_mm256_cmp_pd_mask_neq_oq - // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmp.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"one", metadata !"fpexcept.strict") + // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, i32 12, <4 x i1> {{.*}}) return _mm256_cmp_pd_mask(a, b, _CMP_NEQ_OQ); } __mmask8 test_mm256_cmp_pd_mask_ge_os(__m256d a, __m256d b) { // CHECK-LABEL: test_mm256_cmp_pd_mask_ge_os - // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmps.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"oge", metadata !"fpexcept.strict") + // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, i32 13, <4 x i1> {{.*}}) return _mm256_cmp_pd_mask(a, b, _CMP_GE_OS); } __mmask8 test_mm256_cmp_pd_mask_gt_os(__m256d a, __m256d b) { // CHECK-LABEL: test_mm256_cmp_pd_mask_gt_os - // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmps.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"ogt", metadata 
!"fpexcept.strict") + // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, i32 14, <4 x i1> {{.*}}) return _mm256_cmp_pd_mask(a, b, _CMP_GT_OS); } __mmask8 test_mm256_cmp_pd_mask_true_uq(__m256d a, __m256d b) { // CHECK-LABEL: test_mm256_cmp_pd_mask_true_uq - // CHECK: call <4 x i1> @llvm.x86.avx512.cmp.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, i32 15) + // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, i32 15, <4 x i1> {{.*}}) return _mm256_cmp_pd_mask(a, b, _CMP_TRUE_UQ); } __mmask8 test_mm256_cmp_pd_mask_eq_os(__m256d a, __m256d b) { // CHECK-LABEL: test_mm256_cmp_pd_mask_eq_os - // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmps.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"oeq", metadata !"fpexcept.strict") + // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, i32 16, <4 x i1> {{.*}}) return _mm256_cmp_pd_mask(a, b, _CMP_EQ_OS); } __mmask8 test_mm256_cmp_pd_mask_lt_oq(__m256d a, __m256d b) { // CHECK-LABEL: test_mm256_cmp_pd_mask_lt_oq - // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmp.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"olt", metadata !"fpexcept.strict") + // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, i32 17, <4 x i1> {{.*}}) return _mm256_cmp_pd_mask(a, b, _CMP_LT_OQ); } __mmask8 test_mm256_cmp_pd_mask_le_oq(__m256d a, __m256d b) { // CHECK-LABEL: test_mm256_cmp_pd_mask_le_oq - // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmp.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"ole", metadata !"fpexcept.strict") + // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, i32 18, <4 x i1> {{.*}}) return _mm256_cmp_pd_mask(a, b, _CMP_LE_OQ); } __mmask8 test_mm256_cmp_pd_mask_unord_s(__m256d a, __m256d b) { // CHECK-LABEL: test_mm256_cmp_pd_mask_unord_s - // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmps.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"uno", metadata !"fpexcept.strict") + // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, i32 19, <4 x i1> {{.*}}) return _mm256_cmp_pd_mask(a, b, _CMP_UNORD_S); } __mmask8 test_mm256_cmp_pd_mask_neq_us(__m256d a, __m256d b) { // CHECK-LABEL: test_mm256_cmp_pd_mask_neq_us - // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmps.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"une", metadata !"fpexcept.strict") + // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, i32 20, <4 x i1> {{.*}}) return _mm256_cmp_pd_mask(a, b, _CMP_NEQ_US); } __mmask8 test_mm256_cmp_pd_mask_nlt_uq(__m256d a, __m256d b) { // CHECK-LABEL: test_mm256_cmp_pd_mask_nlt_uq - // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmp.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"uge", metadata !"fpexcept.strict") + // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, i32 21, <4 x i1> {{.*}}) return _mm256_cmp_pd_mask(a, b, _CMP_NLT_UQ); } __mmask8 test_mm256_cmp_pd_mask_nle_uq(__m256d a, __m256d b) { // CHECK-LABEL: test_mm256_cmp_pd_mask_nle_uq - // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmp.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"ugt", metadata !"fpexcept.strict") + // CHECK: call <4 x i1> 
@llvm.x86.avx512.mask.cmp.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, i32 22, <4 x i1> {{.*}}) return _mm256_cmp_pd_mask(a, b, _CMP_NLE_UQ); } __mmask8 test_mm256_cmp_pd_mask_ord_s(__m256d a, __m256d b) { // CHECK-LABEL: test_mm256_cmp_pd_mask_ord_s - // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmps.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"ord", metadata !"fpexcept.strict") + // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, i32 23, <4 x i1> {{.*}}) return _mm256_cmp_pd_mask(a, b, _CMP_ORD_S); } __mmask8 test_mm256_cmp_pd_mask_eq_us(__m256d a, __m256d b) { // CHECK-LABEL: test_mm256_cmp_pd_mask_eq_us - // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmps.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"ueq", metadata !"fpexcept.strict") + // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, i32 24, <4 x i1> {{.*}}) return _mm256_cmp_pd_mask(a, b, _CMP_EQ_US); } __mmask8 test_mm256_cmp_pd_mask_nge_uq(__m256d a, __m256d b) { // CHECK-LABEL: test_mm256_cmp_pd_mask_nge_uq - // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmp.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"ult", metadata !"fpexcept.strict") + // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, i32 25, <4 x i1> {{.*}}) return _mm256_cmp_pd_mask(a, b, _CMP_NGE_UQ); } __mmask8 test_mm256_cmp_pd_mask_ngt_uq(__m256d a, __m256d b) { // CHECK-LABEL: test_mm256_cmp_pd_mask_ngt_uq - // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmp.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"ule", metadata !"fpexcept.strict") + // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, i32 26, <4 x i1> {{.*}}) return _mm256_cmp_pd_mask(a, b, _CMP_NGT_UQ); } __mmask8 test_mm256_cmp_pd_mask_false_os(__m256d a, __m256d b) { // CHECK-LABEL: test_mm256_cmp_pd_mask_false_os - // CHECK: call <4 x i1> @llvm.x86.avx512.cmp.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, i32 27) + // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, i32 27, <4 x i1> {{.*}}) return _mm256_cmp_pd_mask(a, b, _CMP_FALSE_OS); } __mmask8 test_mm256_cmp_pd_mask_neq_os(__m256d a, __m256d b) { // CHECK-LABEL: test_mm256_cmp_pd_mask_neq_os - // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmps.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"one", metadata !"fpexcept.strict") + // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, i32 28, <4 x i1> {{.*}}) return _mm256_cmp_pd_mask(a, b, _CMP_NEQ_OS); } __mmask8 test_mm256_cmp_pd_mask_ge_oq(__m256d a, __m256d b) { // CHECK-LABEL: test_mm256_cmp_pd_mask_ge_oq - // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmp.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"oge", metadata !"fpexcept.strict") + // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, i32 29, <4 x i1> {{.*}}) return _mm256_cmp_pd_mask(a, b, _CMP_GE_OQ); } __mmask8 test_mm256_cmp_pd_mask_gt_oq(__m256d a, __m256d b) { // CHECK-LABEL: test_mm256_cmp_pd_mask_gt_oq - // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmp.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"ogt", metadata !"fpexcept.strict") + // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> %{{.*}}, <4 x 
double> %{{.*}}, i32 30, <4 x i1> {{.*}}) return _mm256_cmp_pd_mask(a, b, _CMP_GT_OQ); } __mmask8 test_mm256_cmp_pd_mask_true_us(__m256d a, __m256d b) { // CHECK-LABEL: test_mm256_cmp_pd_mask_true_us - // CHECK: call <4 x i1> @llvm.x86.avx512.cmp.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, i32 31) + // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, i32 31, <4 x i1> {{.*}}) return _mm256_cmp_pd_mask(a, b, _CMP_TRUE_US); } __mmask8 test_mm256_mask_cmp_pd_mask_eq_oq(__mmask8 m, __m256d a, __m256d b) { // CHECK-LABEL: @test_mm256_mask_cmp_pd_mask_eq_oq - // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"oeq", metadata !"fpexcept.strict") - // CHECK: and <4 x i1> [[CMP]], {{.*}} + // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, i32 0, <4 x i1> {{.*}}) return _mm256_mask_cmp_pd_mask(m, a, b, _CMP_EQ_OQ); } __mmask8 test_mm256_mask_cmp_pd_mask_lt_os(__mmask8 m, __m256d a, __m256d b) { // CHECK-LABEL: test_mm256_mask_cmp_pd_mask_lt_os - // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"olt", metadata !"fpexcept.strict") - // CHECK: and <4 x i1> [[CMP]], {{.*}} + // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, i32 1, <4 x i1> {{.*}}) return _mm256_mask_cmp_pd_mask(m, a, b, _CMP_LT_OS); } __mmask8 test_mm256_mask_cmp_pd_mask_le_os(__mmask8 m, __m256d a, __m256d b) { // CHECK-LABEL: test_mm256_mask_cmp_pd_mask_le_os - // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"ole", metadata !"fpexcept.strict") - // CHECK: and <4 x i1> [[CMP]], {{.*}} + // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, i32 2, <4 x i1> {{.*}}) return _mm256_mask_cmp_pd_mask(m, a, b, _CMP_LE_OS); } __mmask8 test_mm256_mask_cmp_pd_mask_unord_q(__mmask8 m, __m256d a, __m256d b) { // CHECK-LABEL: test_mm256_mask_cmp_pd_mask_unord_q - // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"uno", metadata !"fpexcept.strict") - // CHECK: and <4 x i1> [[CMP]], {{.*}} + // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, i32 3, <4 x i1> {{.*}}) return _mm256_mask_cmp_pd_mask(m, a, b, _CMP_UNORD_Q); } __mmask8 test_mm256_mask_cmp_pd_mask_neq_uq(__mmask8 m, __m256d a, __m256d b) { // CHECK-LABEL: test_mm256_mask_cmp_pd_mask_neq_uq - // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"une", metadata !"fpexcept.strict") - // CHECK: and <4 x i1> [[CMP]], {{.*}} + // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, i32 4, <4 x i1> {{.*}}) return _mm256_mask_cmp_pd_mask(m, a, b, _CMP_NEQ_UQ); } __mmask8 test_mm256_mask_cmp_pd_mask_nlt_us(__mmask8 m, __m256d a, __m256d b) { // CHECK-LABEL: test_mm256_mask_cmp_pd_mask_nlt_us - // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"uge", metadata !"fpexcept.strict") - // CHECK: and <4 x i1> [[CMP]], {{.*}} + // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> %{{.*}}, <4 x double> 
%{{.*}}, i32 5, <4 x i1> {{.*}}) return _mm256_mask_cmp_pd_mask(m, a, b, _CMP_NLT_US); } __mmask8 test_mm256_mask_cmp_pd_mask_nle_us(__mmask8 m, __m256d a, __m256d b) { // CHECK-LABEL: test_mm256_mask_cmp_pd_mask_nle_us - // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"ugt", metadata !"fpexcept.strict") - // CHECK: and <4 x i1> [[CMP]], {{.*}} + // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, i32 6, <4 x i1> {{.*}}) return _mm256_mask_cmp_pd_mask(m, a, b, _CMP_NLE_US); } __mmask8 test_mm256_mask_cmp_pd_mask_ord_q(__mmask8 m, __m256d a, __m256d b) { // CHECK-LABEL: test_mm256_mask_cmp_pd_mask_ord_q - // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"ord", metadata !"fpexcept.strict") - // CHECK: and <4 x i1> [[CMP]], {{.*}} + // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, i32 7, <4 x i1> {{.*}}) return _mm256_mask_cmp_pd_mask(m, a, b, _CMP_ORD_Q); } __mmask8 test_mm256_mask_cmp_pd_mask_eq_uq(__mmask8 m, __m256d a, __m256d b) { // CHECK-LABEL: test_mm256_mask_cmp_pd_mask_eq_uq - // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"ueq", metadata !"fpexcept.strict") - // CHECK: and <4 x i1> [[CMP]], {{.*}} + // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, i32 8, <4 x i1> {{.*}}) return _mm256_mask_cmp_pd_mask(m, a, b, _CMP_EQ_UQ); } __mmask8 test_mm256_mask_cmp_pd_mask_nge_us(__mmask8 m, __m256d a, __m256d b) { // CHECK-LABEL: test_mm256_mask_cmp_pd_mask_nge_us - // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"ult", metadata !"fpexcept.strict") - // CHECK: and <4 x i1> [[CMP]], {{.*}} + // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, i32 9, <4 x i1> {{.*}}) return _mm256_mask_cmp_pd_mask(m, a, b, _CMP_NGE_US); } __mmask8 test_mm256_mask_cmp_pd_mask_ngt_us(__mmask8 m, __m256d a, __m256d b) { // CHECK-LABEL: test_mm256_mask_cmp_pd_mask_ngt_us - // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"ule", metadata !"fpexcept.strict") - // CHECK: and <4 x i1> [[CMP]], {{.*}} + // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, i32 10, <4 x i1> {{.*}}) return _mm256_mask_cmp_pd_mask(m, a, b, _CMP_NGT_US); } __mmask8 test_mm256_mask_cmp_pd_mask_false_oq(__mmask8 m, __m256d a, __m256d b) { // CHECK-LABEL: test_mm256_mask_cmp_pd_mask_false_oq - // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.x86.avx512.cmp.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, i32 11) - // CHECK: and <4 x i1> [[CMP]], {{.*}} + // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, i32 11, <4 x i1> {{.*}}) return _mm256_mask_cmp_pd_mask(m, a, b, _CMP_FALSE_OQ); } __mmask8 test_mm256_mask_cmp_pd_mask_neq_oq(__mmask8 m, __m256d a, __m256d b) { // CHECK-LABEL: test_mm256_mask_cmp_pd_mask_neq_oq - // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"one", metadata !"fpexcept.strict") - // CHECK: and <4 x i1> [[CMP]], {{.*}} + // 
CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, i32 12, <4 x i1> {{.*}}) return _mm256_mask_cmp_pd_mask(m, a, b, _CMP_NEQ_OQ); } __mmask8 test_mm256_mask_cmp_pd_mask_ge_os(__mmask8 m, __m256d a, __m256d b) { // CHECK-LABEL: test_mm256_mask_cmp_pd_mask_ge_os - // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"oge", metadata !"fpexcept.strict") - // CHECK: and <4 x i1> [[CMP]], {{.*}} + // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, i32 13, <4 x i1> {{.*}}) return _mm256_mask_cmp_pd_mask(m, a, b, _CMP_GE_OS); } __mmask8 test_mm256_mask_cmp_pd_mask_gt_os(__mmask8 m, __m256d a, __m256d b) { // CHECK-LABEL: test_mm256_mask_cmp_pd_mask_gt_os - // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"ogt", metadata !"fpexcept.strict") - // CHECK: and <4 x i1> [[CMP]], {{.*}} + // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, i32 14, <4 x i1> {{.*}}) return _mm256_mask_cmp_pd_mask(m, a, b, _CMP_GT_OS); } __mmask8 test_mm256_mask_cmp_pd_mask_true_uq(__mmask8 m, __m256d a, __m256d b) { // CHECK-LABEL: test_mm256_mask_cmp_pd_mask_true_uq - // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.x86.avx512.cmp.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, i32 15) - // CHECK: and <4 x i1> [[CMP]], {{.*}} + // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, i32 15, <4 x i1> {{.*}}) return _mm256_mask_cmp_pd_mask(m, a, b, _CMP_TRUE_UQ); } __mmask8 test_mm256_mask_cmp_pd_mask_eq_os(__mmask8 m, __m256d a, __m256d b) { // CHECK-LABEL: test_mm256_mask_cmp_pd_mask_eq_os - // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"oeq", metadata !"fpexcept.strict") - // CHECK: and <4 x i1> [[CMP]], {{.*}} + // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, i32 16, <4 x i1> {{.*}}) return _mm256_mask_cmp_pd_mask(m, a, b, _CMP_EQ_OS); } __mmask8 test_mm256_mask_cmp_pd_mask_lt_oq(__mmask8 m, __m256d a, __m256d b) { // CHECK-LABEL: test_mm256_mask_cmp_pd_mask_lt_oq - // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"olt", metadata !"fpexcept.strict") - // CHECK: and <4 x i1> [[CMP]], {{.*}} + // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, i32 17, <4 x i1> {{.*}}) return _mm256_mask_cmp_pd_mask(m, a, b, _CMP_LT_OQ); } __mmask8 test_mm256_mask_cmp_pd_mask_le_oq(__mmask8 m, __m256d a, __m256d b) { // CHECK-LABEL: test_mm256_mask_cmp_pd_mask_le_oq - // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"ole", metadata !"fpexcept.strict") - // CHECK: and <4 x i1> [[CMP]], {{.*}} + // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, i32 18, <4 x i1> {{.*}}) return _mm256_mask_cmp_pd_mask(m, a, b, _CMP_LE_OQ); } __mmask8 test_mm256_mask_cmp_pd_mask_unord_s(__mmask8 m, __m256d a, __m256d b) { // CHECK-LABEL: test_mm256_mask_cmp_pd_mask_unord_s - // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata 
!"uno", metadata !"fpexcept.strict") - // CHECK: and <4 x i1> [[CMP]], {{.*}} + // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, i32 19, <4 x i1> {{.*}}) return _mm256_mask_cmp_pd_mask(m, a, b, _CMP_UNORD_S); } __mmask8 test_mm256_mask_cmp_pd_mask_neq_us(__mmask8 m, __m256d a, __m256d b) { // CHECK-LABEL: test_mm256_mask_cmp_pd_mask_neq_us - // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"une", metadata !"fpexcept.strict") - // CHECK: and <4 x i1> [[CMP]], {{.*}} + // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, i32 20, <4 x i1> {{.*}}) return _mm256_mask_cmp_pd_mask(m, a, b, _CMP_NEQ_US); } __mmask8 test_mm256_mask_cmp_pd_mask_nlt_uq(__mmask8 m, __m256d a, __m256d b) { // CHECK-LABEL: test_mm256_mask_cmp_pd_mask_nlt_uq - // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"uge", metadata !"fpexcept.strict") - // CHECK: and <4 x i1> [[CMP]], {{.*}} + // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, i32 21, <4 x i1> {{.*}}) return _mm256_mask_cmp_pd_mask(m, a, b, _CMP_NLT_UQ); } __mmask8 test_mm256_mask_cmp_pd_mask_nle_uq(__mmask8 m, __m256d a, __m256d b) { // CHECK-LABEL: test_mm256_mask_cmp_pd_mask_nle_uq - // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"ugt", metadata !"fpexcept.strict") - // CHECK: and <4 x i1> [[CMP]], {{.*}} + // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, i32 22, <4 x i1> {{.*}}) return _mm256_mask_cmp_pd_mask(m, a, b, _CMP_NLE_UQ); } __mmask8 test_mm256_mask_cmp_pd_mask_ord_s(__mmask8 m, __m256d a, __m256d b) { // CHECK-LABEL: test_mm256_mask_cmp_pd_mask_ord_s - // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"ord", metadata !"fpexcept.strict") - // CHECK: and <4 x i1> [[CMP]], {{.*}} + // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, i32 23, <4 x i1> {{.*}}) return _mm256_mask_cmp_pd_mask(m, a, b, _CMP_ORD_S); } __mmask8 test_mm256_mask_cmp_pd_mask_eq_us(__mmask8 m, __m256d a, __m256d b) { // CHECK-LABEL: test_mm256_mask_cmp_pd_mask_eq_us - // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"ueq", metadata !"fpexcept.strict") - // CHECK: and <4 x i1> [[CMP]], {{.*}} + // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, i32 24, <4 x i1> {{.*}}) return _mm256_mask_cmp_pd_mask(m, a, b, _CMP_EQ_US); } __mmask8 test_mm256_mask_cmp_pd_mask_nge_uq(__mmask8 m, __m256d a, __m256d b) { // CHECK-LABEL: test_mm256_mask_cmp_pd_mask_nge_uq - // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"ult", metadata !"fpexcept.strict") - // CHECK: and <4 x i1> [[CMP]], {{.*}} + // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, i32 25, <4 x i1> {{.*}}) return _mm256_mask_cmp_pd_mask(m, a, b, _CMP_NGE_UQ); } __mmask8 test_mm256_mask_cmp_pd_mask_ngt_uq(__mmask8 m, __m256d a, __m256d b) { // CHECK-LABEL: 
test_mm256_mask_cmp_pd_mask_ngt_uq - // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"ule", metadata !"fpexcept.strict") - // CHECK: and <4 x i1> [[CMP]], {{.*}} + // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, i32 26, <4 x i1> {{.*}}) return _mm256_mask_cmp_pd_mask(m, a, b, _CMP_NGT_UQ); } __mmask8 test_mm256_mask_cmp_pd_mask_false_os(__mmask8 m, __m256d a, __m256d b) { // CHECK-LABEL: test_mm256_mask_cmp_pd_mask_false_os - // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.x86.avx512.cmp.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, i32 27) - // CHECK: and <4 x i1> [[CMP]], {{.*}} + // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, i32 27, <4 x i1> {{.*}}) return _mm256_mask_cmp_pd_mask(m, a, b, _CMP_FALSE_OS); } __mmask8 test_mm256_mask_cmp_pd_mask_neq_os(__mmask8 m, __m256d a, __m256d b) { // CHECK-LABEL: test_mm256_mask_cmp_pd_mask_neq_os - // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"one", metadata !"fpexcept.strict") - // CHECK: and <4 x i1> [[CMP]], {{.*}} + // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, i32 28, <4 x i1> {{.*}}) return _mm256_mask_cmp_pd_mask(m, a, b, _CMP_NEQ_OS); } __mmask8 test_mm256_mask_cmp_pd_mask_ge_oq(__mmask8 m, __m256d a, __m256d b) { // CHECK-LABEL: test_mm256_mask_cmp_pd_mask_ge_oq - // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"oge", metadata !"fpexcept.strict") - // CHECK: and <4 x i1> [[CMP]], {{.*}} + // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, i32 29, <4 x i1> {{.*}}) return _mm256_mask_cmp_pd_mask(m, a, b, _CMP_GE_OQ); } __mmask8 test_mm256_mask_cmp_pd_mask_gt_oq(__mmask8 m, __m256d a, __m256d b) { // CHECK-LABEL: test_mm256_mask_cmp_pd_mask_gt_oq - // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"ogt", metadata !"fpexcept.strict") - // CHECK: and <4 x i1> [[CMP]], {{.*}} + // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, i32 30, <4 x i1> {{.*}}) return _mm256_mask_cmp_pd_mask(m, a, b, _CMP_GT_OQ); } __mmask8 test_mm256_mask_cmp_pd_mask_true_us(__mmask8 m, __m256d a, __m256d b) { // CHECK-LABEL: test_mm256_mask_cmp_pd_mask_true_us - // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.x86.avx512.cmp.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, i32 31) - // CHECK: and <4 x i1> [[CMP]], {{.*}} + // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, i32 31, <4 x i1> {{.*}}) return _mm256_mask_cmp_pd_mask(m, a, b, _CMP_TRUE_US); } __mmask8 test_mm_cmp_ps_mask_eq_oq(__m128 a, __m128 b) { // CHECK-LABEL: @test_mm_cmp_ps_mask_eq_oq - // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"oeq", metadata !"fpexcept.strict") + // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.ps.128(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i32 0, <4 x i1> {{.*}}) return _mm_cmp_ps_mask(a, b, _CMP_EQ_OQ); } __mmask8 test_mm_cmp_ps_mask_lt_os(__m128 a, __m128 b) { // CHECK-LABEL: test_mm_cmp_ps_mask_lt_os - // CHECK: call <4 x i1> 
@llvm.experimental.constrained.fcmps.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"olt", metadata !"fpexcept.strict") + // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.ps.128(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i32 1, <4 x i1> {{.*}}) return _mm_cmp_ps_mask(a, b, _CMP_LT_OS); } __mmask8 test_mm_cmp_ps_mask_le_os(__m128 a, __m128 b) { // CHECK-LABEL: test_mm_cmp_ps_mask_le_os - // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"ole", metadata !"fpexcept.strict") + // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.ps.128(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i32 2, <4 x i1> {{.*}}) return _mm_cmp_ps_mask(a, b, _CMP_LE_OS); } __mmask8 test_mm_cmp_ps_mask_unord_q(__m128 a, __m128 b) { // CHECK-LABEL: test_mm_cmp_ps_mask_unord_q - // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"uno", metadata !"fpexcept.strict") + // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.ps.128(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i32 3, <4 x i1> {{.*}}) return _mm_cmp_ps_mask(a, b, _CMP_UNORD_Q); } __mmask8 test_mm_cmp_ps_mask_neq_uq(__m128 a, __m128 b) { // CHECK-LABEL: test_mm_cmp_ps_mask_neq_uq - // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"une", metadata !"fpexcept.strict") + // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.ps.128(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i32 4, <4 x i1> {{.*}}) return _mm_cmp_ps_mask(a, b, _CMP_NEQ_UQ); } __mmask8 test_mm_cmp_ps_mask_nlt_us(__m128 a, __m128 b) { // CHECK-LABEL: test_mm_cmp_ps_mask_nlt_us - // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"uge", metadata !"fpexcept.strict") + // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.ps.128(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i32 5, <4 x i1> {{.*}}) return _mm_cmp_ps_mask(a, b, _CMP_NLT_US); } __mmask8 test_mm_cmp_ps_mask_nle_us(__m128 a, __m128 b) { // CHECK-LABEL: test_mm_cmp_ps_mask_nle_us - // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"ugt", metadata !"fpexcept.strict") + // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.ps.128(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i32 6, <4 x i1> {{.*}}) return _mm_cmp_ps_mask(a, b, _CMP_NLE_US); } __mmask8 test_mm_cmp_ps_mask_ord_q(__m128 a, __m128 b) { // CHECK-LABEL: test_mm_cmp_ps_mask_ord_q - // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"ord", metadata !"fpexcept.strict") + // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.ps.128(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i32 7, <4 x i1> {{.*}}) return _mm_cmp_ps_mask(a, b, _CMP_ORD_Q); } __mmask8 test_mm_cmp_ps_mask_eq_uq(__m128 a, __m128 b) { // CHECK-LABEL: test_mm_cmp_ps_mask_eq_uq - // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"ueq", metadata !"fpexcept.strict") + // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.ps.128(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i32 8, <4 x i1> {{.*}}) return _mm_cmp_ps_mask(a, b, _CMP_EQ_UQ); } __mmask8 test_mm_cmp_ps_mask_nge_us(__m128 a, __m128 b) { // CHECK-LABEL: test_mm_cmp_ps_mask_nge_us - // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"ult", metadata !"fpexcept.strict") + // 
CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.ps.128(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i32 9, <4 x i1> {{.*}}) return _mm_cmp_ps_mask(a, b, _CMP_NGE_US); } __mmask8 test_mm_cmp_ps_mask_ngt_us(__m128 a, __m128 b) { // CHECK-LABEL: test_mm_cmp_ps_mask_ngt_us - // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"ule", metadata !"fpexcept.strict") + // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.ps.128(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i32 10, <4 x i1> {{.*}}) return _mm_cmp_ps_mask(a, b, _CMP_NGT_US); } __mmask8 test_mm_cmp_ps_mask_false_oq(__m128 a, __m128 b) { // CHECK-LABEL: test_mm_cmp_ps_mask_false_oq - // CHECK: call <4 x i1> @llvm.x86.avx512.cmp.ps.128(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i32 11) + // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.ps.128(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i32 11, <4 x i1> {{.*}}) return _mm_cmp_ps_mask(a, b, _CMP_FALSE_OQ); } __mmask8 test_mm_cmp_ps_mask_neq_oq(__m128 a, __m128 b) { // CHECK-LABEL: test_mm_cmp_ps_mask_neq_oq - // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"one", metadata !"fpexcept.strict") + // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.ps.128(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i32 12, <4 x i1> {{.*}}) return _mm_cmp_ps_mask(a, b, _CMP_NEQ_OQ); } __mmask8 test_mm_cmp_ps_mask_ge_os(__m128 a, __m128 b) { // CHECK-LABEL: test_mm_cmp_ps_mask_ge_os - // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"oge", metadata !"fpexcept.strict") + // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.ps.128(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i32 13, <4 x i1> {{.*}}) return _mm_cmp_ps_mask(a, b, _CMP_GE_OS); } __mmask8 test_mm_cmp_ps_mask_gt_os(__m128 a, __m128 b) { // CHECK-LABEL: test_mm_cmp_ps_mask_gt_os - // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"ogt", metadata !"fpexcept.strict") + // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.ps.128(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i32 14, <4 x i1> {{.*}}) return _mm_cmp_ps_mask(a, b, _CMP_GT_OS); } __mmask8 test_mm_cmp_ps_mask_true_uq(__m128 a, __m128 b) { // CHECK-LABEL: test_mm_cmp_ps_mask_true_uq - // CHECK: call <4 x i1> @llvm.x86.avx512.cmp.ps.128(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i32 15) + // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.ps.128(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i32 15, <4 x i1> {{.*}}) return _mm_cmp_ps_mask(a, b, _CMP_TRUE_UQ); } __mmask8 test_mm_cmp_ps_mask_eq_os(__m128 a, __m128 b) { // CHECK-LABEL: test_mm_cmp_ps_mask_eq_os - // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"oeq", metadata !"fpexcept.strict") + // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.ps.128(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i32 16, <4 x i1> {{.*}}) return _mm_cmp_ps_mask(a, b, _CMP_EQ_OS); } __mmask8 test_mm_cmp_ps_mask_lt_oq(__m128 a, __m128 b) { // CHECK-LABEL: test_mm_cmp_ps_mask_lt_oq - // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"olt", metadata !"fpexcept.strict") + // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.ps.128(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i32 17, <4 x i1> {{.*}}) return _mm_cmp_ps_mask(a, b, _CMP_LT_OQ); } __mmask8 test_mm_cmp_ps_mask_le_oq(__m128 a, __m128 b) { // 
CHECK-LABEL: test_mm_cmp_ps_mask_le_oq - // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"ole", metadata !"fpexcept.strict") + // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.ps.128(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i32 18, <4 x i1> {{.*}}) return _mm_cmp_ps_mask(a, b, _CMP_LE_OQ); } __mmask8 test_mm_cmp_ps_mask_unord_s(__m128 a, __m128 b) { // CHECK-LABEL: test_mm_cmp_ps_mask_unord_s - // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"uno", metadata !"fpexcept.strict") + // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.ps.128(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i32 19, <4 x i1> {{.*}}) return _mm_cmp_ps_mask(a, b, _CMP_UNORD_S); } __mmask8 test_mm_cmp_ps_mask_neq_us(__m128 a, __m128 b) { // CHECK-LABEL: test_mm_cmp_ps_mask_neq_us - // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"une", metadata !"fpexcept.strict") + // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.ps.128(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i32 20, <4 x i1> {{.*}}) return _mm_cmp_ps_mask(a, b, _CMP_NEQ_US); } __mmask8 test_mm_cmp_ps_mask_nlt_uq(__m128 a, __m128 b) { // CHECK-LABEL: test_mm_cmp_ps_mask_nlt_uq - // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"uge", metadata !"fpexcept.strict") + // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.ps.128(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i32 21, <4 x i1> {{.*}}) return _mm_cmp_ps_mask(a, b, _CMP_NLT_UQ); } __mmask8 test_mm_cmp_ps_mask_nle_uq(__m128 a, __m128 b) { // CHECK-LABEL: test_mm_cmp_ps_mask_nle_uq - // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"ugt", metadata !"fpexcept.strict") + // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.ps.128(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i32 22, <4 x i1> {{.*}}) return _mm_cmp_ps_mask(a, b, _CMP_NLE_UQ); } __mmask8 test_mm_cmp_ps_mask_ord_s(__m128 a, __m128 b) { // CHECK-LABEL: test_mm_cmp_ps_mask_ord_s - // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"ord", metadata !"fpexcept.strict") + // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.ps.128(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i32 23, <4 x i1> {{.*}}) return _mm_cmp_ps_mask(a, b, _CMP_ORD_S); } __mmask8 test_mm_cmp_ps_mask_eq_us(__m128 a, __m128 b) { // CHECK-LABEL: test_mm_cmp_ps_mask_eq_us - // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"ueq", metadata !"fpexcept.strict") + // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.ps.128(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i32 24, <4 x i1> {{.*}}) return _mm_cmp_ps_mask(a, b, _CMP_EQ_US); } __mmask8 test_mm_cmp_ps_mask_nge_uq(__m128 a, __m128 b) { // CHECK-LABEL: test_mm_cmp_ps_mask_nge_uq - // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"ult", metadata !"fpexcept.strict") + // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.ps.128(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i32 25, <4 x i1> {{.*}}) return _mm_cmp_ps_mask(a, b, _CMP_NGE_UQ); } __mmask8 test_mm_cmp_ps_mask_ngt_uq(__m128 a, __m128 b) { // CHECK-LABEL: test_mm_cmp_ps_mask_ngt_uq - // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(<4 x float> %{{.*}}, <4 
x float> %{{.*}}, metadata !"ule", metadata !"fpexcept.strict") + // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.ps.128(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i32 26, <4 x i1> {{.*}}) return _mm_cmp_ps_mask(a, b, _CMP_NGT_UQ); } __mmask8 test_mm_cmp_ps_mask_false_os(__m128 a, __m128 b) { // CHECK-LABEL: test_mm_cmp_ps_mask_false_os - // CHECK: call <4 x i1> @llvm.x86.avx512.cmp.ps.128(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i32 27) + // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.ps.128(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i32 27, <4 x i1> {{.*}}) return _mm_cmp_ps_mask(a, b, _CMP_FALSE_OS); } __mmask8 test_mm_cmp_ps_mask_neq_os(__m128 a, __m128 b) { // CHECK-LABEL: test_mm_cmp_ps_mask_neq_os - // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"one", metadata !"fpexcept.strict") + // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.ps.128(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i32 28, <4 x i1> {{.*}}) return _mm_cmp_ps_mask(a, b, _CMP_NEQ_OS); } __mmask8 test_mm_cmp_ps_mask_ge_oq(__m128 a, __m128 b) { // CHECK-LABEL: test_mm_cmp_ps_mask_ge_oq - // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"oge", metadata !"fpexcept.strict") + // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.ps.128(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i32 29, <4 x i1> {{.*}}) return _mm_cmp_ps_mask(a, b, _CMP_GE_OQ); } __mmask8 test_mm_cmp_ps_mask_gt_oq(__m128 a, __m128 b) { // CHECK-LABEL: test_mm_cmp_ps_mask_gt_oq - // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"ogt", metadata !"fpexcept.strict") + // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.ps.128(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i32 30, <4 x i1> {{.*}}) return _mm_cmp_ps_mask(a, b, _CMP_GT_OQ); } __mmask8 test_mm_cmp_ps_mask_true_us(__m128 a, __m128 b) { // CHECK-LABEL: test_mm_cmp_ps_mask_true_us - // CHECK: call <4 x i1> @llvm.x86.avx512.cmp.ps.128(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i32 31) + // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.ps.128(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i32 31, <4 x i1> {{.*}}) return _mm_cmp_ps_mask(a, b, _CMP_TRUE_US); } __mmask8 test_mm_mask_cmp_ps_mask_eq_oq(__mmask8 m, __m128 a, __m128 b) { // CHECK-LABEL: @test_mm_mask_cmp_ps_mask_eq_oq - // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"oeq", metadata !"fpexcept.strict") - // CHECK: and <4 x i1> [[CMP]], {{.*}} + // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.ps.128(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i32 0, <4 x i1> {{.*}}) return _mm_mask_cmp_ps_mask(m, a, b, _CMP_EQ_OQ); } __mmask8 test_mm_mask_cmp_ps_mask_lt_os(__mmask8 m, __m128 a, __m128 b) { // CHECK-LABEL: test_mm_mask_cmp_ps_mask_lt_os - // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"olt", metadata !"fpexcept.strict") - // CHECK: and <4 x i1> [[CMP]], {{.*}} + // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.ps.128(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i32 1, <4 x i1> {{.*}}) return _mm_mask_cmp_ps_mask(m, a, b, _CMP_LT_OS); } __mmask8 test_mm_mask_cmp_ps_mask_le_os(__mmask8 m, __m128 a, __m128 b) { // CHECK-LABEL: test_mm_mask_cmp_ps_mask_le_os - // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata 
!"ole", metadata !"fpexcept.strict") - // CHECK: and <4 x i1> [[CMP]], {{.*}} + // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.ps.128(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i32 2, <4 x i1> {{.*}}) return _mm_mask_cmp_ps_mask(m, a, b, _CMP_LE_OS); } __mmask8 test_mm_mask_cmp_ps_mask_unord_q(__mmask8 m, __m128 a, __m128 b) { // CHECK-LABEL: test_mm_mask_cmp_ps_mask_unord_q - // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"uno", metadata !"fpexcept.strict") - // CHECK: and <4 x i1> [[CMP]], {{.*}} + // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.ps.128(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i32 3, <4 x i1> {{.*}}) return _mm_mask_cmp_ps_mask(m, a, b, _CMP_UNORD_Q); } __mmask8 test_mm_mask_cmp_ps_mask_neq_uq(__mmask8 m, __m128 a, __m128 b) { // CHECK-LABEL: test_mm_mask_cmp_ps_mask_neq_uq - // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"une", metadata !"fpexcept.strict") - // CHECK: and <4 x i1> [[CMP]], {{.*}} + // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.ps.128(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i32 4, <4 x i1> {{.*}}) return _mm_mask_cmp_ps_mask(m, a, b, _CMP_NEQ_UQ); } __mmask8 test_mm_mask_cmp_ps_mask_nlt_us(__mmask8 m, __m128 a, __m128 b) { // CHECK-LABEL: test_mm_mask_cmp_ps_mask_nlt_us - // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"uge", metadata !"fpexcept.strict") - // CHECK: and <4 x i1> [[CMP]], {{.*}} + // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.ps.128(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i32 5, <4 x i1> {{.*}}) return _mm_mask_cmp_ps_mask(m, a, b, _CMP_NLT_US); } __mmask8 test_mm_mask_cmp_ps_mask_nle_us(__mmask8 m, __m128 a, __m128 b) { // CHECK-LABEL: test_mm_mask_cmp_ps_mask_nle_us - // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"ugt", metadata !"fpexcept.strict") - // CHECK: and <4 x i1> [[CMP]], {{.*}} + // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.ps.128(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i32 6, <4 x i1> {{.*}}) return _mm_mask_cmp_ps_mask(m, a, b, _CMP_NLE_US); } __mmask8 test_mm_mask_cmp_ps_mask_ord_q(__mmask8 m, __m128 a, __m128 b) { // CHECK-LABEL: test_mm_mask_cmp_ps_mask_ord_q - // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"ord", metadata !"fpexcept.strict") - // CHECK: and <4 x i1> [[CMP]], {{.*}} + // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.ps.128(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i32 7, <4 x i1> {{.*}}) return _mm_mask_cmp_ps_mask(m, a, b, _CMP_ORD_Q); } __mmask8 test_mm_mask_cmp_ps_mask_eq_uq(__mmask8 m, __m128 a, __m128 b) { // CHECK-LABEL: test_mm_mask_cmp_ps_mask_eq_uq - // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"ueq", metadata !"fpexcept.strict") - // CHECK: and <4 x i1> [[CMP]], {{.*}} + // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.ps.128(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i32 8, <4 x i1> {{.*}}) return _mm_mask_cmp_ps_mask(m, a, b, _CMP_EQ_UQ); } __mmask8 test_mm_mask_cmp_ps_mask_nge_us(__mmask8 m, __m128 a, __m128 b) { // CHECK-LABEL: test_mm_mask_cmp_ps_mask_nge_us - // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(<4 x float> %{{.*}}, 
<4 x float> %{{.*}}, metadata !"ult", metadata !"fpexcept.strict") - // CHECK: and <4 x i1> [[CMP]], {{.*}} + // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.ps.128(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i32 9, <4 x i1> {{.*}}) return _mm_mask_cmp_ps_mask(m, a, b, _CMP_NGE_US); } __mmask8 test_mm_mask_cmp_ps_mask_ngt_us(__mmask8 m, __m128 a, __m128 b) { // CHECK-LABEL: test_mm_mask_cmp_ps_mask_ngt_us - // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"ule", metadata !"fpexcept.strict") - // CHECK: and <4 x i1> [[CMP]], {{.*}} + // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.ps.128(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i32 10, <4 x i1> {{.*}}) return _mm_mask_cmp_ps_mask(m, a, b, _CMP_NGT_US); } __mmask8 test_mm_mask_cmp_ps_mask_false_oq(__mmask8 m, __m128 a, __m128 b) { // CHECK-LABEL: test_mm_mask_cmp_ps_mask_false_oq - // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.x86.avx512.cmp.ps.128(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i32 11) - // CHECK: and <4 x i1> [[CMP]], {{.*}} + // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.ps.128(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i32 11, <4 x i1> {{.*}}) return _mm_mask_cmp_ps_mask(m, a, b, _CMP_FALSE_OQ); } __mmask8 test_mm_mask_cmp_ps_mask_neq_oq(__mmask8 m, __m128 a, __m128 b) { // CHECK-LABEL: test_mm_mask_cmp_ps_mask_neq_oq - // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"one", metadata !"fpexcept.strict") - // CHECK: and <4 x i1> [[CMP]], {{.*}} + // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.ps.128(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i32 12, <4 x i1> {{.*}}) return _mm_mask_cmp_ps_mask(m, a, b, _CMP_NEQ_OQ); } __mmask8 test_mm_mask_cmp_ps_mask_ge_os(__mmask8 m, __m128 a, __m128 b) { // CHECK-LABEL: test_mm_mask_cmp_ps_mask_ge_os - // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"oge", metadata !"fpexcept.strict") - // CHECK: and <4 x i1> [[CMP]], {{.*}} + // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.ps.128(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i32 13, <4 x i1> {{.*}}) return _mm_mask_cmp_ps_mask(m, a, b, _CMP_GE_OS); } __mmask8 test_mm_mask_cmp_ps_mask_gt_os(__mmask8 m, __m128 a, __m128 b) { // CHECK-LABEL: test_mm_mask_cmp_ps_mask_gt_os - // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"ogt", metadata !"fpexcept.strict") - // CHECK: and <4 x i1> [[CMP]], {{.*}} + // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.ps.128(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i32 14, <4 x i1> {{.*}}) return _mm_mask_cmp_ps_mask(m, a, b, _CMP_GT_OS); } __mmask8 test_mm_mask_cmp_ps_mask_true_uq(__mmask8 m, __m128 a, __m128 b) { // CHECK-LABEL: test_mm_mask_cmp_ps_mask_true_uq - // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.x86.avx512.cmp.ps.128(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i32 15) - // CHECK: and <4 x i1> [[CMP]], {{.*}} + // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.ps.128(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i32 15, <4 x i1> {{.*}}) return _mm_mask_cmp_ps_mask(m, a, b, _CMP_TRUE_UQ); } __mmask8 test_mm_mask_cmp_ps_mask_eq_os(__mmask8 m, __m128 a, __m128 b) { // CHECK-LABEL: test_mm_mask_cmp_ps_mask_eq_os - // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"oeq", metadata 
!"fpexcept.strict") - // CHECK: and <4 x i1> [[CMP]], {{.*}} + // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.ps.128(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i32 16, <4 x i1> {{.*}}) return _mm_mask_cmp_ps_mask(m, a, b, _CMP_EQ_OS); } __mmask8 test_mm_mask_cmp_ps_mask_lt_oq(__mmask8 m, __m128 a, __m128 b) { // CHECK-LABEL: test_mm_mask_cmp_ps_mask_lt_oq - // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"olt", metadata !"fpexcept.strict") - // CHECK: and <4 x i1> [[CMP]], {{.*}} + // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.ps.128(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i32 17, <4 x i1> {{.*}}) return _mm_mask_cmp_ps_mask(m, a, b, _CMP_LT_OQ); } __mmask8 test_mm_mask_cmp_ps_mask_le_oq(__mmask8 m, __m128 a, __m128 b) { // CHECK-LABEL: test_mm_mask_cmp_ps_mask_le_oq - // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"ole", metadata !"fpexcept.strict") - // CHECK: and <4 x i1> [[CMP]], {{.*}} + // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.ps.128(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i32 18, <4 x i1> {{.*}}) return _mm_mask_cmp_ps_mask(m, a, b, _CMP_LE_OQ); } __mmask8 test_mm_mask_cmp_ps_mask_unord_s(__mmask8 m, __m128 a, __m128 b) { // CHECK-LABEL: test_mm_mask_cmp_ps_mask_unord_s - // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"uno", metadata !"fpexcept.strict") - // CHECK: and <4 x i1> [[CMP]], {{.*}} + // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.ps.128(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i32 19, <4 x i1> {{.*}}) return _mm_mask_cmp_ps_mask(m, a, b, _CMP_UNORD_S); } __mmask8 test_mm_mask_cmp_ps_mask_neq_us(__mmask8 m, __m128 a, __m128 b) { // CHECK-LABEL: test_mm_mask_cmp_ps_mask_neq_us - // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"une", metadata !"fpexcept.strict") - // CHECK: and <4 x i1> [[CMP]], {{.*}} + // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.ps.128(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i32 20, <4 x i1> {{.*}}) return _mm_mask_cmp_ps_mask(m, a, b, _CMP_NEQ_US); } __mmask8 test_mm_mask_cmp_ps_mask_nlt_uq(__mmask8 m, __m128 a, __m128 b) { // CHECK-LABEL: test_mm_mask_cmp_ps_mask_nlt_uq - // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"uge", metadata !"fpexcept.strict") - // CHECK: and <4 x i1> [[CMP]], {{.*}} + // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.ps.128(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i32 21, <4 x i1> {{.*}}) return _mm_mask_cmp_ps_mask(m, a, b, _CMP_NLT_UQ); } __mmask8 test_mm_mask_cmp_ps_mask_nle_uq(__mmask8 m, __m128 a, __m128 b) { // CHECK-LABEL: test_mm_mask_cmp_ps_mask_nle_uq - // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"ugt", metadata !"fpexcept.strict") - // CHECK: and <4 x i1> [[CMP]], {{.*}} + // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.ps.128(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i32 22, <4 x i1> {{.*}}) return _mm_mask_cmp_ps_mask(m, a, b, _CMP_NLE_UQ); } __mmask8 test_mm_mask_cmp_ps_mask_ord_s(__mmask8 m, __m128 a, __m128 b) { // CHECK-LABEL: test_mm_mask_cmp_ps_mask_ord_s - // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(<4 x float> %{{.*}}, <4 x float> 
%{{.*}}, metadata !"ord", metadata !"fpexcept.strict") - // CHECK: and <4 x i1> [[CMP]], {{.*}} + // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.ps.128(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i32 23, <4 x i1> {{.*}}) return _mm_mask_cmp_ps_mask(m, a, b, _CMP_ORD_S); } __mmask8 test_mm_mask_cmp_ps_mask_eq_us(__mmask8 m, __m128 a, __m128 b) { // CHECK-LABEL: test_mm_mask_cmp_ps_mask_eq_us - // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"ueq", metadata !"fpexcept.strict") - // CHECK: and <4 x i1> [[CMP]], {{.*}} + // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.ps.128(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i32 24, <4 x i1> {{.*}}) return _mm_mask_cmp_ps_mask(m, a, b, _CMP_EQ_US); } __mmask8 test_mm_mask_cmp_ps_mask_nge_uq(__mmask8 m, __m128 a, __m128 b) { // CHECK-LABEL: test_mm_mask_cmp_ps_mask_nge_uq - // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"ult", metadata !"fpexcept.strict") - // CHECK: and <4 x i1> [[CMP]], {{.*}} + // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.ps.128(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i32 25, <4 x i1> {{.*}}) return _mm_mask_cmp_ps_mask(m, a, b, _CMP_NGE_UQ); } __mmask8 test_mm_mask_cmp_ps_mask_ngt_uq(__mmask8 m, __m128 a, __m128 b) { // CHECK-LABEL: test_mm_mask_cmp_ps_mask_ngt_uq - // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"ule", metadata !"fpexcept.strict") - // CHECK: and <4 x i1> [[CMP]], {{.*}} + // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.ps.128(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i32 26, <4 x i1> {{.*}}) return _mm_mask_cmp_ps_mask(m, a, b, _CMP_NGT_UQ); } __mmask8 test_mm_mask_cmp_ps_mask_false_os(__mmask8 m, __m128 a, __m128 b) { // CHECK-LABEL: test_mm_mask_cmp_ps_mask_false_os - // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.x86.avx512.cmp.ps.128(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i32 27) - // CHECK: and <4 x i1> [[CMP]], {{.*}} + // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.ps.128(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i32 27, <4 x i1> {{.*}}) return _mm_mask_cmp_ps_mask(m, a, b, _CMP_FALSE_OS); } __mmask8 test_mm_mask_cmp_ps_mask_neq_os(__mmask8 m, __m128 a, __m128 b) { // CHECK-LABEL: test_mm_mask_cmp_ps_mask_neq_os - // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"one", metadata !"fpexcept.strict") - // CHECK: and <4 x i1> [[CMP]], {{.*}} + // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.ps.128(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i32 28, <4 x i1> {{.*}}) return _mm_mask_cmp_ps_mask(m, a, b, _CMP_NEQ_OS); } __mmask8 test_mm_mask_cmp_ps_mask_ge_oq(__mmask8 m, __m128 a, __m128 b) { // CHECK-LABEL: test_mm_mask_cmp_ps_mask_ge_oq - // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"oge", metadata !"fpexcept.strict") - // CHECK: and <4 x i1> [[CMP]], {{.*}} + // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.ps.128(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i32 29, <4 x i1> {{.*}}) return _mm_mask_cmp_ps_mask(m, a, b, _CMP_GE_OQ); } __mmask8 test_mm_mask_cmp_ps_mask_gt_oq(__mmask8 m, __m128 a, __m128 b) { // CHECK-LABEL: test_mm_mask_cmp_ps_mask_gt_oq - // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, 
metadata !"ogt", metadata !"fpexcept.strict") - // CHECK: and <4 x i1> [[CMP]], {{.*}} + // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.ps.128(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i32 30, <4 x i1> {{.*}}) return _mm_mask_cmp_ps_mask(m, a, b, _CMP_GT_OQ); } __mmask8 test_mm_mask_cmp_ps_mask_true_us(__mmask8 m, __m128 a, __m128 b) { // CHECK-LABEL: test_mm_mask_cmp_ps_mask_true_us - // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.x86.avx512.cmp.ps.128(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i32 31) - // CHECK: and <4 x i1> [[CMP]], {{.*}} + // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.ps.128(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i32 31, <4 x i1> {{.*}}) return _mm_mask_cmp_ps_mask(m, a, b, _CMP_TRUE_US); } __mmask8 test_mm_cmp_pd_mask_eq_oq(__m128d a, __m128d b) { // CHECK-LABEL: @test_mm_cmp_pd_mask_eq_oq - // CHECK: call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"oeq", metadata !"fpexcept.strict") + // CHECK: call <2 x i1> @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i32 0, <2 x i1> {{.*}}) return _mm_cmp_pd_mask(a, b, _CMP_EQ_OQ); } __mmask8 test_mm_cmp_pd_mask_lt_os(__m128d a, __m128d b) { // CHECK-LABEL: test_mm_cmp_pd_mask_lt_os - // CHECK: call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"olt", metadata !"fpexcept.strict") + // CHECK: call <2 x i1> @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i32 1, <2 x i1> {{.*}}) return _mm_cmp_pd_mask(a, b, _CMP_LT_OS); } __mmask8 test_mm_cmp_pd_mask_le_os(__m128d a, __m128d b) { // CHECK-LABEL: test_mm_cmp_pd_mask_le_os - // CHECK: call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"ole", metadata !"fpexcept.strict") + // CHECK: call <2 x i1> @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i32 2, <2 x i1> {{.*}}) return _mm_cmp_pd_mask(a, b, _CMP_LE_OS); } __mmask8 test_mm_cmp_pd_mask_unord_q(__m128d a, __m128d b) { // CHECK-LABEL: test_mm_cmp_pd_mask_unord_q - // CHECK: call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"uno", metadata !"fpexcept.strict") + // CHECK: call <2 x i1> @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i32 3, <2 x i1> {{.*}}) return _mm_cmp_pd_mask(a, b, _CMP_UNORD_Q); } __mmask8 test_mm_cmp_pd_mask_neq_uq(__m128d a, __m128d b) { // CHECK-LABEL: test_mm_cmp_pd_mask_neq_uq - // CHECK: call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"une", metadata !"fpexcept.strict") + // CHECK: call <2 x i1> @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i32 4, <2 x i1> {{.*}}) return _mm_cmp_pd_mask(a, b, _CMP_NEQ_UQ); } __mmask8 test_mm_cmp_pd_mask_nlt_us(__m128d a, __m128d b) { // CHECK-LABEL: test_mm_cmp_pd_mask_nlt_us - // CHECK: call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"uge", metadata !"fpexcept.strict") + // CHECK: call <2 x i1> @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i32 5, <2 x i1> {{.*}}) return _mm_cmp_pd_mask(a, b, _CMP_NLT_US); } __mmask8 test_mm_cmp_pd_mask_nle_us(__m128d a, __m128d b) { // CHECK-LABEL: test_mm_cmp_pd_mask_nle_us - // CHECK: call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata 
!"ugt", metadata !"fpexcept.strict") + // CHECK: call <2 x i1> @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i32 6, <2 x i1> {{.*}}) return _mm_cmp_pd_mask(a, b, _CMP_NLE_US); } __mmask8 test_mm_cmp_pd_mask_ord_q(__m128d a, __m128d b) { // CHECK-LABEL: test_mm_cmp_pd_mask_ord_q - // CHECK: call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"ord", metadata !"fpexcept.strict") + // CHECK: call <2 x i1> @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i32 7, <2 x i1> {{.*}}) return _mm_cmp_pd_mask(a, b, _CMP_ORD_Q); } __mmask8 test_mm_cmp_pd_mask_eq_uq(__m128d a, __m128d b) { // CHECK-LABEL: test_mm_cmp_pd_mask_eq_uq - // CHECK: call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"ueq", metadata !"fpexcept.strict") + // CHECK: call <2 x i1> @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i32 8, <2 x i1> {{.*}}) return _mm_cmp_pd_mask(a, b, _CMP_EQ_UQ); } __mmask8 test_mm_cmp_pd_mask_nge_us(__m128d a, __m128d b) { // CHECK-LABEL: test_mm_cmp_pd_mask_nge_us - // CHECK: call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"ult", metadata !"fpexcept.strict") + // CHECK: call <2 x i1> @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i32 9, <2 x i1> {{.*}}) return _mm_cmp_pd_mask(a, b, _CMP_NGE_US); } __mmask8 test_mm_cmp_pd_mask_ngt_us(__m128d a, __m128d b) { // CHECK-LABEL: test_mm_cmp_pd_mask_ngt_us - // CHECK: call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"ule", metadata !"fpexcept.strict") + // CHECK: call <2 x i1> @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i32 10, <2 x i1> {{.*}}) return _mm_cmp_pd_mask(a, b, _CMP_NGT_US); } __mmask8 test_mm_cmp_pd_mask_false_oq(__m128d a, __m128d b) { // CHECK-LABEL: test_mm_cmp_pd_mask_false_oq - // CHECK: call <2 x i1> @llvm.x86.avx512.cmp.pd.128(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i32 11) + // CHECK: call <2 x i1> @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i32 11, <2 x i1> {{.*}}) return _mm_cmp_pd_mask(a, b, _CMP_FALSE_OQ); } __mmask8 test_mm_cmp_pd_mask_neq_oq(__m128d a, __m128d b) { // CHECK-LABEL: test_mm_cmp_pd_mask_neq_oq - // CHECK: call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"one", metadata !"fpexcept.strict") + // CHECK: call <2 x i1> @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i32 12, <2 x i1> {{.*}}) return _mm_cmp_pd_mask(a, b, _CMP_NEQ_OQ); } __mmask8 test_mm_cmp_pd_mask_ge_os(__m128d a, __m128d b) { // CHECK-LABEL: test_mm_cmp_pd_mask_ge_os - // CHECK: call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"oge", metadata !"fpexcept.strict") + // CHECK: call <2 x i1> @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i32 13, <2 x i1> {{.*}}) return _mm_cmp_pd_mask(a, b, _CMP_GE_OS); } __mmask8 test_mm_cmp_pd_mask_gt_os(__m128d a, __m128d b) { // CHECK-LABEL: test_mm_cmp_pd_mask_gt_os - // CHECK: call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"ogt", metadata !"fpexcept.strict") + // CHECK: call <2 x i1> @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> %{{.*}}, <2 x double> %{{.*}}, 
i32 14, <2 x i1> {{.*}}) return _mm_cmp_pd_mask(a, b, _CMP_GT_OS); } __mmask8 test_mm_cmp_pd_mask_true_uq(__m128d a, __m128d b) { // CHECK-LABEL: test_mm_cmp_pd_mask_true_uq - // CHECK: call <2 x i1> @llvm.x86.avx512.cmp.pd.128(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i32 15) + // CHECK: call <2 x i1> @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i32 15, <2 x i1> {{.*}}) return _mm_cmp_pd_mask(a, b, _CMP_TRUE_UQ); } __mmask8 test_mm_cmp_pd_mask_eq_os(__m128d a, __m128d b) { // CHECK-LABEL: test_mm_cmp_pd_mask_eq_os - // CHECK: call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"oeq", metadata !"fpexcept.strict") + // CHECK: call <2 x i1> @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i32 16, <2 x i1> {{.*}}) return _mm_cmp_pd_mask(a, b, _CMP_EQ_OS); } __mmask8 test_mm_cmp_pd_mask_lt_oq(__m128d a, __m128d b) { // CHECK-LABEL: test_mm_cmp_pd_mask_lt_oq - // CHECK: call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"olt", metadata !"fpexcept.strict") + // CHECK: call <2 x i1> @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i32 17, <2 x i1> {{.*}}) return _mm_cmp_pd_mask(a, b, _CMP_LT_OQ); } __mmask8 test_mm_cmp_pd_mask_le_oq(__m128d a, __m128d b) { // CHECK-LABEL: test_mm_cmp_pd_mask_le_oq - // CHECK: call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"ole", metadata !"fpexcept.strict") + // CHECK: call <2 x i1> @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i32 18, <2 x i1> {{.*}}) return _mm_cmp_pd_mask(a, b, _CMP_LE_OQ); } __mmask8 test_mm_cmp_pd_mask_unord_s(__m128d a, __m128d b) { // CHECK-LABEL: test_mm_cmp_pd_mask_unord_s - // CHECK: call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"uno", metadata !"fpexcept.strict") + // CHECK: call <2 x i1> @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i32 19, <2 x i1> {{.*}}) return _mm_cmp_pd_mask(a, b, _CMP_UNORD_S); } __mmask8 test_mm_cmp_pd_mask_neq_us(__m128d a, __m128d b) { // CHECK-LABEL: test_mm_cmp_pd_mask_neq_us - // CHECK: call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"une", metadata !"fpexcept.strict") + // CHECK: call <2 x i1> @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i32 20, <2 x i1> {{.*}}) return _mm_cmp_pd_mask(a, b, _CMP_NEQ_US); } __mmask8 test_mm_cmp_pd_mask_nlt_uq(__m128d a, __m128d b) { // CHECK-LABEL: test_mm_cmp_pd_mask_nlt_uq - // CHECK: call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"uge", metadata !"fpexcept.strict") + // CHECK: call <2 x i1> @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i32 21, <2 x i1> {{.*}}) return _mm_cmp_pd_mask(a, b, _CMP_NLT_UQ); } __mmask8 test_mm_cmp_pd_mask_nle_uq(__m128d a, __m128d b) { // CHECK-LABEL: test_mm_cmp_pd_mask_nle_uq - // CHECK: call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"ugt", metadata !"fpexcept.strict") + // CHECK: call <2 x i1> @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i32 22, <2 x i1> {{.*}}) return _mm_cmp_pd_mask(a, b, _CMP_NLE_UQ); } __mmask8 test_mm_cmp_pd_mask_ord_s(__m128d a, __m128d b) { // 
CHECK-LABEL: test_mm_cmp_pd_mask_ord_s - // CHECK: call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"ord", metadata !"fpexcept.strict") + // CHECK: call <2 x i1> @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i32 23, <2 x i1> {{.*}}) return _mm_cmp_pd_mask(a, b, _CMP_ORD_S); } __mmask8 test_mm_cmp_pd_mask_eq_us(__m128d a, __m128d b) { // CHECK-LABEL: test_mm_cmp_pd_mask_eq_us - // CHECK: call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"ueq", metadata !"fpexcept.strict") + // CHECK: call <2 x i1> @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i32 24, <2 x i1> {{.*}}) return _mm_cmp_pd_mask(a, b, _CMP_EQ_US); } __mmask8 test_mm_cmp_pd_mask_nge_uq(__m128d a, __m128d b) { // CHECK-LABEL: test_mm_cmp_pd_mask_nge_uq - // CHECK: call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"ult", metadata !"fpexcept.strict") + // CHECK: call <2 x i1> @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i32 25, <2 x i1> {{.*}}) return _mm_cmp_pd_mask(a, b, _CMP_NGE_UQ); } __mmask8 test_mm_cmp_pd_mask_ngt_uq(__m128d a, __m128d b) { // CHECK-LABEL: test_mm_cmp_pd_mask_ngt_uq - // CHECK: call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"ule", metadata !"fpexcept.strict") + // CHECK: call <2 x i1> @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i32 26, <2 x i1> {{.*}}) return _mm_cmp_pd_mask(a, b, _CMP_NGT_UQ); } __mmask8 test_mm_cmp_pd_mask_false_os(__m128d a, __m128d b) { // CHECK-LABEL: test_mm_cmp_pd_mask_false_os - // CHECK: call <2 x i1> @llvm.x86.avx512.cmp.pd.128(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i32 27) + // CHECK: call <2 x i1> @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i32 27, <2 x i1> {{.*}}) return _mm_cmp_pd_mask(a, b, _CMP_FALSE_OS); } __mmask8 test_mm_cmp_pd_mask_neq_os(__m128d a, __m128d b) { // CHECK-LABEL: test_mm_cmp_pd_mask_neq_os - // CHECK: call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"one", metadata !"fpexcept.strict") + // CHECK: call <2 x i1> @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i32 28, <2 x i1> {{.*}}) return _mm_cmp_pd_mask(a, b, _CMP_NEQ_OS); } __mmask8 test_mm_cmp_pd_mask_ge_oq(__m128d a, __m128d b) { // CHECK-LABEL: test_mm_cmp_pd_mask_ge_oq - // CHECK: call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"oge", metadata !"fpexcept.strict") + // CHECK: call <2 x i1> @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i32 29, <2 x i1> {{.*}}) return _mm_cmp_pd_mask(a, b, _CMP_GE_OQ); } __mmask8 test_mm_cmp_pd_mask_gt_oq(__m128d a, __m128d b) { // CHECK-LABEL: test_mm_cmp_pd_mask_gt_oq - // CHECK: call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"ogt", metadata !"fpexcept.strict") + // CHECK: call <2 x i1> @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i32 30, <2 x i1> {{.*}}) return _mm_cmp_pd_mask(a, b, _CMP_GT_OQ); } __mmask8 test_mm_cmp_pd_mask_true_us(__m128d a, __m128d b) { // CHECK-LABEL: test_mm_cmp_pd_mask_true_us - // CHECK: call <2 x i1> @llvm.x86.avx512.cmp.pd.128(<2 x double> %{{.*}}, <2 x double> 
%{{.*}}, i32 31) + // CHECK: call <2 x i1> @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i32 31, <2 x i1> {{.*}}) return _mm_cmp_pd_mask(a, b, _CMP_TRUE_US); } __mmask8 test_mm_mask_cmp_pd_mask_eq_oq(__mmask8 m, __m128d a, __m128d b) { // CHECK-LABEL: @test_mm_mask_cmp_pd_mask_eq_oq - // CHECK: [[CMP:%.*]] = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"oeq", metadata !"fpexcept.strict") - // CHECK: and <2 x i1> [[CMP]], {{.*}} + // CHECK: call <2 x i1> @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i32 0, <2 x i1> {{.*}}) return _mm_mask_cmp_pd_mask(m, a, b, _CMP_EQ_OQ); } __mmask8 test_mm_mask_cmp_pd_mask_lt_os(__mmask8 m, __m128d a, __m128d b) { // CHECK-LABEL: test_mm_mask_cmp_pd_mask_lt_os - // CHECK: [[CMP:%.*]] = call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"olt", metadata !"fpexcept.strict") - // CHECK: and <2 x i1> [[CMP]], {{.*}} + // CHECK: call <2 x i1> @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i32 1, <2 x i1> {{.*}}) return _mm_mask_cmp_pd_mask(m, a, b, _CMP_LT_OS); } __mmask8 test_mm_mask_cmp_pd_mask_le_os(__mmask8 m, __m128d a, __m128d b) { // CHECK-LABEL: test_mm_mask_cmp_pd_mask_le_os - // CHECK: [[CMP:%.*]] = call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"ole", metadata !"fpexcept.strict") - // CHECK: and <2 x i1> [[CMP]], {{.*}} + // CHECK: call <2 x i1> @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i32 2, <2 x i1> {{.*}}) return _mm_mask_cmp_pd_mask(m, a, b, _CMP_LE_OS); } __mmask8 test_mm_mask_cmp_pd_mask_unord_q(__mmask8 m, __m128d a, __m128d b) { // CHECK-LABEL: test_mm_mask_cmp_pd_mask_unord_q - // CHECK: [[CMP:%.*]] = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"uno", metadata !"fpexcept.strict") - // CHECK: and <2 x i1> [[CMP]], {{.*}} + // CHECK: call <2 x i1> @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i32 3, <2 x i1> {{.*}}) return _mm_mask_cmp_pd_mask(m, a, b, _CMP_UNORD_Q); } __mmask8 test_mm_mask_cmp_pd_mask_neq_uq(__mmask8 m, __m128d a, __m128d b) { // CHECK-LABEL: test_mm_mask_cmp_pd_mask_neq_uq - // CHECK: [[CMP:%.*]] = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"une", metadata !"fpexcept.strict") - // CHECK: and <2 x i1> [[CMP]], {{.*}} + // CHECK: call <2 x i1> @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i32 4, <2 x i1> {{.*}}) return _mm_mask_cmp_pd_mask(m, a, b, _CMP_NEQ_UQ); } __mmask8 test_mm_mask_cmp_pd_mask_nlt_us(__mmask8 m, __m128d a, __m128d b) { // CHECK-LABEL: test_mm_mask_cmp_pd_mask_nlt_us - // CHECK: [[CMP:%.*]] = call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"uge", metadata !"fpexcept.strict") - // CHECK: and <2 x i1> [[CMP]], {{.*}} + // CHECK: call <2 x i1> @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i32 5, <2 x i1> {{.*}}) return _mm_mask_cmp_pd_mask(m, a, b, _CMP_NLT_US); } __mmask8 test_mm_mask_cmp_pd_mask_nle_us(__mmask8 m, __m128d a, __m128d b) { // CHECK-LABEL: test_mm_mask_cmp_pd_mask_nle_us - // CHECK: [[CMP:%.*]] = call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, 
metadata !"ugt", metadata !"fpexcept.strict") - // CHECK: and <2 x i1> [[CMP]], {{.*}} + // CHECK: call <2 x i1> @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i32 6, <2 x i1> {{.*}}) return _mm_mask_cmp_pd_mask(m, a, b, _CMP_NLE_US); } __mmask8 test_mm_mask_cmp_pd_mask_ord_q(__mmask8 m, __m128d a, __m128d b) { // CHECK-LABEL: test_mm_mask_cmp_pd_mask_ord_q - // CHECK: [[CMP:%.*]] = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"ord", metadata !"fpexcept.strict") - // CHECK: and <2 x i1> [[CMP]], {{.*}} + // CHECK: call <2 x i1> @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i32 7, <2 x i1> {{.*}}) return _mm_mask_cmp_pd_mask(m, a, b, _CMP_ORD_Q); } __mmask8 test_mm_mask_cmp_pd_mask_eq_uq(__mmask8 m, __m128d a, __m128d b) { // CHECK-LABEL: test_mm_mask_cmp_pd_mask_eq_uq - // CHECK: [[CMP:%.*]] = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"ueq", metadata !"fpexcept.strict") - // CHECK: and <2 x i1> [[CMP]], {{.*}} + // CHECK: call <2 x i1> @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i32 8, <2 x i1> {{.*}}) return _mm_mask_cmp_pd_mask(m, a, b, _CMP_EQ_UQ); } __mmask8 test_mm_mask_cmp_pd_mask_nge_us(__mmask8 m, __m128d a, __m128d b) { // CHECK-LABEL: test_mm_mask_cmp_pd_mask_nge_us - // CHECK: [[CMP:%.*]] = call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"ult", metadata !"fpexcept.strict") - // CHECK: and <2 x i1> [[CMP]], {{.*}} + // CHECK: call <2 x i1> @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i32 9, <2 x i1> {{.*}}) return _mm_mask_cmp_pd_mask(m, a, b, _CMP_NGE_US); } __mmask8 test_mm_mask_cmp_pd_mask_ngt_us(__mmask8 m, __m128d a, __m128d b) { // CHECK-LABEL: test_mm_mask_cmp_pd_mask_ngt_us - // CHECK: [[CMP:%.*]] = call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"ule", metadata !"fpexcept.strict") - // CHECK: and <2 x i1> [[CMP]], {{.*}} + // CHECK: call <2 x i1> @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i32 10, <2 x i1> {{.*}}) return _mm_mask_cmp_pd_mask(m, a, b, _CMP_NGT_US); } __mmask8 test_mm_mask_cmp_pd_mask_false_oq(__mmask8 m, __m128d a, __m128d b) { // CHECK-LABEL: test_mm_mask_cmp_pd_mask_false_oq - // [[CMP]] = call <2 x i1> @llvm.x86.avx512.cmp.pd.128(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i32 11) - // CHECK: and <2 x i1> [[CMP]], {{.*}} + // CHECK: call <2 x i1> @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i32 11, <2 x i1> {{.*}}) return _mm_mask_cmp_pd_mask(m, a, b, _CMP_FALSE_OQ); } __mmask8 test_mm_mask_cmp_pd_mask_neq_oq(__mmask8 m, __m128d a, __m128d b) { // CHECK-LABEL: test_mm_mask_cmp_pd_mask_neq_oq - // CHECK: [[CMP:%.*]] = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"one", metadata !"fpexcept.strict") - // CHECK: and <2 x i1> [[CMP]], {{.*}} + // CHECK: call <2 x i1> @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i32 12, <2 x i1> {{.*}}) return _mm_mask_cmp_pd_mask(m, a, b, _CMP_NEQ_OQ); } __mmask8 test_mm_mask_cmp_pd_mask_ge_os(__mmask8 m, __m128d a, __m128d b) { // CHECK-LABEL: test_mm_mask_cmp_pd_mask_ge_os - // CHECK: [[CMP:%.*]] = call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64(<2 x double> %{{.*}}, <2 x 
double> %{{.*}}, metadata !"oge", metadata !"fpexcept.strict") - // CHECK: and <2 x i1> [[CMP]], {{.*}} + // CHECK: call <2 x i1> @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i32 13, <2 x i1> {{.*}}) return _mm_mask_cmp_pd_mask(m, a, b, _CMP_GE_OS); } __mmask8 test_mm_mask_cmp_pd_mask_gt_os(__mmask8 m, __m128d a, __m128d b) { // CHECK-LABEL: test_mm_mask_cmp_pd_mask_gt_os - // CHECK: [[CMP:%.*]] = call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"ogt", metadata !"fpexcept.strict") - // CHECK: and <2 x i1> [[CMP]], {{.*}} + // CHECK: call <2 x i1> @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i32 14, <2 x i1> {{.*}}) return _mm_mask_cmp_pd_mask(m, a, b, _CMP_GT_OS); } __mmask8 test_mm_mask_cmp_pd_mask_true_uq(__mmask8 m, __m128d a, __m128d b) { // CHECK-LABEL: test_mm_mask_cmp_pd_mask_true_uq - // [[CMP]] = call <2 x i1> @llvm.x86.avx512.cmp.pd.128(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i32 15) - // CHECK: and <2 x i1> [[CMP]], {{.*}} + // CHECK: call <2 x i1> @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i32 15, <2 x i1> {{.*}}) return _mm_mask_cmp_pd_mask(m, a, b, _CMP_TRUE_UQ); } __mmask8 test_mm_mask_cmp_pd_mask_eq_os(__mmask8 m, __m128d a, __m128d b) { // CHECK-LABEL: test_mm_mask_cmp_pd_mask_eq_os - // CHECK: [[CMP:%.*]] = call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"oeq", metadata !"fpexcept.strict") - // CHECK: and <2 x i1> [[CMP]], {{.*}} + // CHECK: call <2 x i1> @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i32 16, <2 x i1> {{.*}}) return _mm_mask_cmp_pd_mask(m, a, b, _CMP_EQ_OS); } __mmask8 test_mm_mask_cmp_pd_mask_lt_oq(__mmask8 m, __m128d a, __m128d b) { // CHECK-LABEL: test_mm_mask_cmp_pd_mask_lt_oq - // CHECK: [[CMP:%.*]] = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"olt", metadata !"fpexcept.strict") - // CHECK: and <2 x i1> [[CMP]], {{.*}} + // CHECK: call <2 x i1> @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i32 17, <2 x i1> {{.*}}) return _mm_mask_cmp_pd_mask(m, a, b, _CMP_LT_OQ); } __mmask8 test_mm_mask_cmp_pd_mask_le_oq(__mmask8 m, __m128d a, __m128d b) { // CHECK-LABEL: test_mm_mask_cmp_pd_mask_le_oq - // CHECK: [[CMP:%.*]] = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"ole", metadata !"fpexcept.strict") - // CHECK: and <2 x i1> [[CMP]], {{.*}} + // CHECK: call <2 x i1> @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i32 18, <2 x i1> {{.*}}) return _mm_mask_cmp_pd_mask(m, a, b, _CMP_LE_OQ); } __mmask8 test_mm_mask_cmp_pd_mask_unord_s(__mmask8 m, __m128d a, __m128d b) { // CHECK-LABEL: test_mm_mask_cmp_pd_mask_unord_s - // CHECK: [[CMP:%.*]] = call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"uno", metadata !"fpexcept.strict") - // CHECK: and <2 x i1> [[CMP]], {{.*}} + // CHECK: call <2 x i1> @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i32 19, <2 x i1> {{.*}}) return _mm_mask_cmp_pd_mask(m, a, b, _CMP_UNORD_S); } __mmask8 test_mm_mask_cmp_pd_mask_neq_us(__mmask8 m, __m128d a, __m128d b) { // CHECK-LABEL: test_mm_mask_cmp_pd_mask_neq_us - // CHECK: [[CMP:%.*]] = call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64(<2 x double> 
%{{.*}}, <2 x double> %{{.*}}, metadata !"une", metadata !"fpexcept.strict") - // CHECK: and <2 x i1> [[CMP]], {{.*}} + // CHECK: call <2 x i1> @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i32 20, <2 x i1> {{.*}}) return _mm_mask_cmp_pd_mask(m, a, b, _CMP_NEQ_US); } __mmask8 test_mm_mask_cmp_pd_mask_nlt_uq(__mmask8 m, __m128d a, __m128d b) { // CHECK-LABEL: test_mm_mask_cmp_pd_mask_nlt_uq - // CHECK: [[CMP:%.*]] = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"uge", metadata !"fpexcept.strict") - // CHECK: and <2 x i1> [[CMP]], {{.*}} + // CHECK: call <2 x i1> @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i32 21, <2 x i1> {{.*}}) return _mm_mask_cmp_pd_mask(m, a, b, _CMP_NLT_UQ); } __mmask8 test_mm_mask_cmp_pd_mask_nle_uq(__mmask8 m, __m128d a, __m128d b) { // CHECK-LABEL: test_mm_mask_cmp_pd_mask_nle_uq - // CHECK: [[CMP:%.*]] = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"ugt", metadata !"fpexcept.strict") - // CHECK: and <2 x i1> [[CMP]], {{.*}} + // CHECK: call <2 x i1> @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i32 22, <2 x i1> {{.*}}) return _mm_mask_cmp_pd_mask(m, a, b, _CMP_NLE_UQ); } __mmask8 test_mm_mask_cmp_pd_mask_ord_s(__mmask8 m, __m128d a, __m128d b) { // CHECK-LABEL: test_mm_mask_cmp_pd_mask_ord_s - // CHECK: [[CMP:%.*]] = call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"ord", metadata !"fpexcept.strict") - // CHECK: and <2 x i1> [[CMP]], {{.*}} + // CHECK: call <2 x i1> @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i32 23, <2 x i1> {{.*}}) return _mm_mask_cmp_pd_mask(m, a, b, _CMP_ORD_S); } __mmask8 test_mm_mask_cmp_pd_mask_eq_us(__mmask8 m, __m128d a, __m128d b) { // CHECK-LABEL: test_mm_mask_cmp_pd_mask_eq_us - // CHECK: [[CMP:%.*]] = call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"ueq", metadata !"fpexcept.strict") - // CHECK: and <2 x i1> [[CMP]], {{.*}} + // CHECK: call <2 x i1> @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i32 24, <2 x i1> {{.*}}) return _mm_mask_cmp_pd_mask(m, a, b, _CMP_EQ_US); } __mmask8 test_mm_mask_cmp_pd_mask_nge_uq(__mmask8 m, __m128d a, __m128d b) { // CHECK-LABEL: test_mm_mask_cmp_pd_mask_nge_uq - // CHECK: [[CMP:%.*]] = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"ult", metadata !"fpexcept.strict") - // CHECK: and <2 x i1> [[CMP]], {{.*}} + // CHECK: call <2 x i1> @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i32 25, <2 x i1> {{.*}}) return _mm_mask_cmp_pd_mask(m, a, b, _CMP_NGE_UQ); } __mmask8 test_mm_mask_cmp_pd_mask_ngt_uq(__mmask8 m, __m128d a, __m128d b) { // CHECK-LABEL: test_mm_mask_cmp_pd_mask_ngt_uq - // CHECK: [[CMP:%.*]] = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"ule", metadata !"fpexcept.strict") - // CHECK: and <2 x i1> [[CMP]], {{.*}} + // CHECK: call <2 x i1> @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i32 26, <2 x i1> {{.*}}) return _mm_mask_cmp_pd_mask(m, a, b, _CMP_NGT_UQ); } __mmask8 test_mm_mask_cmp_pd_mask_false_os(__mmask8 m, __m128d a, __m128d b) { // CHECK-LABEL: test_mm_mask_cmp_pd_mask_false_os - // [[CMP]] = 
call <2 x i1> @llvm.x86.avx512.cmp.pd.128(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i32 27) - // CHECK: and <2 x i1> [[CMP]], {{.*}} + // CHECK: call <2 x i1> @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i32 27, <2 x i1> {{.*}}) return _mm_mask_cmp_pd_mask(m, a, b, _CMP_FALSE_OS); } __mmask8 test_mm_mask_cmp_pd_mask_neq_os(__mmask8 m, __m128d a, __m128d b) { // CHECK-LABEL: test_mm_mask_cmp_pd_mask_neq_os - // CHECK: [[CMP:%.*]] = call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"one", metadata !"fpexcept.strict") - // CHECK: and <2 x i1> [[CMP]], {{.*}} + // CHECK: call <2 x i1> @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i32 28, <2 x i1> {{.*}}) return _mm_mask_cmp_pd_mask(m, a, b, _CMP_NEQ_OS); } __mmask8 test_mm_mask_cmp_pd_mask_ge_oq(__mmask8 m, __m128d a, __m128d b) { // CHECK-LABEL: test_mm_mask_cmp_pd_mask_ge_oq - // CHECK: [[CMP:%.*]] = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"oge", metadata !"fpexcept.strict") - // CHECK: and <2 x i1> [[CMP]], {{.*}} + // CHECK: call <2 x i1> @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i32 29, <2 x i1> {{.*}}) return _mm_mask_cmp_pd_mask(m, a, b, _CMP_GE_OQ); } __mmask8 test_mm_mask_cmp_pd_mask_gt_oq(__mmask8 m, __m128d a, __m128d b) { // CHECK-LABEL: test_mm_mask_cmp_pd_mask_gt_oq - // CHECK: [[CMP:%.*]] = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"ogt", metadata !"fpexcept.strict") - // CHECK: and <2 x i1> [[CMP]], {{.*}} + // CHECK: call <2 x i1> @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i32 30, <2 x i1> {{.*}}) return _mm_mask_cmp_pd_mask(m, a, b, _CMP_GT_OQ); } __mmask8 test_mm_mask_cmp_pd_mask_true_us(__mmask8 m, __m128d a, __m128d b) { // CHECK-LABEL: test_mm_mask_cmp_pd_mask_true_us - // [[CMP]] = call <2 x i1> @llvm.x86.avx512.cmp.pd.128(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i32 31) - // CHECK: and <2 x i1> [[CMP]], {{.*}} + // CHECK: call <2 x i1> @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i32 31, <2 x i1> {{.*}}) return _mm_mask_cmp_pd_mask(m, a, b, _CMP_TRUE_US); } diff --git a/llvm/include/llvm/IR/IntrinsicsX86.td b/llvm/include/llvm/IR/IntrinsicsX86.td --- a/llvm/include/llvm/IR/IntrinsicsX86.td +++ b/llvm/include/llvm/IR/IntrinsicsX86.td @@ -4749,26 +4749,26 @@ let TargetPrefix = "x86" in { // NOTE: These comparison intrinsics are not used by clang as long as the // distinction in signaling behaviour is not implemented. 
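The redefined intrinsics below gain a vXi1 mask operand right after the comparison immediate; only the 512-bit variants keep a trailing SAE/rounding immediate, and the removed unmasked avx512.cmp.p* forms are auto-upgraded by appending an all-ones mask (moved ahead of the SAE operand in the 512-bit case). A minimal IR sketch of the new 512-bit pd form, with a hypothetical function name, an all-ones mask, and the default rounding mode (4):

declare <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double>, <8 x double>, i32, <8 x i1>, i32)

define i8 @cmp_le_pd_512(<8 x double> %a, <8 x double> %b) {
  ; imm 2 = _CMP_LE_OS; the all-ones mask makes this an unmasked compare
  %k = call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %a, <8 x double> %b, i32 2, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 4)
  %r = bitcast <8 x i1> %k to i8
  ret i8 %r
}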
- def int_x86_avx512_cmp_ps_512 : + def int_x86_avx512_mask_cmp_ps_512 : Intrinsic<[llvm_v16i1_ty], [llvm_v16f32_ty, llvm_v16f32_ty, - llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, ImmArg>, ImmArg>]>; - def int_x86_avx512_cmp_pd_512 : + llvm_i32_ty, llvm_v16i1_ty, llvm_i32_ty], + [IntrNoMem, ImmArg>, ImmArg>]>; + def int_x86_avx512_mask_cmp_pd_512 : Intrinsic<[llvm_v8i1_ty], [llvm_v8f64_ty, llvm_v8f64_ty, - llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, ImmArg>, ImmArg>]>; - def int_x86_avx512_cmp_ps_256 : + llvm_i32_ty, llvm_v8i1_ty, llvm_i32_ty], + [IntrNoMem, ImmArg>, ImmArg>]>; + def int_x86_avx512_mask_cmp_ps_256 : Intrinsic<[llvm_v8i1_ty], [llvm_v8f32_ty, llvm_v8f32_ty, - llvm_i32_ty], [IntrNoMem, ImmArg>]>; - def int_x86_avx512_cmp_pd_256 : + llvm_i32_ty, llvm_v8i1_ty], [IntrNoMem, ImmArg>]>; + def int_x86_avx512_mask_cmp_pd_256 : Intrinsic<[llvm_v4i1_ty], [llvm_v4f64_ty, llvm_v4f64_ty, - llvm_i32_ty], [IntrNoMem, ImmArg>]>; - def int_x86_avx512_cmp_ps_128 : + llvm_i32_ty, llvm_v4i1_ty], [IntrNoMem, ImmArg>]>; + def int_x86_avx512_mask_cmp_ps_128 : Intrinsic<[llvm_v4i1_ty], [llvm_v4f32_ty, llvm_v4f32_ty, - llvm_i32_ty], [IntrNoMem, ImmArg>]>; - def int_x86_avx512_cmp_pd_128 : + llvm_i32_ty, llvm_v4i1_ty], [IntrNoMem, ImmArg>]>; + def int_x86_avx512_mask_cmp_pd_128 : Intrinsic<[llvm_v2i1_ty], [llvm_v2f64_ty, llvm_v2f64_ty, - llvm_i32_ty], [IntrNoMem, ImmArg>]>; + llvm_i32_ty, llvm_v2i1_ty], [IntrNoMem, ImmArg>]>; def int_x86_avx512_mask_cmp_ss : GCCBuiltin<"__builtin_ia32_cmpss_mask">, diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp --- a/llvm/lib/IR/AutoUpgrade.cpp +++ b/llvm/lib/IR/AutoUpgrade.cpp @@ -68,6 +68,19 @@ return true; } +// Upgrade the declaration of fp compare intrinsics that change return type +// from scalar to vXi1 mask. +static bool UpgradeX86MaskedFPCompare(Function *F, Intrinsic::ID IID, + Function *&NewFn) { + // Check if the return type is a vector. + if (F->getReturnType()->isVectorTy()) + return false; + + rename(F); + NewFn = Intrinsic::getDeclaration(F->getParent(), IID); + return true; +} + static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) { // All of the intrinsics matches below should be marked with which llvm // version started autoupgrading them. 
At some point in the future we would @@ -241,7 +254,7 @@ Name.startswith("avx512.mask.cmp.d") || // Added in 5.0 Name.startswith("avx512.mask.cmp.q") || // Added in 5.0 Name.startswith("avx512.mask.cmp.w") || // Added in 5.0 - Name.startswith("avx512.mask.cmp.p") || // Added in 7.0 + Name.startswith("avx512.cmp.p") || // Added in 12.0 Name.startswith("avx512.mask.ucmp.") || // Added in 5.0 Name.startswith("avx512.cvtb2mask.") || // Added in 7.0 Name.startswith("avx512.cvtw2mask.") || // Added in 7.0 @@ -456,6 +469,24 @@ if (Name == "avx2.mpsadbw") // Added in 3.6 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx2_mpsadbw, NewFn); + if (Name == "avx512.mask.cmp.pd.128") // Added in 7.0 + return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_pd_128, + NewFn); + if (Name == "avx512.mask.cmp.pd.256") // Added in 7.0 + return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_pd_256, + NewFn); + if (Name == "avx512.mask.cmp.pd.512") // Added in 7.0 + return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_pd_512, + NewFn); + if (Name == "avx512.mask.cmp.ps.128") // Added in 7.0 + return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_ps_128, + NewFn); + if (Name == "avx512.mask.cmp.ps.256") // Added in 7.0 + return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_ps_256, + NewFn); + if (Name == "avx512.mask.cmp.ps.512") // Added in 7.0 + return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_ps_512, + NewFn); // frcz.ss/sd may need to have an argument dropped. Added in 3.2 if (Name.startswith("xop.vfrcz.ss") && F->arg_size() == 2) { @@ -2000,38 +2031,36 @@ Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID), { CI->getOperand(0), CI->getArgOperand(1) }); Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2)); - } else if (IsX86 && Name.startswith("avx512.mask.cmp.p")) { - Type *OpTy = CI->getArgOperand(0)->getType(); + } else if (IsX86 && Name.startswith("avx512.cmp.p")) { + SmallVector Args(CI->arg_operands().begin(), + CI->arg_operands().end()); + Type *OpTy = Args[0]->getType(); unsigned VecWidth = OpTy->getPrimitiveSizeInBits(); unsigned EltWidth = OpTy->getScalarSizeInBits(); Intrinsic::ID IID; if (VecWidth == 128 && EltWidth == 32) - IID = Intrinsic::x86_avx512_cmp_ps_128; + IID = Intrinsic::x86_avx512_mask_cmp_ps_128; else if (VecWidth == 256 && EltWidth == 32) - IID = Intrinsic::x86_avx512_cmp_ps_256; + IID = Intrinsic::x86_avx512_mask_cmp_ps_256; else if (VecWidth == 512 && EltWidth == 32) - IID = Intrinsic::x86_avx512_cmp_ps_512; + IID = Intrinsic::x86_avx512_mask_cmp_ps_512; else if (VecWidth == 128 && EltWidth == 64) - IID = Intrinsic::x86_avx512_cmp_pd_128; + IID = Intrinsic::x86_avx512_mask_cmp_pd_128; else if (VecWidth == 256 && EltWidth == 64) - IID = Intrinsic::x86_avx512_cmp_pd_256; + IID = Intrinsic::x86_avx512_mask_cmp_pd_256; else if (VecWidth == 512 && EltWidth == 64) - IID = Intrinsic::x86_avx512_cmp_pd_512; + IID = Intrinsic::x86_avx512_mask_cmp_pd_512; else llvm_unreachable("Unexpected intrinsic"); - SmallVector Args; - Args.push_back(CI->getArgOperand(0)); - Args.push_back(CI->getArgOperand(1)); - Args.push_back(CI->getArgOperand(2)); - if (CI->getNumArgOperands() == 5) - Args.push_back(CI->getArgOperand(4)); + Value *Mask = Constant::getAllOnesValue(CI->getType()); + if (VecWidth == 512) + std::swap(Mask, Args.back()); + Args.push_back(Mask); Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID), Args); - Rep = ApplyX86MaskOn1BitsVec(Builder, 
Rep, CI->getArgOperand(3)); - } else if (IsX86 && Name.startswith("avx512.mask.cmp.") && - Name[16] != 'p') { + } else if (IsX86 && Name.startswith("avx512.mask.cmp.")) { // Integer compare intrinsics. unsigned Imm = cast(CI->getArgOperand(2))->getZExtValue(); Rep = upgradeMaskedCompare(Builder, *CI, Imm, true); @@ -3718,6 +3747,41 @@ break; } + case Intrinsic::x86_avx512_mask_cmp_pd_128: + case Intrinsic::x86_avx512_mask_cmp_pd_256: + case Intrinsic::x86_avx512_mask_cmp_pd_512: + case Intrinsic::x86_avx512_mask_cmp_ps_128: + case Intrinsic::x86_avx512_mask_cmp_ps_256: + case Intrinsic::x86_avx512_mask_cmp_ps_512: { + SmallVector Args(CI->arg_operands().begin(), + CI->arg_operands().end()); + unsigned NumElts = cast(Args[0]->getType())->getNumElements(); + Args[3] = Builder.CreateBitCast( + Args[3], + FixedVectorType::get(Builder.getInt1Ty(), + Args[3]->getType()->getIntegerBitWidth())); + + if (NumElts < 8) { + int Indices[8]; + for (unsigned i = 0; i != NumElts; ++i) + Indices[i] = i; + Args[3] = Builder.CreateShuffleVector(Args[3], Args[3], + makeArrayRef(Indices, NumElts)); + } + + NewCall = Builder.CreateCall(NewFn, Args); + Value *Res = ApplyX86MaskOn1BitsVec(Builder, NewCall, nullptr); + + StringRef Name = CI->getName(); + if (!Name.empty()) { + CI->setName(Name + ".old"); + NewCall->setName(Name); + } + CI->replaceAllUsesWith(Res); + CI->eraseFromParent(); + return; + } + case Intrinsic::thread_pointer: { NewCall = Builder.CreateCall(NewFn, {}); break; diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp --- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -521,9 +521,9 @@ // type. static bool isLegalMaskCompare(SDNode *N, const X86Subtarget *Subtarget) { unsigned Opcode = N->getOpcode(); - if (Opcode == X86ISD::CMPM || Opcode == X86ISD::STRICT_CMPM || - Opcode == ISD::SETCC || Opcode == X86ISD::CMPM_SAE || - Opcode == X86ISD::VFPCLASS) { + if (Opcode == X86ISD::CMPM || Opcode == X86ISD::CMPMM || + Opcode == X86ISD::STRICT_CMPM || Opcode == ISD::SETCC || + Opcode == X86ISD::CMPMM_SAE || Opcode == X86ISD::VFPCLASS) { // We can get 256-bit 8 element types here without VLX being enabled. When // this happens we will use 512-bit operations and the mask will not be // zero extended. diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h --- a/llvm/lib/Target/X86/X86ISelLowering.h +++ b/llvm/lib/Target/X86/X86ISelLowering.h @@ -384,8 +384,10 @@ /// Vector comparison generating mask bits for fp and /// integer signed and unsigned data types. CMPM, - // Vector comparison with SAE for FP values - CMPM_SAE, + // Vector mask comparison generating mask bits for FP values. + CMPMM, + // Vector mask comparison with SAE for FP values. + CMPMM_SAE, // Arithmetic operations with FLAGS results. ADD, diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -24711,20 +24711,21 @@ case CMP_MASK_CC: { MVT MaskVT = Op.getSimpleValueType(); SDValue CC = Op.getOperand(3); + SDValue Mask = Op.getOperand(4); // We specify 2 possible opcodes for intrinsics with rounding modes. // First, we check if the intrinsic may have non-default rounding mode, // (IntrData->Opc1 != 0), then we check the rounding mode operand. 
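For the legacy intrinsics that returned a scalar mask and took an i8/i16 mask operand, the declaration-based upgrade above bitcasts the integer mask to a vXi1 vector (narrowing it with a shuffle when there are fewer than eight elements) before calling the new intrinsic, then widens the i1 result and bitcasts it back. Roughly, and with hypothetical value names and illustrative shuffle indices rather than the exact ones the upgrade emits, an old call to i8 @llvm.x86.avx512.mask.cmp.pd.128(%a, %b, i32 1, i8 %m) becomes something like:

declare <2 x i1> @llvm.x86.avx512.mask.cmp.pd.128(<2 x double>, <2 x double>, i32, <2 x i1>)

define i8 @upgraded_cmp_lt_pd_128(<2 x double> %a, <2 x double> %b, i8 %m) {
  %mv = bitcast i8 %m to <8 x i1>                                          ; integer mask -> bit vector
  %mk = shufflevector <8 x i1> %mv, <8 x i1> %mv, <2 x i32> <i32 0, i32 1> ; keep the low 2 bits
  %k  = call <2 x i1> @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> %a, <2 x double> %b, i32 1, <2 x i1> %mk)
  %w  = shufflevector <2 x i1> %k, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> ; pad back to 8 bits
  %r  = bitcast <8 x i1> %w to i8
  ret i8 %r
}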
if (IntrData->Opc1 != 0) { - SDValue Sae = Op.getOperand(4); + SDValue Sae = Op.getOperand(5); if (isRoundModeSAE(Sae)) return DAG.getNode(IntrData->Opc1, dl, MaskVT, Op.getOperand(1), - Op.getOperand(2), CC, Sae); + Op.getOperand(2), CC, Mask, Sae); if (!isRoundModeCurDirection(Sae)) return SDValue(); } //default rounding mode return DAG.getNode(IntrData->Opc0, dl, MaskVT, - {Op.getOperand(1), Op.getOperand(2), CC}); + {Op.getOperand(1), Op.getOperand(2), CC, Mask}); } case CMP_MASK_SCALAR_CC: { SDValue Src1 = Op.getOperand(1); @@ -30310,8 +30311,9 @@ NODE_NAME_CASE(COMI) NODE_NAME_CASE(UCOMI) NODE_NAME_CASE(CMPM) + NODE_NAME_CASE(CMPMM) NODE_NAME_CASE(STRICT_CMPM) - NODE_NAME_CASE(CMPM_SAE) + NODE_NAME_CASE(CMPMM_SAE) NODE_NAME_CASE(SETCC) NODE_NAME_CASE(SETCC_CARRY) NODE_NAME_CASE(FSETCC) diff --git a/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp b/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp --- a/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp +++ b/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp @@ -1150,39 +1150,6 @@ } break; } - case Intrinsic::x86_avx512_cmp_pd_128: - case Intrinsic::x86_avx512_cmp_pd_256: - case Intrinsic::x86_avx512_cmp_pd_512: - case Intrinsic::x86_avx512_cmp_ps_128: - case Intrinsic::x86_avx512_cmp_ps_256: - case Intrinsic::x86_avx512_cmp_ps_512: { - // Folding cmp(sub(a,b),0) -> cmp(a,b) and cmp(0,sub(a,b)) -> cmp(b,a) - Value *Arg0 = II.getArgOperand(0); - Value *Arg1 = II.getArgOperand(1); - bool Arg0IsZero = match(Arg0, PatternMatch::m_PosZeroFP()); - if (Arg0IsZero) - std::swap(Arg0, Arg1); - Value *A, *B; - // This fold requires only the NINF(not +/- inf) since inf minus - // inf is nan. - // NSZ(No Signed Zeros) is not needed because zeros of any sign are - // equal for both compares. - // NNAN is not needed because nans compare the same for both compares. - // The compare intrinsic uses the above assumptions and therefore - // doesn't require additional flags. - if ((match(Arg0, - PatternMatch::m_OneUse(PatternMatch::m_FSub( - PatternMatch::m_Value(A), PatternMatch::m_Value(B)))) && - match(Arg1, PatternMatch::m_PosZeroFP()) && isa(Arg0) && - cast(Arg0)->getFastMathFlags().noInfs())) { - if (Arg0IsZero) - std::swap(A, B); - IC.replaceOperand(II, 0, A); - IC.replaceOperand(II, 1, B); - return &II; - } - break; - } case Intrinsic::x86_avx512_add_ps_512: case Intrinsic::x86_avx512_div_ps_512: diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -2494,10 +2494,6 @@ (X86cmpm node:$src1, node:$src2, node:$cc), [{ return N->hasOneUse(); }]>; -def X86cmpmSAE_su : PatFrag<(ops node:$src1, node:$src2, node:$cc), - (X86cmpmSAE node:$src1, node:$src2, node:$cc), [{ - return N->hasOneUse(); -}]>; def X86cmpm_imm_commute : SDNodeXFormgetZExtValue() & 0x1f); @@ -2564,19 +2560,71 @@ (!cast(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask, _.RC:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>; + + // Patterns for mask intrinsics. 
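The patterns that follow let instruction selection fold the intrinsic's mask operand directly into the compare: an all-ones mask selects the plain compare-into-k form, while a genuine mask selects the zero-masked {%k} form instead of a compare followed by a separate kand. As a sketch (hypothetical function name), IR like this should select a single masked vcmpps on AVX512VL:

declare <8 x i1> @llvm.x86.avx512.mask.cmp.ps.256(<8 x float>, <8 x float>, i32, <8 x i1>)

define i8 @masked_cmp_lt_ps_256(<8 x float> %a, <8 x float> %b, i8 %m) {
  %mv = bitcast i8 %m to <8 x i1>
  ; imm 1 = _CMP_LT_OS; expected to select something like "vcmpltps %ymm1, %ymm0, %k0 {%k1}"
  %k = call <8 x i1> @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> %a, <8 x float> %b, i32 1, <8 x i1> %mv)
  %r = bitcast <8 x i1> %k to i8
  ret i8 %r
}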
+ def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc, + (_.KVT immAllOnesV)), + (!cast(Name#_.ZSuffix#"rri") _.RC:$src1, _.RC:$src2, timm:$cc)>; + + def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc, _.KRCWM:$mask), + (!cast(Name#_.ZSuffix#"rrik") _.KRCWM:$mask, _.RC:$src1, + _.RC:$src2, timm:$cc)>; + + def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)), timm:$cc, + (_.KVT immAllOnesV)), + (!cast(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2, timm:$cc)>; + + def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)), timm:$cc, + _.KRCWM:$mask), + (!cast(Name#_.ZSuffix#"rmik") _.KRCWM:$mask, _.RC:$src1, + addr:$src2, timm:$cc)>; + + def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.BroadcastLdFrag addr:$src2)), timm:$cc, + (_.KVT immAllOnesV)), + (!cast(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2, timm:$cc)>; + + def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.BroadcastLdFrag addr:$src2)), timm:$cc, + _.KRCWM:$mask), + (!cast(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask, _.RC:$src1, + addr:$src2, timm:$cc)>; + + // Patterns for mask intrinsics with loads in other operand. + def : Pat<(X86cmpmm (_.VT (_.LdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc, + (_.KVT immAllOnesV)), + (!cast(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2, + (X86cmpm_imm_commute timm:$cc))>; + + def : Pat<(X86cmpmm (_.VT (_.LdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc, + _.KRCWM:$mask), + (!cast(Name#_.ZSuffix#"rmik") _.KRCWM:$mask, + _.RC:$src1, addr:$src2, + (X86cmpm_imm_commute timm:$cc))>; + + def : Pat<(X86cmpmm (_.VT (_.BroadcastLdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc, + (_.KVT immAllOnesV)), + (!cast(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2, + (X86cmpm_imm_commute timm:$cc))>; + + def : Pat<(X86cmpmm (_.VT (_.BroadcastLdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc, + _.KRCWM:$mask), + (!cast(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask, + _.RC:$src1, addr:$src2, + (X86cmpm_imm_commute timm:$cc))>; } multiclass avx512_vcmp_sae { // comparison code form (VCMP[EQ/LT/LE/...] 
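The SAE variant gets the same treatment: the rrib form now carries explicit patterns for X86cmpmmSAE with either an all-ones mask or a k-register mask, so the constrained-FP path in clang, which now always emits these mask intrinsics, can still fold the mask into one exception-suppressing compare. A minimal sketch of the masked SAE case (hypothetical function name):

declare <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float>, <16 x float>, i32, <16 x i1>, i32)

define i16 @masked_cmp_lt_sae_ps_512(<16 x float> %a, <16 x float> %b, i16 %m) {
  %mv = bitcast i16 %m to <16 x i1>
  ; imm 1 = _CMP_LT_OS, sae = 8 (_MM_FROUND_NO_EXC); expected to select roughly
  ; "vcmpltps {sae}, %zmm1, %zmm0, %k0 {%k1}"
  %k = call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %a, <16 x float> %b, i32 1, <16 x i1> %mv, i32 8)
  %r = bitcast <16 x i1> %k to i16
  ret i16 %r
}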
let Uses = [MXCSR] in - defm rrib : AVX512_maskable_cmp<0xC2, MRMSrcReg, _, - (outs _.KRC:$dst),(ins _.RC:$src1, _.RC:$src2, u8imm:$cc), + defm rrib : AVX512_maskable_custom_cmp<0xC2, MRMSrcReg, (outs _.KRC:$dst), + (ins _.RC:$src1, _.RC:$src2, u8imm:$cc), + (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2, u8imm:$cc), "vcmp"#_.Suffix, "$cc, {sae}, $src2, $src1", "$src1, $src2, {sae}, $cc", - (X86cmpmSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc), - (X86cmpmSAE_su (_.VT _.RC:$src1), (_.VT _.RC:$src2), - timm:$cc)>, + [(set _.KRC:$dst, (X86cmpmmSAE (_.VT _.RC:$src1), + (_.VT _.RC:$src2), timm:$cc, (_.KVT immAllOnesV)))], + [(set _.KRC:$dst, (X86cmpmmSAE (_.VT _.RC:$src1), + (_.VT _.RC:$src2), timm:$cc, _.KRCWM:$mask))]>, EVEX_B, Sched<[sched]>; } diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td --- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -207,16 +207,21 @@ SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCVecEltisVT<0, i1>, SDTCisVec<1>, SDTCisSameAs<2, 1>, SDTCisSameNumEltsAs<0, 1>, SDTCisVT<3, i8>]>; +def X86MaskCmpMaskCC : + SDTypeProfile<1, 4, [SDTCisVec<0>, SDTCVecEltisVT<0, i1>, + SDTCisVec<1>, SDTCisSameAs<2, 1>, + SDTCisSameNumEltsAs<0, 1>, SDTCisVT<3, i8>, SDTCisSameAs<4, 0>]>; def X86CmpMaskCCScalar : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisFP<1>, SDTCisSameAs<1, 2>, SDTCisVT<3, i8>]>; def X86cmpm : SDNode<"X86ISD::CMPM", X86CmpMaskCC>; +def X86cmpmm : SDNode<"X86ISD::CMPMM", X86MaskCmpMaskCC>; def X86strict_cmpm : SDNode<"X86ISD::STRICT_CMPM", X86CmpMaskCC, [SDNPHasChain]>; def X86any_cmpm : PatFrags<(ops node:$src1, node:$src2, node:$src3), [(X86strict_cmpm node:$src1, node:$src2, node:$src3), (X86cmpm node:$src1, node:$src2, node:$src3)]>; -def X86cmpmSAE : SDNode<"X86ISD::CMPM_SAE", X86CmpMaskCC>; +def X86cmpmmSAE : SDNode<"X86ISD::CMPMM_SAE", X86MaskCmpMaskCC>; def X86cmpms : SDNode<"X86ISD::FSETCCM", X86CmpMaskCCScalar>; def X86cmpmsSAE : SDNode<"X86ISD::FSETCCM_SAE", X86CmpMaskCCScalar>; diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h --- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h +++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h @@ -417,12 +417,6 @@ X86_INTRINSIC_DATA(avx2_psrlv_q_256, INTR_TYPE_2OP, X86ISD::VSRLV, 0), X86_INTRINSIC_DATA(avx512_add_pd_512, INTR_TYPE_2OP, ISD::FADD, X86ISD::FADD_RND), X86_INTRINSIC_DATA(avx512_add_ps_512, INTR_TYPE_2OP, ISD::FADD, X86ISD::FADD_RND), - X86_INTRINSIC_DATA(avx512_cmp_pd_128, CMP_MASK_CC, X86ISD::CMPM, 0), - X86_INTRINSIC_DATA(avx512_cmp_pd_256, CMP_MASK_CC, X86ISD::CMPM, 0), - X86_INTRINSIC_DATA(avx512_cmp_pd_512, CMP_MASK_CC, X86ISD::CMPM, X86ISD::CMPM_SAE), - X86_INTRINSIC_DATA(avx512_cmp_ps_128, CMP_MASK_CC, X86ISD::CMPM, 0), - X86_INTRINSIC_DATA(avx512_cmp_ps_256, CMP_MASK_CC, X86ISD::CMPM, 0), - X86_INTRINSIC_DATA(avx512_cmp_ps_512, CMP_MASK_CC, X86ISD::CMPM, X86ISD::CMPM_SAE), X86_INTRINSIC_DATA(avx512_conflict_d_128, INTR_TYPE_1OP, X86ISD::CONFLICT, 0), X86_INTRINSIC_DATA(avx512_conflict_d_256, INTR_TYPE_1OP, X86ISD::CONFLICT, 0), X86_INTRINSIC_DATA(avx512_conflict_d_512, INTR_TYPE_1OP, X86ISD::CONFLICT, 0), @@ -464,6 +458,12 @@ X86ISD::FADDS, X86ISD::FADDS_RND), X86_INTRINSIC_DATA(avx512_mask_add_ss_round, INTR_TYPE_SCALAR_MASK, X86ISD::FADDS, X86ISD::FADDS_RND), + X86_INTRINSIC_DATA(avx512_mask_cmp_pd_128, CMP_MASK_CC, X86ISD::CMPMM, 0), + X86_INTRINSIC_DATA(avx512_mask_cmp_pd_256, CMP_MASK_CC, X86ISD::CMPMM, 0), + X86_INTRINSIC_DATA(avx512_mask_cmp_pd_512, CMP_MASK_CC, 
X86ISD::CMPMM, X86ISD::CMPMM_SAE), + X86_INTRINSIC_DATA(avx512_mask_cmp_ps_128, CMP_MASK_CC, X86ISD::CMPMM, 0), + X86_INTRINSIC_DATA(avx512_mask_cmp_ps_256, CMP_MASK_CC, X86ISD::CMPMM, 0), + X86_INTRINSIC_DATA(avx512_mask_cmp_ps_512, CMP_MASK_CC, X86ISD::CMPMM, X86ISD::CMPMM_SAE), X86_INTRINSIC_DATA(avx512_mask_cmp_sd, CMP_MASK_SCALAR_CC, X86ISD::FSETCCM, X86ISD::FSETCCM_SAE), X86_INTRINSIC_DATA(avx512_mask_cmp_ss, CMP_MASK_SCALAR_CC, diff --git a/llvm/test/CodeGen/X86/avx512-cmp-kor-sequence.ll b/llvm/test/CodeGen/X86/avx512-cmp-kor-sequence.ll --- a/llvm/test/CodeGen/X86/avx512-cmp-kor-sequence.ll +++ b/llvm/test/CodeGen/X86/avx512-cmp-kor-sequence.ll @@ -22,13 +22,13 @@ ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax ; CHECK-NEXT: retq entry: - %0 = call <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float> %a, <16 x float> %x, i32 13, i32 4) + %0 = call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %a, <16 x float> %x, i32 13, <16 x i1> , i32 4) %1 = bitcast <16 x i1> %0 to i16 - %2 = call <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float> %b, <16 x float> %x, i32 13, i32 4) + %2 = call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %b, <16 x float> %x, i32 13, <16 x i1> , i32 4) %3 = bitcast <16 x i1> %2 to i16 - %4 = call <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float> %c, <16 x float> %x, i32 13, i32 4) + %4 = call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %c, <16 x float> %x, i32 13, <16 x i1> , i32 4) %5 = bitcast <16 x i1> %4 to i16 - %6 = call <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float> %d, <16 x float> %x, i32 13, i32 4) + %6 = call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %d, <16 x float> %x, i32 13, <16 x i1> , i32 4) %7 = bitcast <16 x i1> %6 to i16 %8 = bitcast i16 %1 to <16 x i1> %9 = bitcast i16 %3 to <16 x i1> @@ -46,7 +46,7 @@ } ; Function Attrs: nounwind readnone -declare <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float>, <16 x float>, i32, i32) #1 +declare <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float>, <16 x float>, i32, <16 x i1>, i32) #1 attributes #0 = { nounwind readnone uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="knl" "target-features"="+adx,+aes,+avx,+avx2,+avx512cd,+avx512er,+avx512f,+avx512pf,+bmi,+bmi2,+cx16,+f16c,+fma,+fsgsbase,+fxsr,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+prefetchwt1,+rdrnd,+rdseed,+rtm,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #1 = { nounwind readnone } diff --git a/llvm/test/CodeGen/X86/avx512-cmp-mask.ll b/llvm/test/CodeGen/X86/avx512-cmp-mask.ll --- a/llvm/test/CodeGen/X86/avx512-cmp-mask.ll +++ b/llvm/test/CodeGen/X86/avx512-cmp-mask.ll @@ -1,40 +1,23 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512F -; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=CHECK,AVX512DQ +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512VL ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512vl,+avx512dq | FileCheck %s --check-prefixes=CHECK,AVX512DQVL define <4 x i64> @PR32546(<8 x float> %a, <8 x float> %b, <8 x float> %c, <8 x float> %d) { -; 
AVX512F-LABEL: PR32546: -; AVX512F: ## %bb.0: ## %entry -; AVX512F-NEXT: ## kill: def $ymm3 killed $ymm3 def $zmm3 -; AVX512F-NEXT: ## kill: def $ymm2 killed $ymm2 def $zmm2 -; AVX512F-NEXT: ## kill: def $ymm1 killed $ymm1 def $zmm1 -; AVX512F-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0 -; AVX512F-NEXT: vcmpltps %zmm3, %zmm2, %k1 -; AVX512F-NEXT: vcmpltps %zmm1, %zmm0, %k0 {%k1} -; AVX512F-NEXT: kmovw %k0, %eax -; AVX512F-NEXT: movzbl %al, %eax -; AVX512F-NEXT: vmovd %eax, %xmm0 -; AVX512F-NEXT: vpbroadcastd %xmm0, %ymm0 -; AVX512F-NEXT: retq -; -; AVX512DQ-LABEL: PR32546: -; AVX512DQ: ## %bb.0: ## %entry -; AVX512DQ-NEXT: ## kill: def $ymm3 killed $ymm3 def $zmm3 -; AVX512DQ-NEXT: ## kill: def $ymm2 killed $ymm2 def $zmm2 -; AVX512DQ-NEXT: ## kill: def $ymm1 killed $ymm1 def $zmm1 -; AVX512DQ-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0 -; AVX512DQ-NEXT: vcmpltps %zmm3, %zmm2, %k1 -; AVX512DQ-NEXT: vcmpltps %zmm1, %zmm0, %k0 {%k1} -; AVX512DQ-NEXT: kmovb %k0, %eax -; AVX512DQ-NEXT: vmovd %eax, %xmm0 -; AVX512DQ-NEXT: vpbroadcastd %xmm0, %ymm0 -; AVX512DQ-NEXT: retq +; AVX512VL-LABEL: PR32546: +; AVX512VL: ## %bb.0: ## %entry +; AVX512VL-NEXT: vcmpltps %ymm1, %ymm0, %k0 +; AVX512VL-NEXT: vcmpltps %ymm3, %ymm2, %k1 +; AVX512VL-NEXT: kandw %k0, %k1, %k0 +; AVX512VL-NEXT: kmovw %k0, %eax +; AVX512VL-NEXT: movzbl %al, %eax +; AVX512VL-NEXT: vpbroadcastd %eax, %ymm0 +; AVX512VL-NEXT: retq ; ; AVX512DQVL-LABEL: PR32546: ; AVX512DQVL: ## %bb.0: ## %entry +; AVX512DQVL-NEXT: vcmpltps %ymm1, %ymm0, %k0 ; AVX512DQVL-NEXT: vcmpltps %ymm3, %ymm2, %k1 -; AVX512DQVL-NEXT: vcmpltps %ymm1, %ymm0, %k0 {%k1} +; AVX512DQVL-NEXT: kandb %k0, %k1, %k0 ; AVX512DQVL-NEXT: kmovb %k0, %eax ; AVX512DQVL-NEXT: vpbroadcastd %eax, %ymm0 ; AVX512DQVL-NEXT: retq @@ -48,4 +31,108 @@ %3 = bitcast <8 x i32> %vecinit7.i to <4 x i64> ret <4 x i64> %3 } - declare i8 @llvm.x86.avx512.mask.cmp.ps.256(<8 x float>, <8 x float>, i32, i8) + +define void @PR32547(<8 x float> %a, <8 x float> %b, <8 x float> %c, <8 x float> %d, float* %p) { +; CHECK-LABEL: PR32547: +; CHECK: ## %bb.0: ## %entry +; CHECK-NEXT: vcmpltps %ymm1, %ymm0, %k0 +; CHECK-NEXT: vcmpltps %ymm3, %ymm2, %k1 +; CHECK-NEXT: kunpckbw %k1, %k0, %k1 +; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; CHECK-NEXT: vmovaps %zmm0, (%rdi) {%k1} +; CHECK-NEXT: vzeroupper +; CHECK-NEXT: retq + entry: + %0 = tail call i8 @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> %a, <8 x float> %b, i32 1, i8 -1) + %1 = tail call i8 @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> %c, <8 x float> %d, i32 1, i8 -1) + %conv.i = zext i8 %0 to i16 + %conv.i18 = zext i8 %1 to i16 + %shl = shl nuw i16 %conv.i, 8 + %or = or i16 %shl, %conv.i18 + %2 = bitcast float* %p to <16 x float>* + %3 = bitcast i16 %or to <16 x i1> + tail call void @llvm.masked.store.v16f32.p0v16f32(<16 x float> zeroinitializer, <16 x float>* %2, i32 64, <16 x i1> %3) #4 + ret void +} + +define void @PR32547_swap(<8 x float> %a, <8 x float> %b, <8 x float> %c, <8 x float> %d, float* %p) { +; CHECK-LABEL: PR32547_swap: +; CHECK: ## %bb.0: ## %entry +; CHECK-NEXT: vcmpltps %ymm1, %ymm0, %k0 +; CHECK-NEXT: vcmpltps %ymm3, %ymm2, %k1 +; CHECK-NEXT: kunpckbw %k1, %k0, %k1 +; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; CHECK-NEXT: vmovaps %zmm0, (%rdi) {%k1} +; CHECK-NEXT: vzeroupper +; CHECK-NEXT: retq + entry: + %0 = tail call i8 @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> %a, <8 x float> %b, i32 1, i8 -1) + %1 = tail call i8 @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> %c, <8 x float> %d, i32 1, i8 -1) + %conv.i = zext i8 %0 to i16 + 
%conv.i18 = zext i8 %1 to i16 + %shl = shl nuw i16 %conv.i, 8 + %or = or i16 %conv.i18, %shl + %2 = bitcast float* %p to <16 x float>* + %3 = bitcast i16 %or to <16 x i1> + tail call void @llvm.masked.store.v16f32.p0v16f32(<16 x float> zeroinitializer, <16 x float>* %2, i32 64, <16 x i1> %3) #4 + ret void +} + +define void @mask_cmp_128(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %d, float* %p) { +; AVX512VL-LABEL: mask_cmp_128: +; AVX512VL: ## %bb.0: ## %entry +; AVX512VL-NEXT: vcmpltps %xmm1, %xmm0, %k0 +; AVX512VL-NEXT: kmovw %k0, %eax +; AVX512VL-NEXT: vcmpltps %xmm3, %xmm2, %k0 +; AVX512VL-NEXT: shlb $4, %al +; AVX512VL-NEXT: kmovw %eax, %k1 +; AVX512VL-NEXT: korw %k1, %k0, %k1 +; AVX512VL-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; AVX512VL-NEXT: vmovaps %ymm0, (%rdi) {%k1} +; AVX512VL-NEXT: vzeroupper +; AVX512VL-NEXT: retq +; +; AVX512DQVL-LABEL: mask_cmp_128: +; AVX512DQVL: ## %bb.0: ## %entry +; AVX512DQVL-NEXT: vcmpltps %xmm1, %xmm0, %k0 +; AVX512DQVL-NEXT: vcmpltps %xmm3, %xmm2, %k1 +; AVX512DQVL-NEXT: kshiftlb $4, %k0, %k0 +; AVX512DQVL-NEXT: korb %k0, %k1, %k1 +; AVX512DQVL-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; AVX512DQVL-NEXT: vmovaps %ymm0, (%rdi) {%k1} +; AVX512DQVL-NEXT: vzeroupper +; AVX512DQVL-NEXT: retq +entry: + %0 = tail call i8 @llvm.x86.avx512.mask.cmp.ps.128(<4 x float> %a, <4 x float> %b, i32 1, i8 -1) + %1 = tail call i8 @llvm.x86.avx512.mask.cmp.ps.128(<4 x float> %c, <4 x float> %d, i32 1, i8 -1) + %shl = shl nuw i8 %0, 4 + %or = or i8 %1, %shl + %2 = bitcast float* %p to <8 x float>* + %3 = bitcast i8 %or to <8 x i1> + tail call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %2, i32 64, <8 x i1> %3) + ret void +} + +define <16 x float> @mask_cmp_512(<16 x float> %a, <16 x float> %b, <16 x float> %c, <16 x float> %d, float* %p) { +; CHECK-LABEL: mask_cmp_512: +; CHECK: ## %bb.0: ## %entry +; CHECK-NEXT: vcmpltps {sae}, %zmm1, %zmm0, %k0 +; CHECK-NEXT: vcmpltps %zmm3, %zmm2, %k1 +; CHECK-NEXT: kxnorw %k1, %k0, %k1 +; CHECK-NEXT: vmovaps (%rdi), %zmm0 {%k1} {z} +; CHECK-NEXT: retq + entry: + %0 = tail call i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %a, <16 x float> %b, i32 1, i16 -1, i32 8) + %1 = tail call i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %c, <16 x float> %d, i32 1, i16 -1, i32 4) + %2 = bitcast float* %p to <16 x float>* + %3 = load <16 x float>, <16 x float>* %2 + %4 = xor i16 %0, %1 + %5 = bitcast i16 %4 to <16 x i1> + %6 = select <16 x i1> %5, <16 x float> zeroinitializer, <16 x float> %3 + ret <16 x float> %6 +} +declare i8 @llvm.x86.avx512.mask.cmp.ps.128(<4 x float>, <4 x float>, i32, i8) +declare i8 @llvm.x86.avx512.mask.cmp.ps.256(<8 x float>, <8 x float>, i32, i8) +declare i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float>, <16 x float>, i32, i16, i32) +declare void @llvm.masked.store.v8f32.p0v8f32(<8 x float>, <8 x float>*, i32, <8 x i1>) +declare void @llvm.masked.store.v16f32.p0v16f32(<16 x float>, <16 x float>*, i32, <16 x i1>) diff --git a/llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll b/llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll --- a/llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll +++ b/llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll @@ -10870,3 +10870,32 @@ } declare <16 x i32> @llvm.x86.avx512.mask.expand.d.512(<16 x i32> %data, <16 x i32> %src0, i16 %mask) + +define <16 x float> @test_cmp_512(<16 x float> %a, <16 x float> %b, <16 x float> %c, <16 x float> %d, float* %p) { +; X86-LABEL: test_cmp_512: +; X86: ## %bb.0: ## %entry +; X86-NEXT: movl {{[0-9]+}}(%esp), 
%eax ## encoding: [0x8b,0x44,0x24,0x04] +; X86-NEXT: vcmpltps {sae}, %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf1,0x7c,0x18,0xc2,0xc1,0x01] +; X86-NEXT: vcmpltps %zmm3, %zmm2, %k1 ## encoding: [0x62,0xf1,0x6c,0x48,0xc2,0xcb,0x01] +; X86-NEXT: kxnorw %k1, %k0, %k1 ## encoding: [0xc5,0xfc,0x46,0xc9] +; X86-NEXT: vmovaps (%eax), %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x28,0x00] +; X86-NEXT: retl ## encoding: [0xc3] +; +; X64-LABEL: test_cmp_512: +; X64: ## %bb.0: ## %entry +; X64-NEXT: vcmpltps {sae}, %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf1,0x7c,0x18,0xc2,0xc1,0x01] +; X64-NEXT: vcmpltps %zmm3, %zmm2, %k1 ## encoding: [0x62,0xf1,0x6c,0x48,0xc2,0xcb,0x01] +; X64-NEXT: kxnorw %k1, %k0, %k1 ## encoding: [0xc5,0xfc,0x46,0xc9] +; X64-NEXT: vmovaps (%rdi), %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x28,0x07] +; X64-NEXT: retq ## encoding: [0xc3] + entry: + %0 = tail call <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float> %a, <16 x float> %b, i32 1, i32 8) + %1 = tail call <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float> %c, <16 x float> %d, i32 1, i32 4) + %2 = bitcast float* %p to <16 x float>* + %3 = load <16 x float>, <16 x float>* %2 + %4 = xor <16 x i1> %0, %1 + %5 = select <16 x i1> %4, <16 x float> zeroinitializer, <16 x float> %3 + ret <16 x float> %5 +} + +declare <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float>, <16 x float>, i32, i32) diff --git a/llvm/test/CodeGen/X86/avx512-intrinsics.ll b/llvm/test/CodeGen/X86/avx512-intrinsics.ll --- a/llvm/test/CodeGen/X86/avx512-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx512-intrinsics.ll @@ -1046,11 +1046,11 @@ ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: ret{{[l|q]}} - %res = call <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float> %a, <16 x float> %b, i32 2, i32 8) + %res = call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %a, <16 x float> %b, i32 2, <16 x i1> , i32 8) %1 = bitcast <16 x i1> %res to i16 ret i16 %1 } -declare <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float>, <16 x float>, i32, i32) +declare <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float>, <16 x float>, i32, <16 x i1>, i32) define i8 @test_cmppd(<8 x double> %a, <8 x double> %b) { ; CHECK-LABEL: test_cmppd: @@ -1060,11 +1060,11 @@ ; CHECK-NEXT: # kill: def $al killed $al killed $eax ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: ret{{[l|q]}} - %res = call <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double> %a, <8 x double> %b, i32 4, i32 4) + %res = call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %a, <8 x double> %b, i32 4, <8 x i1> , i32 4) %1 = bitcast <8 x i1> %res to i8 ret i8 %1 } -declare <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double>, <8 x double>, i32, i32) +declare <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double>, <8 x double>, i32, <8 x i1>, i32) ; Function Attrs: nounwind readnone @@ -7521,9 +7521,9 @@ ; X86-NEXT: .cfi_def_cfa %esp, 4 ; X86-NEXT: retl entry: - %0 = call <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double> %a, <8 x double> %b, i32 17, i32 4) + %0 = call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %a, <8 x double> %b, i32 17, <8 x i1> , i32 4) %1 = bitcast <8 x i1> %0 to i8 - %2 = call <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double> %c, <8 x double> %d, i32 17, i32 4) + %2 = call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %c, <8 x double> %d, i32 17, <8 x i1> , i32 4) %3 = bitcast <8 x i1> %2 to i8 %conv = zext i8 %1 to i16 %conv2 = zext i8 %3 to i16 @@ -7561,7 +7561,7 @@ ; X86-NEXT: .cfi_def_cfa %esp, 4 ; X86-NEXT: retl entry: - %0 
= call <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double> %a, <8 x double> %b, i32 17, i32 4) + %0 = call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %a, <8 x double> %b, i32 17, <8 x i1> , i32 4) %1 = bitcast <8 x i1> %0 to i8 %conv = zext i8 %1 to i16 %2 = bitcast i16 %conv to <16 x i1> diff --git a/llvm/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll b/llvm/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll --- a/llvm/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll +++ b/llvm/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll @@ -17233,3 +17233,90 @@ } declare <8 x i32> @llvm.x86.avx512.mask.expand.d.256(<8 x i32> %data, <8 x i32> %src0, i8 %mask) + +define void @test_cmp_128(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %d, float* %p) { +; X86-LABEL: test_cmp_128: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebp # encoding: [0x55] +; X86-NEXT: .cfi_def_cfa_offset 8 +; X86-NEXT: .cfi_offset %ebp, -8 +; X86-NEXT: movl %esp, %ebp # encoding: [0x89,0xe5] +; X86-NEXT: .cfi_def_cfa_register %ebp +; X86-NEXT: andl $-16, %esp # encoding: [0x83,0xe4,0xf0] +; X86-NEXT: subl $16, %esp # encoding: [0x83,0xec,0x10] +; X86-NEXT: movl 24(%ebp), %eax # encoding: [0x8b,0x45,0x18] +; X86-NEXT: vcmpltps %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7c,0x08,0xc2,0xc1,0x01] +; X86-NEXT: vcmpltps 8(%ebp), %xmm2, %k1 # encoding: [0x62,0xf1,0x6c,0x08,0xc2,0x8d,0x08,0x00,0x00,0x00,0x01] +; X86-NEXT: kshiftlw $4, %k1, %k1 # encoding: [0xc4,0xe3,0xf9,0x32,0xc9,0x04] +; X86-NEXT: korw %k1, %k0, %k1 # encoding: [0xc5,0xfc,0x45,0xc9] +; X86-NEXT: vxorps %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x57,0xc0] +; X86-NEXT: vmovaps %ymm0, (%eax) {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x29,0x00] +; X86-NEXT: movl %ebp, %esp # encoding: [0x89,0xec] +; X86-NEXT: popl %ebp # encoding: [0x5d] +; X86-NEXT: .cfi_def_cfa %esp, 4 +; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_cmp_128: +; X64: # %bb.0: # %entry +; X64-NEXT: vcmpltps %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7c,0x08,0xc2,0xc1,0x01] +; X64-NEXT: vcmpltps %xmm3, %xmm2, %k1 # encoding: [0x62,0xf1,0x6c,0x08,0xc2,0xcb,0x01] +; X64-NEXT: kshiftlw $4, %k1, %k1 # encoding: [0xc4,0xe3,0xf9,0x32,0xc9,0x04] +; X64-NEXT: korw %k1, %k0, %k1 # encoding: [0xc5,0xfc,0x45,0xc9] +; X64-NEXT: vxorps %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x57,0xc0] +; X64-NEXT: vmovaps %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x29,0x07] +; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] +; X64-NEXT: retq # encoding: [0xc3] + entry: + %0 = tail call <4 x i1> @llvm.x86.avx512.cmp.ps.128(<4 x float> %a, <4 x float> %b, i32 1) + %1 = tail call <4 x i1> @llvm.x86.avx512.cmp.ps.128(<4 x float> %c, <4 x float> %d, i32 1) + %2 = bitcast float* %p to <8 x float>* + %3 = shufflevector <4 x i1> %0, <4 x i1> %1, <8 x i32> + tail call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %2, i32 64, <8 x i1> %3) #4 + ret void +} + +define void @test_cmp_256(<8 x float> %a, <8 x float> %b, <8 x float> %c, <8 x float> %d, float* %p) { +; X86-LABEL: test_cmp_256: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebp # encoding: [0x55] +; X86-NEXT: .cfi_def_cfa_offset 8 +; X86-NEXT: .cfi_offset %ebp, -8 +; X86-NEXT: movl %esp, %ebp # encoding: [0x89,0xe5] +; X86-NEXT: .cfi_def_cfa_register %ebp +; X86-NEXT: andl $-32, %esp # encoding: [0x83,0xe4,0xe0] +; X86-NEXT: subl $32, %esp # encoding: [0x83,0xec,0x20] +; X86-NEXT: movl 40(%ebp), %eax # encoding: [0x8b,0x45,0x28] +; X86-NEXT: vcmpltps %ymm1, 
%ymm0, %k0 # encoding: [0x62,0xf1,0x7c,0x28,0xc2,0xc1,0x01] +; X86-NEXT: vcmpltps 8(%ebp), %ymm2, %k1 # encoding: [0x62,0xf1,0x6c,0x28,0xc2,0x8d,0x08,0x00,0x00,0x00,0x01] +; X86-NEXT: kunpckbw %k0, %k1, %k1 # encoding: [0xc5,0xf5,0x4b,0xc8] +; X86-NEXT: vxorps %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x57,0xc0] +; X86-NEXT: vmovaps %zmm0, (%eax) {%k1} # encoding: [0x62,0xf1,0x7c,0x49,0x29,0x00] +; X86-NEXT: movl %ebp, %esp # encoding: [0x89,0xec] +; X86-NEXT: popl %ebp # encoding: [0x5d] +; X86-NEXT: .cfi_def_cfa %esp, 4 +; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_cmp_256: +; X64: # %bb.0: # %entry +; X64-NEXT: vcmpltps %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7c,0x28,0xc2,0xc1,0x01] +; X64-NEXT: vcmpltps %ymm3, %ymm2, %k1 # encoding: [0x62,0xf1,0x6c,0x28,0xc2,0xcb,0x01] +; X64-NEXT: kunpckbw %k0, %k1, %k1 # encoding: [0xc5,0xf5,0x4b,0xc8] +; X64-NEXT: vxorps %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x57,0xc0] +; X64-NEXT: vmovaps %zmm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0x7c,0x49,0x29,0x07] +; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] +; X64-NEXT: retq # encoding: [0xc3] + entry: + %0 = tail call <8 x i1> @llvm.x86.avx512.cmp.ps.256(<8 x float> %a, <8 x float> %b, i32 1) + %1 = tail call <8 x i1> @llvm.x86.avx512.cmp.ps.256(<8 x float> %c, <8 x float> %d, i32 1) + %2 = bitcast float* %p to <16 x float>* + %3 = shufflevector <8 x i1> %0, <8 x i1> %1, <16 x i32> + tail call void @llvm.masked.store.v16f32.p0v16f32(<16 x float> zeroinitializer, <16 x float>* %2, i32 64, <16 x i1> %3) #4 + ret void +} + +declare <4 x i1> @llvm.x86.avx512.cmp.ps.128(<4 x float>, <4 x float>, i32) +declare <8 x i1> @llvm.x86.avx512.cmp.ps.256(<8 x float>, <8 x float>, i32) +declare void @llvm.masked.store.v8f32.p0v8f32(<8 x float>, <8 x float>*, i32, <8 x i1>) +declare void @llvm.masked.store.v16f32.p0v16f32(<16 x float>, <16 x float>*, i32, <16 x i1>) diff --git a/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll b/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll --- a/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll @@ -770,11 +770,11 @@ ; CHECK-NEXT: # kill: def $al killed $al killed $eax ; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] - %res = call <8 x i1> @llvm.x86.avx512.cmp.ps.256(<8 x float> %a, <8 x float> %b, i32 2) + %res = call <8 x i1> @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> %a, <8 x float> %b, i32 2, <8 x i1> ) %1 = bitcast <8 x i1> %res to i8 ret i8 %1 } -declare <8 x i1> @llvm.x86.avx512.cmp.ps.256(<8 x float>, <8 x float>, i32) +declare <8 x i1> @llvm.x86.avx512.mask.cmp.ps.256(<8 x float>, <8 x float>, i32, <8 x i1>) define i8 @test_cmpps_128(<4 x float> %a, <4 x float> %b) { ; CHECK-LABEL: test_cmpps_128: @@ -783,12 +783,12 @@ ; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0] ; CHECK-NEXT: # kill: def $al killed $al killed $eax ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] - %res = call <4 x i1> @llvm.x86.avx512.cmp.ps.128(<4 x float> %a, <4 x float> %b, i32 2) + %res = call <4 x i1> @llvm.x86.avx512.mask.cmp.ps.128(<4 x float> %a, <4 x float> %b, i32 2, <4 x i1> ) %1 = shufflevector <4 x i1> %res, <4 x i1> zeroinitializer, <8 x i32> %2 = bitcast <8 x i1> %1 to i8 ret i8 %2 } -declare <4 x i1> @llvm.x86.avx512.cmp.ps.128(<4 x float>, <4 x float>, i32) +declare <4 x i1> @llvm.x86.avx512.mask.cmp.ps.128(<4 x float>, <4 x float>, i32, <4 x i1>) define i8 @test_cmppd_256(<4 x double> %a, <4 x double> %b) { 
; CHECK-LABEL: test_cmppd_256: @@ -798,12 +798,12 @@ ; CHECK-NEXT: # kill: def $al killed $al killed $eax ; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] - %res = call <4 x i1> @llvm.x86.avx512.cmp.pd.256(<4 x double> %a, <4 x double> %b, i32 2) + %res = call <4 x i1> @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> %a, <4 x double> %b, i32 2, <4 x i1> ) %1 = shufflevector <4 x i1> %res, <4 x i1> zeroinitializer, <8 x i32> %2 = bitcast <8 x i1> %1 to i8 ret i8 %2 } -declare <4 x i1> @llvm.x86.avx512.cmp.pd.256(<4 x double>, <4 x double>, i32) +declare <4 x i1> @llvm.x86.avx512.mask.cmp.pd.256(<4 x double>, <4 x double>, i32, <4 x i1>) define i8 @test_cmppd_128(<2 x double> %a, <2 x double> %b) { ; CHECK-LABEL: test_cmppd_128: @@ -812,12 +812,12 @@ ; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0] ; CHECK-NEXT: # kill: def $al killed $al killed $eax ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] - %res = call <2 x i1> @llvm.x86.avx512.cmp.pd.128(<2 x double> %a, <2 x double> %b, i32 2) + %res = call <2 x i1> @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> %a, <2 x double> %b, i32 2, <2 x i1> ) %1 = shufflevector <2 x i1> %res, <2 x i1> zeroinitializer, <8 x i32> %2 = bitcast <8 x i1> %1 to i8 ret i8 %2 } -declare <2 x i1> @llvm.x86.avx512.cmp.pd.128(<2 x double>, <2 x double>, i32) +declare <2 x i1> @llvm.x86.avx512.mask.cmp.pd.128(<2 x double>, <2 x double>, i32, <2 x i1>) define <8 x float> @test_mm512_maskz_max_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) { ; X86-LABEL: test_mm512_maskz_max_ps_256: diff --git a/llvm/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll b/llvm/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll --- a/llvm/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll +++ b/llvm/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll @@ -19358,7 +19358,7 @@ } -declare <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float>, <16 x float>, i32, i32) +declare <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float>, <16 x float>, i32, <16 x i1>, i32) define zeroext i8 @test_vcmpoeqps_v4i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vcmpoeqps_v4i1_v8i1_mask: ; VLX: # %bb.0: # %entry @@ -20799,7 +20799,7 @@ entry: %0 = bitcast <8 x i64> %__a to <16 x float> %1 = bitcast <8 x i64> %__b to <16 x float> - %2 = call <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float> %0, <16 x float> %1, i32 2, i32 8) + %2 = call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %0, <16 x float> %1, i32 2, <16 x i1> , i32 8) %3 = bitcast <16 x i1> %2 to i16 %4 = zext i16 %3 to i32 ret i32 %4 @@ -20824,7 +20824,7 @@ entry: %0 = bitcast <8 x i64> %__a to <16 x float> %1 = bitcast <8 x i64> %__b to <16 x float> - %2 = call <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float> %0, <16 x float> %1, i32 2, i32 8) + %2 = call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %0, <16 x float> %1, i32 2, <16 x i1> , i32 8) %3 = bitcast i16 %__u to <16 x i1> %4 = and <16 x i1> %2, %3 %5 = bitcast <16 x i1> %4 to i16 @@ -21002,7 +21002,7 @@ entry: %0 = bitcast <8 x i64> %__a to <16 x float> %1 = bitcast <8 x i64> %__b to <16 x float> - %2 = call <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float> %0, <16 x float> %1, i32 2, i32 8) + %2 = call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %0, <16 x float> %1, i32 2, <16 x i1> , i32 8) %3 = bitcast <16 x i1> %2 to i16 %4 = zext i16 %3 to i64 ret i64 %4 @@ -21027,7 +21027,7 @@ entry: %0 = bitcast <8 x i64> %__a to <16 x float> %1 = bitcast <8 x i64> %__b to <16 x float> - %2 = call 
<16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float> %0, <16 x float> %1, i32 2, i32 8)
+ %2 = call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %0, <16 x float> %1, i32 2, <16 x i1> , i32 8)
%3 = bitcast i16 %__u to <16 x i1>
%4 = and <16 x i1> %2, %3
%5 = bitcast <16 x i1> %4 to i16
@@ -21037,7 +21037,7 @@
-declare <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double>, <8 x double>, i32, i32)
+declare <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double>, <8 x double>, i32, <8 x i1>, i32)
define zeroext i4 @test_vcmpoeqpd_v2i1_v4i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqpd_v2i1_v4i1_mask:
; VLX: # %bb.0: # %entry
@@ -22867,7 +22867,7 @@
entry:
%0 = bitcast <8 x i64> %__a to <8 x double>
%1 = bitcast <8 x i64> %__b to <8 x double>
- %2 = call <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double> %0, <8 x double> %1, i32 2, i32 8)
+ %2 = call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %0, <8 x double> %1, i32 2, <8 x i1> , i32 8)
%3 = bitcast <8 x i1> %2 to i8
%4 = zext i8 %3 to i16
ret i16 %4
@@ -22896,7 +22896,7 @@
entry:
%0 = bitcast <8 x i64> %__a to <8 x double>
%1 = bitcast <8 x i64> %__b to <8 x double>
- %2 = call <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double> %0, <8 x double> %1, i32 2, i32 8)
+ %2 = call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %0, <8 x double> %1, i32 2, <8 x i1> , i32 8)
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
%5 = bitcast <8 x i1> %4 to i8
@@ -23082,7 +23082,7 @@
entry:
%0 = bitcast <8 x i64> %__a to <8 x double>
%1 = bitcast <8 x i64> %__b to <8 x double>
- %2 = call <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double> %0, <8 x double> %1, i32 2, i32 8)
+ %2 = call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %0, <8 x double> %1, i32 2, <8 x i1> , i32 8)
%3 = bitcast <8 x i1> %2 to i8
%4 = zext i8 %3 to i32
ret i32 %4
@@ -23109,7 +23109,7 @@
entry:
%0 = bitcast <8 x i64> %__a to <8 x double>
%1 = bitcast <8 x i64> %__b to <8 x double>
- %2 = call <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double> %0, <8 x double> %1, i32 2, i32 8)
+ %2 = call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %0, <8 x double> %1, i32 2, <8 x i1> , i32 8)
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
%5 = bitcast <8 x i1> %4 to i8
@@ -23295,7 +23295,7 @@
entry:
%0 = bitcast <8 x i64> %__a to <8 x double>
%1 = bitcast <8 x i64> %__b to <8 x double>
- %2 = call <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double> %0, <8 x double> %1, i32 2, i32 8)
+ %2 = call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %0, <8 x double> %1, i32 2, <8 x i1> , i32 8)
%3 = bitcast <8 x i1> %2 to i8
%4 = zext i8 %3 to i64
ret i64 %4
@@ -23322,7 +23322,7 @@
entry:
%0 = bitcast <8 x i64> %__a to <8 x double>
%1 = bitcast <8 x i64> %__b to <8 x double>
- %2 = call <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double> %0, <8 x double> %1, i32 2, i32 8)
+ %2 = call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %0, <8 x double> %1, i32 2, <8 x i1> , i32 8)
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
%5 = bitcast <8 x i1> %4 to i8
@@ -23345,7 +23345,7 @@
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
- %res = call <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float> %a, <16 x float> %b, i32 2, i32 8)
+ %res = call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %a, <16 x float> %b, i32 2, <16 x i1> , i32 8)
%1 = bitcast <16 x i1> %res to i16
%cast = bitcast i16 %1 to <16 x i1>
%shuffle = shufflevector <16 x i1> %cast, <16 x i1>
zeroinitializer, <32 x i32>
diff --git a/llvm/test/CodeGen/X86/stack-folding-fp-avx512.ll b/llvm/test/CodeGen/X86/stack-folding-fp-avx512.ll
--- a/llvm/test/CodeGen/X86/stack-folding-fp-avx512.ll
+++ b/llvm/test/CodeGen/X86/stack-folding-fp-avx512.ll
@@ -312,11 +312,11 @@
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
- %res = call <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double> %a0, <8 x double> %a1, i32 0, i32 4)
+ %res = call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %a0, <8 x double> %a1, i32 0, <8 x i1> , i32 4)
%2 = bitcast <8 x i1> %res to i8
ret i8 %2
}
-declare <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double>, <8 x double>, i32, i32)
+declare <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double>, <8 x double>, i32, <8 x i1>, i32)
define <8 x double> @stack_fold_cmppd_mask(<8 x double> %a0, <8 x double> %a1, <8 x double>* %a2, i8 %mask, <8 x double> %b0, <8 x double> %b1) {
; CHECK-LABEL: stack_fold_cmppd_mask:
@@ -332,8 +332,9 @@
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vmovupd {{[-0-9]+}}(%r{{[sb]}}p), %zmm0 # 64-byte Reload
; CHECK-NEXT: vaddpd (%rdi), %zmm0, %zmm0
+; CHECK-NEXT: vcmpeqpd {{[-0-9]+}}(%r{{[sb]}}p), %zmm0, %k0 # 64-byte Folded Reload
; CHECK-NEXT: kmovw %esi, %k1
-; CHECK-NEXT: vcmpeqpd {{[-0-9]+}}(%r{{[sb]}}p), %zmm0, %k1 {%k1} # 64-byte Folded Reload
+; CHECK-NEXT: kandb %k0, %k1, %k1
; CHECK-NEXT: vmovupd {{[-0-9]+}}(%r{{[sb]}}p), %zmm0 # 64-byte Reload
; CHECK-NEXT: vblendmpd (%rsp), %zmm0, %zmm0 {%k1} # 64-byte Folded Reload
; CHECK-NEXT: addq $184, %rsp
@@ -344,7 +345,7 @@
%2 = load <8 x double>, <8 x double>* %a2
%3 = fadd <8 x double> %a1, %2
%4 = bitcast i8 %mask to <8 x i1>
- %5 = call <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double> %3, <8 x double> %a0, i32 0, i32 4)
+ %5 = call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %3, <8 x double> %a0, i32 0, <8 x i1> , i32 4)
%6 = and <8 x i1> %4, %5
%7 = select <8 x i1> %6, <8 x double> %b0, <8 x double> %b1
ret <8 x double> %7
@@ -364,8 +365,9 @@
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vmovupd {{[-0-9]+}}(%r{{[sb]}}p), %zmm0 # 64-byte Reload
; CHECK-NEXT: vaddpd (%rdi), %zmm0, %zmm0
+; CHECK-NEXT: vcmpeqpd {{[-0-9]+}}(%r{{[sb]}}p), %zmm0, %k0 # 64-byte Folded Reload
; CHECK-NEXT: kmovw %esi, %k1
-; CHECK-NEXT: vcmpeqpd {{[-0-9]+}}(%r{{[sb]}}p), %zmm0, %k1 {%k1} # 64-byte Folded Reload
+; CHECK-NEXT: kandb %k0, %k1, %k1
; CHECK-NEXT: vmovupd {{[-0-9]+}}(%r{{[sb]}}p), %zmm0 # 64-byte Reload
; CHECK-NEXT: vblendmpd (%rsp), %zmm0, %zmm0 {%k1} # 64-byte Folded Reload
; CHECK-NEXT: addq $184, %rsp
@@ -376,7 +378,7 @@
%2 = load <8 x double>, <8 x double>* %a2
%3 = fadd <8 x double> %a1, %2
%4 = bitcast i8 %mask to <8 x i1>
- %5 = call <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double> %a0, <8 x double> %3, i32 0, i32 4)
+ %5 = call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %a0, <8 x double> %3, i32 0, <8 x i1> , i32 4)
%6 = and <8 x i1> %4, %5
%7 = select <8 x i1> %6, <8 x double> %b0, <8 x double> %b1
ret <8 x double> %7
@@ -395,11 +397,11 @@
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop",
"=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
- %res = call <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float> %a0, <16 x float> %a1, i32 0, i32 4)
+ %res = call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %a0, <16 x float> %a1, i32 0, <16 x i1> , i32 4)
%2 = bitcast <16 x i1> %res to i16
ret i16 %2
}
-declare <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float>, <16 x float>, i32, i32)
+declare <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float>, <16 x float>, i32, <16 x i1>, i32)
define <16 x float> @stack_fold_cmpps_mask(<16 x float> %a0, <16 x float> %a1, <16 x float>* %a2, i16 %mask, <16 x float> %b0, <16 x float> %b1) {
; CHECK-LABEL: stack_fold_cmpps_mask:
@@ -415,8 +417,9 @@
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm0 # 64-byte Reload
; CHECK-NEXT: vaddps (%rdi), %zmm0, %zmm0
+; CHECK-NEXT: vcmpeqps {{[-0-9]+}}(%r{{[sb]}}p), %zmm0, %k0 # 64-byte Folded Reload
; CHECK-NEXT: kmovw %esi, %k1
-; CHECK-NEXT: vcmpeqps {{[-0-9]+}}(%r{{[sb]}}p), %zmm0, %k1 {%k1} # 64-byte Folded Reload
+; CHECK-NEXT: kandw %k0, %k1, %k1
; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm0 # 64-byte Reload
; CHECK-NEXT: vblendmps (%rsp), %zmm0, %zmm0 {%k1} # 64-byte Folded Reload
; CHECK-NEXT: addq $184, %rsp
@@ -427,7 +430,7 @@
%2 = load <16 x float>, <16 x float>* %a2
%3 = fadd <16 x float> %a1, %2
%4 = bitcast i16 %mask to <16 x i1>
- %5 = call <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float> %3, <16 x float> %a0, i32 0, i32 4)
+ %5 = call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %3, <16 x float> %a0, i32 0, <16 x i1> , i32 4)
%6 = and <16 x i1> %4, %5
%7 = select <16 x i1> %6, <16 x float> %b0, <16 x float> %b1
ret <16 x float> %7
@@ -447,8 +450,9 @@
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm0 # 64-byte Reload
; CHECK-NEXT: vaddps (%rdi), %zmm0, %zmm0
+; CHECK-NEXT: vcmpeqps {{[-0-9]+}}(%r{{[sb]}}p), %zmm0, %k0 # 64-byte Folded Reload
; CHECK-NEXT: kmovw %esi, %k1
-; CHECK-NEXT: vcmpeqps {{[-0-9]+}}(%r{{[sb]}}p), %zmm0, %k1 {%k1} # 64-byte Folded Reload
+; CHECK-NEXT: kandw %k0, %k1, %k1
; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm0 # 64-byte Reload
; CHECK-NEXT: vblendmps (%rsp), %zmm0, %zmm0 {%k1} # 64-byte Folded Reload
; CHECK-NEXT: addq $184, %rsp
@@ -459,7 +463,7 @@
%2 = load <16 x float>, <16 x float>* %a2
%3 = fadd <16 x float> %a1, %2
%4 = bitcast i16 %mask to <16 x i1>
- %5 = call <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float> %a0, <16 x float> %3, i32 0, i32 4)
+ %5 = call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %a0, <16 x float> %3, i32 0, <16 x i1> , i32 4)
%6 = and <16 x i1> %4, %5
%7 = select <16 x i1> %6, <16 x float> %b0, <16 x float> %b1
ret <16 x float> %7
diff --git a/llvm/test/CodeGen/X86/stack-folding-fp-avx512vl.ll b/llvm/test/CodeGen/X86/stack-folding-fp-avx512vl.ll
--- a/llvm/test/CodeGen/X86/stack-folding-fp-avx512vl.ll
+++ b/llvm/test/CodeGen/X86/stack-folding-fp-avx512vl.ll
@@ -249,12 +249,12 @@
; CHECK-NEXT: # kill: def $al killed $al killed $eax
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop",
"=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
- %res = call <2 x i1> @llvm.x86.avx512.cmp.pd.128(<2 x double> %a0, <2 x double> %a1, i32 0)
+ %res = call <2 x i1> @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> %a0, <2 x double> %a1, i32 0, <2 x i1> )
%2 = shufflevector <2 x i1> %res, <2 x i1> zeroinitializer, <8 x i32>
%3 = bitcast <8 x i1> %2 to i8
ret i8 %3
}
-declare <2 x i1> @llvm.x86.avx512.cmp.pd.128(<2 x double>, <2 x double>, i32)
+declare <2 x i1> @llvm.x86.avx512.mask.cmp.pd.128(<2 x double>, <2 x double>, i32, <2 x i1>)
define i8 @stack_fold_cmppd_ymm(<4 x double> %a0, <4 x double> %a1) {
; CHECK-LABEL: stack_fold_cmppd_ymm:
@@ -269,12 +269,12 @@
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
- %res = call <4 x i1> @llvm.x86.avx512.cmp.pd.256(<4 x double> %a0, <4 x double> %a1, i32 0)
+ %res = call <4 x i1> @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> %a0, <4 x double> %a1, i32 0, <4 x i1> )
%2 = shufflevector <4 x i1> %res, <4 x i1> zeroinitializer, <8 x i32>
%3 = bitcast <8 x i1> %2 to i8
ret i8 %3
}
-declare <4 x i1> @llvm.x86.avx512.cmp.pd.256(<4 x double>, <4 x double>, i32)
+declare <4 x i1> @llvm.x86.avx512.mask.cmp.pd.256(<4 x double>, <4 x double>, i32, <4 x i1>)
define i8 @stack_fold_cmpps(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: stack_fold_cmpps:
@@ -288,12 +288,12 @@
; CHECK-NEXT: # kill: def $al killed $al killed $eax
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
- %res = call <4 x i1> @llvm.x86.avx512.cmp.ps.128(<4 x float> %a0, <4 x float> %a1, i32 0)
+ %res = call <4 x i1> @llvm.x86.avx512.mask.cmp.ps.128(<4 x float> %a0, <4 x float> %a1, i32 0, <4 x i1> )
%2 = shufflevector <4 x i1> %res, <4 x i1> zeroinitializer, <8 x i32>
%3 = bitcast <8 x i1> %2 to i8
ret i8 %3
}
-declare <4 x i1> @llvm.x86.avx512.cmp.ps.128(<4 x float>, <4 x float>, i32)
+declare <4 x i1> @llvm.x86.avx512.mask.cmp.ps.128(<4 x float>, <4 x float>, i32, <4 x i1>)
define i8 @stack_fold_cmpps_ymm(<8 x float> %a0, <8 x float> %a1) {
; CHECK-LABEL: stack_fold_cmpps_ymm:
@@ -308,11 +308,11 @@
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
- %res = call <8 x i1> @llvm.x86.avx512.cmp.ps.256(<8 x float> %a0, <8 x float> %a1, i32 0)
+ %res = call <8 x i1> @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> %a0, <8 x float> %a1, i32 0, <8 x i1> )
%2 = bitcast <8 x i1> %res to i8
ret i8 %2
}
-declare <8 x i1> @llvm.x86.avx512.cmp.ps.256(<8 x float>,
<8 x float>, i32)
+declare <8 x i1> @llvm.x86.avx512.mask.cmp.ps.256(<8 x float>, <8 x float>, i32, <8 x i1>)
define <2 x double> @stack_fold_divpd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: stack_fold_divpd:
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-v1.ll b/llvm/test/CodeGen/X86/vector-shuffle-v1.ll
--- a/llvm/test/CodeGen/X86/vector-shuffle-v1.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-v1.ll
@@ -881,159 +881,3 @@
%d = bitcast <64 x i1> %c to i64
ret i64 %d
}
-
-; OR(KSHIFTL(X,8),Y) -> KUNPCKBW
-define void @PR32547(<8 x float> %a, <8 x float> %b, <8 x float> %c, <8 x float> %d, float* %p) {
-; AVX512F-LABEL: PR32547:
-; AVX512F: # %bb.0: # %entry
-; AVX512F-NEXT: # kill: def $ymm3 killed $ymm3 def $zmm3
-; AVX512F-NEXT: # kill: def $ymm2 killed $ymm2 def $zmm2
-; AVX512F-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
-; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512F-NEXT: vcmpltps %zmm1, %zmm0, %k0
-; AVX512F-NEXT: vcmpltps %zmm3, %zmm2, %k1
-; AVX512F-NEXT: kunpckbw %k1, %k0, %k1
-; AVX512F-NEXT: vxorps %xmm0, %xmm0, %xmm0
-; AVX512F-NEXT: vmovaps %zmm0, (%rdi) {%k1}
-; AVX512F-NEXT: vzeroupper
-; AVX512F-NEXT: retq
-;
-; AVX512VL-LABEL: PR32547:
-; AVX512VL: # %bb.0: # %entry
-; AVX512VL-NEXT: vcmpltps %ymm1, %ymm0, %k0
-; AVX512VL-NEXT: vcmpltps %ymm3, %ymm2, %k1
-; AVX512VL-NEXT: kunpckbw %k1, %k0, %k1
-; AVX512VL-NEXT: vxorps %xmm0, %xmm0, %xmm0
-; AVX512VL-NEXT: vmovaps %zmm0, (%rdi) {%k1}
-; AVX512VL-NEXT: vzeroupper
-; AVX512VL-NEXT: retq
-;
-; VL_BW_DQ-LABEL: PR32547:
-; VL_BW_DQ: # %bb.0: # %entry
-; VL_BW_DQ-NEXT: vcmpltps %ymm1, %ymm0, %k0
-; VL_BW_DQ-NEXT: vcmpltps %ymm3, %ymm2, %k1
-; VL_BW_DQ-NEXT: kunpckbw %k1, %k0, %k1
-; VL_BW_DQ-NEXT: vxorps %xmm0, %xmm0, %xmm0
-; VL_BW_DQ-NEXT: vmovaps %zmm0, (%rdi) {%k1}
-; VL_BW_DQ-NEXT: vzeroupper
-; VL_BW_DQ-NEXT: retq
- entry:
- %0 = tail call i8 @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> %a, <8 x float> %b, i32 1, i8 -1)
- %1 = tail call i8 @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> %c, <8 x float> %d, i32 1, i8 -1)
- %conv.i = zext i8 %0 to i16
- %conv.i18 = zext i8 %1 to i16
- %shl = shl nuw i16 %conv.i, 8
- %or = or i16 %shl, %conv.i18
- %2 = bitcast float* %p to <16 x float>*
- %3 = bitcast i16 %or to <16 x i1>
- tail call void @llvm.masked.store.v16f32.p0v16f32(<16 x float> zeroinitializer, <16 x float>* %2, i32 64, <16 x i1> %3) #4
- ret void
-}
-
-; OR(X, KSHIFTL(Y,8)) -> KUNPCKBW
-define void @PR32547_swap(<8 x float> %a, <8 x float> %b, <8 x float> %c, <8 x float> %d, float* %p) {
-; AVX512F-LABEL: PR32547_swap:
-; AVX512F: # %bb.0: # %entry
-; AVX512F-NEXT: # kill: def $ymm3 killed $ymm3 def $zmm3
-; AVX512F-NEXT: # kill: def $ymm2 killed $ymm2 def $zmm2
-; AVX512F-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
-; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512F-NEXT: vcmpltps %zmm1, %zmm0, %k0
-; AVX512F-NEXT: vcmpltps %zmm3, %zmm2, %k1
-; AVX512F-NEXT: kunpckbw %k1, %k0, %k1
-; AVX512F-NEXT: vxorps %xmm0, %xmm0, %xmm0
-; AVX512F-NEXT: vmovaps %zmm0, (%rdi) {%k1}
-; AVX512F-NEXT: vzeroupper
-; AVX512F-NEXT: retq
-;
-; AVX512VL-LABEL: PR32547_swap:
-; AVX512VL: # %bb.0: # %entry
-; AVX512VL-NEXT: vcmpltps %ymm1, %ymm0, %k0
-; AVX512VL-NEXT: vcmpltps %ymm3, %ymm2, %k1
-; AVX512VL-NEXT: kunpckbw %k1, %k0, %k1
-; AVX512VL-NEXT: vxorps %xmm0, %xmm0, %xmm0
-; AVX512VL-NEXT: vmovaps %zmm0, (%rdi) {%k1}
-; AVX512VL-NEXT: vzeroupper
-; AVX512VL-NEXT: retq
-;
-; VL_BW_DQ-LABEL: PR32547_swap:
-; VL_BW_DQ: # %bb.0: # %entry
-; VL_BW_DQ-NEXT: vcmpltps %ymm1, %ymm0, %k0
-; VL_BW_DQ-NEXT: vcmpltps %ymm3, %ymm2, %k1 -; VL_BW_DQ-NEXT: kunpckbw %k1, %k0, %k1 -; VL_BW_DQ-NEXT: vxorps %xmm0, %xmm0, %xmm0 -; VL_BW_DQ-NEXT: vmovaps %zmm0, (%rdi) {%k1} -; VL_BW_DQ-NEXT: vzeroupper -; VL_BW_DQ-NEXT: retq - entry: - %0 = tail call i8 @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> %a, <8 x float> %b, i32 1, i8 -1) - %1 = tail call i8 @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> %c, <8 x float> %d, i32 1, i8 -1) - %conv.i = zext i8 %0 to i16 - %conv.i18 = zext i8 %1 to i16 - %shl = shl nuw i16 %conv.i, 8 - %or = or i16 %conv.i18, %shl - %2 = bitcast float* %p to <16 x float>* - %3 = bitcast i16 %or to <16 x i1> - tail call void @llvm.masked.store.v16f32.p0v16f32(<16 x float> zeroinitializer, <16 x float>* %2, i32 64, <16 x i1> %3) #4 - ret void -} - -define void @mask_cmp_128(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %d, float* %p) { -; AVX512F-LABEL: mask_cmp_128: -; AVX512F: # %bb.0: # %entry -; AVX512F-NEXT: # kill: def $xmm3 killed $xmm3 def $zmm3 -; AVX512F-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2 -; AVX512F-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 -; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; AVX512F-NEXT: vcmpltps %zmm1, %zmm0, %k0 -; AVX512F-NEXT: kmovw %k0, %eax -; AVX512F-NEXT: vcmpltps %zmm3, %zmm2, %k0 -; AVX512F-NEXT: kshiftlw $12, %k0, %k0 -; AVX512F-NEXT: kshiftrw $12, %k0, %k0 -; AVX512F-NEXT: shlb $4, %al -; AVX512F-NEXT: kmovw %eax, %k1 -; AVX512F-NEXT: korw %k1, %k0, %k0 -; AVX512F-NEXT: kshiftlw $8, %k0, %k0 -; AVX512F-NEXT: kshiftrw $8, %k0, %k1 -; AVX512F-NEXT: vxorps %xmm0, %xmm0, %xmm0 -; AVX512F-NEXT: vmovaps %zmm0, (%rdi) {%k1} -; AVX512F-NEXT: vzeroupper -; AVX512F-NEXT: retq -; -; AVX512VL-LABEL: mask_cmp_128: -; AVX512VL: # %bb.0: # %entry -; AVX512VL-NEXT: vcmpltps %xmm1, %xmm0, %k0 -; AVX512VL-NEXT: kmovw %k0, %eax -; AVX512VL-NEXT: vcmpltps %xmm3, %xmm2, %k0 -; AVX512VL-NEXT: shlb $4, %al -; AVX512VL-NEXT: kmovw %eax, %k1 -; AVX512VL-NEXT: korw %k1, %k0, %k1 -; AVX512VL-NEXT: vxorps %xmm0, %xmm0, %xmm0 -; AVX512VL-NEXT: vmovaps %ymm0, (%rdi) {%k1} -; AVX512VL-NEXT: vzeroupper -; AVX512VL-NEXT: retq -; -; VL_BW_DQ-LABEL: mask_cmp_128: -; VL_BW_DQ: # %bb.0: # %entry -; VL_BW_DQ-NEXT: vcmpltps %xmm1, %xmm0, %k0 -; VL_BW_DQ-NEXT: vcmpltps %xmm3, %xmm2, %k1 -; VL_BW_DQ-NEXT: kshiftlb $4, %k0, %k0 -; VL_BW_DQ-NEXT: korb %k0, %k1, %k1 -; VL_BW_DQ-NEXT: vxorps %xmm0, %xmm0, %xmm0 -; VL_BW_DQ-NEXT: vmovaps %ymm0, (%rdi) {%k1} -; VL_BW_DQ-NEXT: vzeroupper -; VL_BW_DQ-NEXT: retq -entry: - %0 = tail call i8 @llvm.x86.avx512.mask.cmp.ps.128(<4 x float> %a, <4 x float> %b, i32 1, i8 -1) - %1 = tail call i8 @llvm.x86.avx512.mask.cmp.ps.128(<4 x float> %c, <4 x float> %d, i32 1, i8 -1) - %shl = shl nuw i8 %0, 4 - %or = or i8 %1, %shl - %2 = bitcast float* %p to <8 x float>* - %3 = bitcast i8 %or to <8 x i1> - tail call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %2, i32 64, <8 x i1> %3) - ret void -} -declare i8 @llvm.x86.avx512.mask.cmp.ps.128(<4 x float>, <4 x float>, i32, i8) -declare i8 @llvm.x86.avx512.mask.cmp.ps.256(<8 x float>, <8 x float>, i32, i8) -declare void @llvm.masked.store.v8f32.p0v8f32(<8 x float>, <8 x float>*, i32, <8 x i1>) -declare void @llvm.masked.store.v16f32.p0v16f32(<16 x float>, <16 x float>*, i32, <16 x i1>) diff --git a/llvm/test/Transforms/InstCombine/X86/X86FsubCmpCombine.ll b/llvm/test/Transforms/InstCombine/X86/X86FsubCmpCombine.ll deleted file mode 100644 --- a/llvm/test/Transforms/InstCombine/X86/X86FsubCmpCombine.ll +++ /dev/null @@ 
-1,210 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -instcombine -mtriple=x86_64-unknown-unknown -S | FileCheck %s - -; The test checks the folding of cmp(sub(a,b),0) into cmp(a,b). - -define i8 @sub_compare_foldingPD128_safe(<2 x double> %a, <2 x double> %b){ -; CHECK-LABEL: @sub_compare_foldingPD128_safe( -; CHECK-NEXT: [[SUB_SAFE:%.*]] = fsub <2 x double> [[A:%.*]], [[B:%.*]] -; CHECK-NEXT: [[T0:%.*]] = call <2 x i1> @llvm.x86.avx512.cmp.pd.128(<2 x double> [[SUB_SAFE]], <2 x double> zeroinitializer, i32 5) -; CHECK-NEXT: [[T1:%.*]] = shufflevector <2 x i1> [[T0]], <2 x i1> zeroinitializer, <8 x i32> -; CHECK-NEXT: [[T2:%.*]] = bitcast <8 x i1> [[T1]] to i8 -; CHECK-NEXT: ret i8 [[T2]] -; - %sub.safe = fsub <2 x double> %a, %b - %t0 = call <2 x i1> @llvm.x86.avx512.cmp.pd.128(<2 x double> %sub.safe, <2 x double> zeroinitializer, i32 5) - %t1 = shufflevector <2 x i1> %t0, <2 x i1> zeroinitializer, <8 x i32> - %t2 = bitcast <8 x i1> %t1 to i8 - ret i8 %t2 -} - -define i8 @sub_compare_foldingPD128(<2 x double> %a, <2 x double> %b){ -; CHECK-LABEL: @sub_compare_foldingPD128( -; CHECK-NEXT: [[T0:%.*]] = call <2 x i1> @llvm.x86.avx512.cmp.pd.128(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], i32 5) -; CHECK-NEXT: [[T1:%.*]] = shufflevector <2 x i1> [[T0]], <2 x i1> zeroinitializer, <8 x i32> -; CHECK-NEXT: [[T2:%.*]] = bitcast <8 x i1> [[T1]] to i8 -; CHECK-NEXT: ret i8 [[T2]] -; - %sub.i = fsub ninf <2 x double> %a, %b - %t0 = call <2 x i1> @llvm.x86.avx512.cmp.pd.128(<2 x double> %sub.i, <2 x double> zeroinitializer, i32 5) - %t1 = shufflevector <2 x i1> %t0, <2 x i1> zeroinitializer, <8 x i32> - %t2 = bitcast <8 x i1> %t1 to i8 - ret i8 %t2 -} - -define i8 @sub_compare_foldingPD128_undef_elt(<2 x double> %a, <2 x double> %b){ -; CHECK-LABEL: @sub_compare_foldingPD128_undef_elt( -; CHECK-NEXT: [[T0:%.*]] = call <2 x i1> @llvm.x86.avx512.cmp.pd.128(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], i32 5) -; CHECK-NEXT: [[T1:%.*]] = shufflevector <2 x i1> [[T0]], <2 x i1> zeroinitializer, <8 x i32> -; CHECK-NEXT: [[T2:%.*]] = bitcast <8 x i1> [[T1]] to i8 -; CHECK-NEXT: ret i8 [[T2]] -; - %sub.i = fsub ninf <2 x double> %a, %b - %t0 = call <2 x i1> @llvm.x86.avx512.cmp.pd.128(<2 x double> %sub.i, <2 x double> , i32 5) - %t1 = shufflevector <2 x i1> %t0, <2 x i1> zeroinitializer, <8 x i32> - %t2 = bitcast <8 x i1> %t1 to i8 - ret i8 %t2 -} - -define i8 @sub_compare_foldingPD256(<4 x double> %a, <4 x double> %b){ -; CHECK-LABEL: @sub_compare_foldingPD256( -; CHECK-NEXT: [[T0:%.*]] = call <4 x i1> @llvm.x86.avx512.cmp.pd.256(<4 x double> [[A:%.*]], <4 x double> [[B:%.*]], i32 5) -; CHECK-NEXT: [[T1:%.*]] = shufflevector <4 x i1> [[T0]], <4 x i1> zeroinitializer, <8 x i32> -; CHECK-NEXT: [[T2:%.*]] = bitcast <8 x i1> [[T1]] to i8 -; CHECK-NEXT: ret i8 [[T2]] -; - %sub.i1 = fsub ninf <4 x double> %a, %b - %t0 = call <4 x i1> @llvm.x86.avx512.cmp.pd.256(<4 x double> %sub.i1, <4 x double> zeroinitializer, i32 5) - %t1 = shufflevector <4 x i1> %t0, <4 x i1> zeroinitializer, <8 x i32> - %t2 = bitcast <8 x i1> %t1 to i8 - ret i8 %t2 -} - -define i8 @sub_compare_foldingPD512(<8 x double> %a, <8 x double> %b){ -; CHECK-LABEL: @sub_compare_foldingPD512( -; CHECK-NEXT: [[T0:%.*]] = call <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double> [[A:%.*]], <8 x double> [[B:%.*]], i32 11, i32 4) -; CHECK-NEXT: [[T1:%.*]] = bitcast <8 x i1> [[T0]] to i8 -; CHECK-NEXT: ret i8 [[T1]] -; - %sub.i2 = fsub ninf <8 x double> %a, %b - %t0 = call <8 x i1> 
@llvm.x86.avx512.cmp.pd.512(<8 x double> %sub.i2, <8 x double> zeroinitializer, i32 11, i32 4) - %t1 = bitcast <8 x i1> %t0 to i8 - ret i8 %t1 -} - -define i8 @sub_compare_foldingPS128(<4 x float> %a, <4 x float> %b){ -; CHECK-LABEL: @sub_compare_foldingPS128( -; CHECK-NEXT: [[T0:%.*]] = call <4 x i1> @llvm.x86.avx512.cmp.ps.128(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], i32 12) -; CHECK-NEXT: [[T1:%.*]] = shufflevector <4 x i1> [[T0]], <4 x i1> zeroinitializer, <8 x i32> -; CHECK-NEXT: [[T2:%.*]] = bitcast <8 x i1> [[T1]] to i8 -; CHECK-NEXT: ret i8 [[T2]] -; - %sub.i3 = fsub ninf <4 x float> %a, %b - %t0 = call <4 x i1> @llvm.x86.avx512.cmp.ps.128(<4 x float> %sub.i3, <4 x float> zeroinitializer, i32 12) - %t1 = shufflevector <4 x i1> %t0, <4 x i1> zeroinitializer, <8 x i32> - %t2 = bitcast <8 x i1> %t1 to i8 - ret i8 %t2 -} - -define i8 @sub_compare_foldingPS256(<8 x float> %a, <8 x float> %b){ -; CHECK-LABEL: @sub_compare_foldingPS256( -; CHECK-NEXT: [[T0:%.*]] = call <8 x i1> @llvm.x86.avx512.cmp.ps.256(<8 x float> [[A:%.*]], <8 x float> [[B:%.*]], i32 5) -; CHECK-NEXT: [[T1:%.*]] = bitcast <8 x i1> [[T0]] to i8 -; CHECK-NEXT: ret i8 [[T1]] -; - %sub.i4 = fsub ninf <8 x float> %a, %b - %t0 = call <8 x i1> @llvm.x86.avx512.cmp.ps.256(<8 x float> %sub.i4, <8 x float> zeroinitializer, i32 5) - %t1 = bitcast <8 x i1> %t0 to i8 - ret i8 %t1 -} - -define i16 @sub_compare_foldingPS512(<16 x float> %a, <16 x float> %b){ -; CHECK-LABEL: @sub_compare_foldingPS512( -; CHECK-NEXT: [[T0:%.*]] = call <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float> [[A:%.*]], <16 x float> [[B:%.*]], i32 11, i32 4) -; CHECK-NEXT: [[T1:%.*]] = bitcast <16 x i1> [[T0]] to i16 -; CHECK-NEXT: ret i16 [[T1]] -; - %sub.i5 = fsub ninf <16 x float> %a, %b - %t0 = call <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float> %sub.i5, <16 x float> zeroinitializer, i32 11, i32 4) - %t1 = bitcast <16 x i1> %t0 to i16 - ret i16 %t1 -} - -define i8 @sub_compare_folding_swapPD128(<2 x double> %a, <2 x double> %b){ -; CHECK-LABEL: @sub_compare_folding_swapPD128( -; CHECK-NEXT: [[T0:%.*]] = call <2 x i1> @llvm.x86.avx512.cmp.pd.128(<2 x double> [[B:%.*]], <2 x double> [[A:%.*]], i32 5) -; CHECK-NEXT: [[T1:%.*]] = shufflevector <2 x i1> [[T0]], <2 x i1> zeroinitializer, <8 x i32> -; CHECK-NEXT: [[T2:%.*]] = bitcast <8 x i1> [[T1]] to i8 -; CHECK-NEXT: ret i8 [[T2]] -; - %sub.i = fsub ninf <2 x double> %a, %b - %t0 = call <2 x i1> @llvm.x86.avx512.cmp.pd.128(<2 x double> zeroinitializer, <2 x double> %sub.i, i32 5) - %t1 = shufflevector <2 x i1> %t0, <2 x i1> zeroinitializer, <8 x i32> - %t2 = bitcast <8 x i1> %t1 to i8 - ret i8 %t2 -} - -define i8 @sub_compare_folding_swapPD256(<4 x double> %a, <4 x double> %b){ -; CHECK-LABEL: @sub_compare_folding_swapPD256( -; CHECK-NEXT: [[T0:%.*]] = call <4 x i1> @llvm.x86.avx512.cmp.pd.256(<4 x double> [[B:%.*]], <4 x double> [[A:%.*]], i32 5) -; CHECK-NEXT: [[T1:%.*]] = shufflevector <4 x i1> [[T0]], <4 x i1> zeroinitializer, <8 x i32> -; CHECK-NEXT: [[T2:%.*]] = bitcast <8 x i1> [[T1]] to i8 -; CHECK-NEXT: ret i8 [[T2]] -; - %sub.i = fsub ninf <4 x double> %a, %b - %t0 = call <4 x i1> @llvm.x86.avx512.cmp.pd.256(<4 x double> zeroinitializer, <4 x double> %sub.i, i32 5) - %t1 = shufflevector <4 x i1> %t0, <4 x i1> zeroinitializer, <8 x i32> - %t2 = bitcast <8 x i1> %t1 to i8 - ret i8 %t2 -} - -define i8 @sub_compare_folding_swapPD256_undef(<4 x double> %a, <4 x double> %b) { -; CHECK-LABEL: @sub_compare_folding_swapPD256_undef( -; CHECK-NEXT: [[TMP:%.*]] = call <4 x i1> 
@llvm.x86.avx512.cmp.pd.256(<4 x double> undef, <4 x double> zeroinitializer, i32 5) -; CHECK-NEXT: [[T0:%.*]] = shufflevector <4 x i1> [[TMP]], <4 x i1> zeroinitializer, <8 x i32> -; CHECK-NEXT: [[T1:%.*]] = bitcast <8 x i1> [[T0]] to i8 -; CHECK-NEXT: ret i8 [[T1]] -; - %sub.i1 = fsub ninf <4 x double> undef, undef - %tmp = call <4 x i1> @llvm.x86.avx512.cmp.pd.256(<4 x double> %sub.i1, <4 x double> zeroinitializer, i32 5) - %t0 = shufflevector <4 x i1> %tmp, <4 x i1> zeroinitializer, <8 x i32> - %t1 = bitcast <8 x i1> %t0 to i8 - ret i8 %t1 -} - -define i8 @sub_compare_folding_swapPD512(<8 x double> %a, <8 x double> %b){ -; CHECK-LABEL: @sub_compare_folding_swapPD512( -; CHECK-NEXT: [[T0:%.*]] = call <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double> [[B:%.*]], <8 x double> [[A:%.*]], i32 11, i32 4) -; CHECK-NEXT: [[T1:%.*]] = bitcast <8 x i1> [[T0]] to i8 -; CHECK-NEXT: ret i8 [[T1]] -; - %sub.i = fsub ninf <8 x double> %a, %b - %t0 = call <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double> zeroinitializer, <8 x double> %sub.i, i32 11, i32 4) - %t1 = bitcast <8 x i1> %t0 to i8 - ret i8 %t1 -} - -define i8 @sub_compare_folding_swapPS128(<4 x float> %a, <4 x float> %b){ -; CHECK-LABEL: @sub_compare_folding_swapPS128( -; CHECK-NEXT: [[T0:%.*]] = call <4 x i1> @llvm.x86.avx512.cmp.ps.128(<4 x float> [[B:%.*]], <4 x float> [[A:%.*]], i32 12) -; CHECK-NEXT: [[T1:%.*]] = shufflevector <4 x i1> [[T0]], <4 x i1> zeroinitializer, <8 x i32> -; CHECK-NEXT: [[T2:%.*]] = bitcast <8 x i1> [[T1]] to i8 -; CHECK-NEXT: ret i8 [[T2]] -; - %sub.i = fsub ninf <4 x float> %a, %b - %t0 = call <4 x i1> @llvm.x86.avx512.cmp.ps.128(<4 x float> zeroinitializer, <4 x float> %sub.i, i32 12) - %t1 = shufflevector <4 x i1> %t0, <4 x i1> zeroinitializer, <8 x i32> - %t2 = bitcast <8 x i1> %t1 to i8 - ret i8 %t2 -} - -define i8 @sub_compare_folding_swapPS256(<8 x float> %a, <8 x float> %b){ -; CHECK-LABEL: @sub_compare_folding_swapPS256( -; CHECK-NEXT: [[T0:%.*]] = call <8 x i1> @llvm.x86.avx512.cmp.ps.256(<8 x float> [[B:%.*]], <8 x float> [[A:%.*]], i32 5) -; CHECK-NEXT: [[T1:%.*]] = bitcast <8 x i1> [[T0]] to i8 -; CHECK-NEXT: ret i8 [[T1]] -; - %sub.i = fsub ninf <8 x float> %a, %b - %t0 = call <8 x i1> @llvm.x86.avx512.cmp.ps.256(<8 x float> zeroinitializer, <8 x float> %sub.i, i32 5) - %t1 = bitcast <8 x i1> %t0 to i8 - ret i8 %t1 -} - -define i16 @sub_compare_folding_swapPS512(<16 x float> %a, <16 x float> %b){ -; CHECK-LABEL: @sub_compare_folding_swapPS512( -; CHECK-NEXT: [[T0:%.*]] = call <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float> [[B:%.*]], <16 x float> [[A:%.*]], i32 11, i32 4) -; CHECK-NEXT: [[T1:%.*]] = bitcast <16 x i1> [[T0]] to i16 -; CHECK-NEXT: ret i16 [[T1]] -; - %sub.i = fsub ninf <16 x float> %a, %b - %t0 = call <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float> zeroinitializer, <16 x float> %sub.i, i32 11, i32 4) - %t1 = bitcast <16 x i1> %t0 to i16 - ret i16 %t1 -} - -declare <2 x i1> @llvm.x86.avx512.cmp.pd.128(<2 x double>, <2 x double>, i32) -declare <4 x i1> @llvm.x86.avx512.cmp.pd.256(<4 x double>, <4 x double>, i32) -declare <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double>, <8 x double>, i32, i32) -declare <4 x i1> @llvm.x86.avx512.cmp.ps.128(<4 x float>, <4 x float>, i32) -declare <8 x i1> @llvm.x86.avx512.cmp.ps.256(<8 x float>, <8 x float>, i32) -declare <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float>, <16 x float>, i32, i32)
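In the hunks above the constant <N x i1> mask operands of the new calls were elided, so the updated form can be hard to read. As a readability aid only (not part of the patch), here is a minimal LLVM IR sketch of a call to the restored 5-operand mask intrinsic; the function name, the all-true mask constant, and the choice of predicate 2 (_CMP_LE_OS) with SAE operand 4 (_MM_FROUND_CUR_DIRECTION) are illustrative assumptions.

; Hypothetical example, not taken from the patch: the fourth operand is the
; <8 x i1> mask that the removed llvm.x86.avx512.cmp.pd.512 form did not take;
; the final i32 is the SAE/rounding immediate.
define i8 @example_mask_cmp_pd_512(<8 x double> %a, <8 x double> %b) {
entry:
  %k = call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %a, <8 x double> %b, i32 2, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 4)
  %r = bitcast <8 x i1> %k to i8
  ret i8 %r
}

declare <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double>, <8 x double>, i32, <8 x i1>, i32)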