Index: lib/CodeGen/CGBuiltin.cpp
===================================================================
--- lib/CodeGen/CGBuiltin.cpp
+++ lib/CodeGen/CGBuiltin.cpp
@@ -10120,44 +10120,7 @@
     return Builder.CreateExtractValue(Call, 1);
   }
 
-  case X86::BI__builtin_ia32_cmpps128_mask:
-  case X86::BI__builtin_ia32_cmpps256_mask:
-  case X86::BI__builtin_ia32_cmpps512_mask:
-  case X86::BI__builtin_ia32_cmppd128_mask:
-  case X86::BI__builtin_ia32_cmppd256_mask:
-  case X86::BI__builtin_ia32_cmppd512_mask: {
-    unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
-    Value *MaskIn = Ops[3];
-    Ops.erase(&Ops[3]);
-
-    Intrinsic::ID ID;
-    switch (BuiltinID) {
-    default: llvm_unreachable("Unsupported intrinsic!");
-    case X86::BI__builtin_ia32_cmpps128_mask:
-      ID = Intrinsic::x86_avx512_mask_cmp_ps_128;
-      break;
-    case X86::BI__builtin_ia32_cmpps256_mask:
-      ID = Intrinsic::x86_avx512_mask_cmp_ps_256;
-      break;
-    case X86::BI__builtin_ia32_cmpps512_mask:
-      ID = Intrinsic::x86_avx512_mask_cmp_ps_512;
-      break;
-    case X86::BI__builtin_ia32_cmppd128_mask:
-      ID = Intrinsic::x86_avx512_mask_cmp_pd_128;
-      break;
-    case X86::BI__builtin_ia32_cmppd256_mask:
-      ID = Intrinsic::x86_avx512_mask_cmp_pd_256;
-      break;
-    case X86::BI__builtin_ia32_cmppd512_mask:
-      ID = Intrinsic::x86_avx512_mask_cmp_pd_512;
-      break;
-    }
-
-    Value *Cmp = Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
-    return EmitX86MaskedCompareResult(*this, Cmp, NumElts, MaskIn);
-  }
-
-  // SSE packed comparison intrinsics
+  // packed comparison intrinsics
   case X86::BI__builtin_ia32_cmpeqps:
   case X86::BI__builtin_ia32_cmpeqpd:
     return getVectorFCmpIR(CmpInst::FCMP_OEQ);
@@ -10185,64 +10148,82 @@
   case X86::BI__builtin_ia32_cmpps:
   case X86::BI__builtin_ia32_cmpps256:
   case X86::BI__builtin_ia32_cmppd:
-  case X86::BI__builtin_ia32_cmppd256: {
+  case X86::BI__builtin_ia32_cmppd256:
+  case X86::BI__builtin_ia32_cmpps128_mask:
+  case X86::BI__builtin_ia32_cmpps256_mask:
+  case X86::BI__builtin_ia32_cmpps512_mask:
+  case X86::BI__builtin_ia32_cmppd128_mask:
+  case X86::BI__builtin_ia32_cmppd256_mask:
+  case X86::BI__builtin_ia32_cmppd512_mask: {
+    // Lowering vector comparisons to fcmp instructions, while
+    // ignoring signalling behaviour requested
+    // ignoring rounding mode requested
+    // This is only possible as long as FENV_ACCESS is not implemented.
+    // See also: https://reviews.llvm.org/D45616
+
+    // The third argument is the comparison condition, an integer in the
+    // range [0, 31]
     unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x1f;
-    // If this one of the SSE immediates, we can use native IR.
-    if (CC < 8) {
-      FCmpInst::Predicate Pred;
-      switch (CC) {
-      case 0: Pred = FCmpInst::FCMP_OEQ; break;
-      case 1: Pred = FCmpInst::FCMP_OLT; break;
-      case 2: Pred = FCmpInst::FCMP_OLE; break;
-      case 3: Pred = FCmpInst::FCMP_UNO; break;
-      case 4: Pred = FCmpInst::FCMP_UNE; break;
-      case 5: Pred = FCmpInst::FCMP_UGE; break;
-      case 6: Pred = FCmpInst::FCMP_UGT; break;
-      case 7: Pred = FCmpInst::FCMP_ORD; break;
-      }
-      return getVectorFCmpIR(Pred);
+
+    // Lowering to IR fcmp instruction.
+    // Ignoring requested signaling behaviour,
+    // e.g. both _CMP_GT_OS & _CMP_GT_OQ are translated to FCMP_OGT.
+    // _CMP_FALSE_* and _CMP_TRUE_* map to FCMP_FALSE and FCMP_TRUE rather
+    // than to constants, so that the _mask builtins still pass the result
+    // and their mask operand (Ops[3]) through EmitX86MaskedCompareResult.
+    FCmpInst::Predicate Pred;
+    switch (CC) {
+    case 0x00: Pred = FCmpInst::FCMP_OEQ;   break;
+    case 0x01: Pred = FCmpInst::FCMP_OLT;   break;
+    case 0x02: Pred = FCmpInst::FCMP_OLE;   break;
+    case 0x03: Pred = FCmpInst::FCMP_UNO;   break;
+    case 0x04: Pred = FCmpInst::FCMP_UNE;   break;
+    case 0x05: Pred = FCmpInst::FCMP_UGE;   break;
+    case 0x06: Pred = FCmpInst::FCMP_UGT;   break;
+    case 0x07: Pred = FCmpInst::FCMP_ORD;   break;
+    case 0x08: Pred = FCmpInst::FCMP_UEQ;   break;
+    case 0x09: Pred = FCmpInst::FCMP_ULT;   break;
+    case 0x0a: Pred = FCmpInst::FCMP_ULE;   break;
+    case 0x0b: Pred = FCmpInst::FCMP_FALSE; break;
+    case 0x0c: Pred = FCmpInst::FCMP_ONE;   break;
+    case 0x0d: Pred = FCmpInst::FCMP_OGE;   break;
+    case 0x0e: Pred = FCmpInst::FCMP_OGT;   break;
+    case 0x0f: Pred = FCmpInst::FCMP_TRUE;  break;
+    case 0x10: Pred = FCmpInst::FCMP_OEQ;   break;
+    case 0x11: Pred = FCmpInst::FCMP_OLT;   break;
+    case 0x12: Pred = FCmpInst::FCMP_OLE;   break;
+    case 0x13: Pred = FCmpInst::FCMP_UNO;   break;
+    case 0x14: Pred = FCmpInst::FCMP_UNE;   break;
+    case 0x15: Pred = FCmpInst::FCMP_UGE;   break;
+    case 0x16: Pred = FCmpInst::FCMP_UGT;   break;
+    case 0x17: Pred = FCmpInst::FCMP_ORD;   break;
+    case 0x18: Pred = FCmpInst::FCMP_UEQ;   break;
+    case 0x19: Pred = FCmpInst::FCMP_ULT;   break;
+    case 0x1a: Pred = FCmpInst::FCMP_ULE;   break;
+    case 0x1b: Pred = FCmpInst::FCMP_FALSE; break;
+    case 0x1c: Pred = FCmpInst::FCMP_ONE;   break;
+    case 0x1d: Pred = FCmpInst::FCMP_OGE;   break;
+    case 0x1e: Pred = FCmpInst::FCMP_OGT;   break;
+    case 0x1f: Pred = FCmpInst::FCMP_TRUE;  break;
+    default: llvm_unreachable("Unhandled CC");
     }
 
-    // We can't handle 8-31 immediates with native IR, use the intrinsic.
-    // Except for predicates that create constants.
-    Intrinsic::ID ID;
+    // Builtins with the _mask suffix combine the comparison with the mask
+    // operand; all others return a vector as wide as the input vectors.
     switch (BuiltinID) {
-    default: llvm_unreachable("Unsupported intrinsic!");
-    case X86::BI__builtin_ia32_cmpps:
-      ID = Intrinsic::x86_sse_cmp_ps;
-      break;
-    case X86::BI__builtin_ia32_cmpps256:
-      // _CMP_TRUE_UQ, _CMP_TRUE_US produce -1,-1... vector
-      // on any input and _CMP_FALSE_OQ, _CMP_FALSE_OS produce 0, 0...
-      if (CC == 0xf || CC == 0xb || CC == 0x1b || CC == 0x1f) {
-        Value *Constant = (CC == 0xf || CC == 0x1f) ?
-            llvm::Constant::getAllOnesValue(Builder.getInt32Ty()) :
-            llvm::Constant::getNullValue(Builder.getInt32Ty());
-        Value *Vec = Builder.CreateVectorSplat(
-            Ops[0]->getType()->getVectorNumElements(), Constant);
-        return Builder.CreateBitCast(Vec, Ops[0]->getType());
-      }
-      ID = Intrinsic::x86_avx_cmp_ps_256;
-      break;
-    case X86::BI__builtin_ia32_cmppd:
-      ID = Intrinsic::x86_sse2_cmp_pd;
-      break;
-    case X86::BI__builtin_ia32_cmppd256:
-      // _CMP_TRUE_UQ, _CMP_TRUE_US produce -1,-1... vector
-      // on any input and _CMP_FALSE_OQ, _CMP_FALSE_OS produce 0, 0...
-      if (CC == 0xf || CC == 0xb || CC == 0x1b || CC == 0x1f) {
-        Value *Constant = (CC == 0xf || CC == 0x1f) ?
-            llvm::Constant::getAllOnesValue(Builder.getInt64Ty()) :
-            llvm::Constant::getNullValue(Builder.getInt64Ty());
-        Value *Vec = Builder.CreateVectorSplat(
-            Ops[0]->getType()->getVectorNumElements(), Constant);
-        return Builder.CreateBitCast(Vec, Ops[0]->getType());
-      }
-      ID = Intrinsic::x86_avx_cmp_pd_256;
-      break;
+    case X86::BI__builtin_ia32_cmpps512_mask:
+    case X86::BI__builtin_ia32_cmppd512_mask:
+    case X86::BI__builtin_ia32_cmpps128_mask:
+    case X86::BI__builtin_ia32_cmpps256_mask:
+    case X86::BI__builtin_ia32_cmppd128_mask:
+    case X86::BI__builtin_ia32_cmppd256_mask: {
+      unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
+      Value *Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]);
+      return EmitX86MaskedCompareResult(*this, Cmp, NumElts, Ops[3]);
+    }
+    default:
+      return getVectorFCmpIR(Pred);
     }
-
-    return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
   }
 
   // SSE scalar comparison intrinsics
Index: test/CodeGen/avx-builtins.c
===================================================================
--- test/CodeGen/avx-builtins.c
+++ test/CodeGen/avx-builtins.c
@@ -214,25 +214,25 @@
 
 __m128d test_mm_cmp_pd(__m128d A, __m128d B) {
   // CHECK-LABEL: test_mm_cmp_pd
-  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 13)
+  // CHECK: [[CMP:%.*]] = fcmp oge <2 x double> %{{.*}}, %{{.*}}
   return _mm_cmp_pd(A, B, _CMP_GE_OS);
 }
 
 __m256d test_mm256_cmp_pd(__m256d A, __m256d B) {
   // CHECK-LABEL: test_mm256_cmp_pd
-  // CHECK: call <4 x double> @llvm.x86.avx.cmp.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, i8 13)
+  // CHECK: [[CMP:%.*]] = fcmp oge <4 x double> %{{.*}}, %{{.*}}
   return _mm256_cmp_pd(A, B, _CMP_GE_OS);
 }
 
 __m128 test_mm_cmp_ps(__m128 A, __m128 B) {
   // CHECK-LABEL: test_mm_cmp_ps
-  // CHECK: call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 13)
+  // CHECK: [[CMP:%.*]] = fcmp oge <4 x float> %{{.*}}, %{{.*}}
   return _mm_cmp_ps(A, B, _CMP_GE_OS);
 }
 
 __m256 test_mm256_cmp_ps(__m256d A, __m256d B) {
   // CHECK-LABEL: test_mm256_cmp_ps
-  // CHECK: call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, i8 13)
+  // CHECK: [[CMP:%.*]] = fcmp oge <8 x float> %{{.*}}, %{{.*}}
   return _mm256_cmp_ps(A, B, _CMP_GE_OS);
 }
 
@@ -1401,69 +1401,117 @@
 double test_mm256_cvtsd_f64(__m256d __a) {
-  // CHECK-LABEL: @test_mm256_cvtsd_f64
-  // CHECK: extractelement <4 x double> %{{.*}}, i32 0
-  return _mm256_cvtsd_f64(__a);
+  // CHECK-LABEL: @test_mm256_cvtsd_f64
+  // CHECK: extractelement <4 x double> %{{.*}}, i32 0
+  return _mm256_cvtsd_f64(__a);
 }
 
 int test_mm256_cvtsi256_si32(__m256i __a) {
-  // CHECK-LABEL: @test_mm256_cvtsi256_si32
-  // CHECK: extractelement <8 x i32> %{{.*}}, i32 0
-  return _mm256_cvtsi256_si32(__a);
+  // CHECK-LABEL: @test_mm256_cvtsi256_si32
+  // CHECK: extractelement <8 x i32> %{{.*}}, i32 0
+  return _mm256_cvtsi256_si32(__a);
 }
 
 float test_mm256_cvtss_f32(__m256 __a) {
-  // CHECK-LABEL: @test_mm256_cvtss_f32
-  // CHECK: extractelement <8 x float> %{{.*}}, i32 0
-  return _mm256_cvtss_f32(__a);
+  // CHECK-LABEL: @test_mm256_cvtss_f32
+  // CHECK: extractelement <8 x float> %{{.*}}, i32 0
+  return _mm256_cvtss_f32(__a);
 }
 
-__m256 test_mm256_cmp_ps_true(__m256 a, __m256 b) {
-  // CHECK-LABEL: @test_mm256_cmp_ps_true
-  // CHECK: ret <8 x float> zeroinitializer
-  return _mm256_cmp_ps(a, b, _CMP_FALSE_OQ);
+__m256 test_mm256_cmp_ps_false(__m256 a, __m256 b) {
+  // CHECK-LABEL: @test_mm256_cmp_ps_false
+  // CHECK: fcmp false <8 x float> %{{.*}}, %{{.*}}
+  return _mm256_cmp_ps(a, b, _CMP_FALSE_OQ);
 }
 
 __m256d test_mm256_cmp_pd_false(__m256d a, __m256d b) {
-  // CHECK-LABEL: @test_mm256_cmp_pd_false
-  // CHECK: ret <4 x double> zeroinitializer
+  // CHECK-LABEL: @test_mm256_cmp_pd_false
+  // CHECK: fcmp false <4 x double> %{{.*}}, %{{.*}}
   return _mm256_cmp_pd(a, b, _CMP_FALSE_OQ);
 }
 
-__m256 test_mm256_cmp_ps_strue(__m256 a, __m256 b) {
-  // CHECK-LABEL: @test_mm256_cmp_ps_strue
-  // CHECK: ret <8 x float> zeroinitializer
-  return _mm256_cmp_ps(a, b, _CMP_FALSE_OS);
+__m256 test_mm256_cmp_ps_sfalse(__m256 a, __m256 b) {
+  // CHECK-LABEL: @test_mm256_cmp_ps_sfalse
+  // CHECK: fcmp false <8 x float> %{{.*}}, %{{.*}}
+  return _mm256_cmp_ps(a, b, _CMP_FALSE_OS);
 }
 
 __m256d test_mm256_cmp_pd_sfalse(__m256d a, __m256d b) {
-  // CHECK-LABEL: @test_mm256_cmp_pd_sfalse
-  // CHECK: ret <4 x double> zeroinitializer
+  // CHECK-LABEL: @test_mm256_cmp_pd_sfalse
+  // CHECK: fcmp false <4 x double> %{{.*}}, %{{.*}}
   return _mm256_cmp_pd(a, b, _CMP_FALSE_OS);
 }
+
+__m128 test_mm_cmp_ps_false(__m128 a, __m128 b) {
+  // CHECK-LABEL: @test_mm_cmp_ps_false
+  // CHECK: fcmp false <4 x float> %{{.*}}, %{{.*}}
+  return _mm_cmp_ps(a, b, _CMP_FALSE_OQ);
+}
+
+__m128d test_mm_cmp_pd_false(__m128d a, __m128d b) {
+  // CHECK-LABEL: @test_mm_cmp_pd_false
+  // CHECK: fcmp false <2 x double> %{{.*}}, %{{.*}}
+  return _mm_cmp_pd(a, b, _CMP_FALSE_OQ);
+}
+
+__m128 test_mm_cmp_ps_sfalse(__m128 a, __m128 b) {
+  // CHECK-LABEL: @test_mm_cmp_ps_sfalse
+  // CHECK: fcmp false <4 x float> %{{.*}}, %{{.*}}
+  return _mm_cmp_ps(a, b, _CMP_FALSE_OS);
+}
+
+__m128d test_mm_cmp_pd_sfalse(__m128d a, __m128d b) {
+  // CHECK-LABEL: @test_mm_cmp_pd_sfalse
+  // CHECK: fcmp false <2 x double> %{{.*}}, %{{.*}}
+  return _mm_cmp_pd(a, b, _CMP_FALSE_OS);
+}
Index: test/CodeGen/avx-cmp-builtins.c
===================================================================
--- test/CodeGen/avx-cmp-builtins.c
+++ test/CodeGen/avx-cmp-builtins.c
@@ -8,30 +8,6 @@
 // Test LLVM IR codegen of cmpXY instructions
 //
 
-__m128d test_cmp_pd(__m128d a, __m128d b) {
-  // Expects that the third argument in LLVM IR is immediate expression
-  // CHECK: @llvm.x86.sse2.cmp.pd({{.*}}, i8 13)
-  return _mm_cmp_pd(a, b, _CMP_GE_OS);
-}
-
-__m128d test_cmp_ps(__m128 a, __m128 b) {
-  // Expects that the third argument in LLVM IR is immediate expression
-  // CHECK: @llvm.x86.sse.cmp.ps({{.*}}, i8 13)
-  return _mm_cmp_ps(a, b, _CMP_GE_OS);
-}
-
-__m256d test_cmp_pd256(__m256d a, __m256d b) {
-  // Expects that the third argument in LLVM IR is immediate expression
-  // CHECK: @llvm.x86.avx.cmp.pd.256({{.*}}, i8 13)
-  return _mm256_cmp_pd(a, b, _CMP_GE_OS);
-}
-
-__m256d test_cmp_ps256(__m256 a, __m256 b) {
-  // Expects that the third argument in LLVM IR is immediate expression
-  // CHECK: @llvm.x86.avx.cmp.ps.256({{.*}}, i8 13)
-  return _mm256_cmp_ps(a, b, _CMP_GE_OS);
-}
-
 __m128d test_cmp_sd(__m128d a, __m128d b) {
   // Expects that the third argument in LLVM IR is immediate expression
   // CHECK: @llvm.x86.sse2.cmp.sd({{.*}}, i8 13)
Index: test/CodeGen/avx2-builtins.c
===================================================================
--- test/CodeGen/avx2-builtins.c
+++ test/CodeGen/avx2-builtins.c
@@ -612,9 +612,7 @@
 
 __m256d test_mm256_i64gather_pd(double const *b, __m256i c) {
   // CHECK-LABEL: test_mm256_i64gather_pd
-  // CHECK: [[CMP:%.*]] = fcmp oeq <4 x double>
-  // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i64>
-  // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i64> [[SEXT]] to <4 x double>
+  // CHECK: fcmp oeq <4 x double> %{{.*}}, %{{.*}}
   // CHECK: call <4 x double> @llvm.x86.avx2.gather.q.pd.256(<4 x double> zeroinitializer, i8* %{{.*}}, <4 x i64> %{{.*}}, <4 x double> %{{.*}}, i8 2)
   return _mm256_i64gather_pd(b, c, 2);
 }
Index: test/CodeGen/avx512f-builtins.c
===================================================================
--- test/CodeGen/avx512f-builtins.c
+++ test/CodeGen/avx512f-builtins.c
@@ -1279,245 +1279,317 @@
 
 __mmask16 test_mm512_cmp_round_ps_mask(__m512 a, __m512 b) {
   // CHECK-LABEL: @test_mm512_cmp_round_ps_mask
-  // CHECK: call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512
+  // CHECK: fcmp oeq <16 x float> %{{.*}}, %{{.*}}
   return _mm512_cmp_round_ps_mask(a, b, 0, _MM_FROUND_CUR_DIRECTION);
 }
 
 __mmask16 test_mm512_mask_cmp_round_ps_mask(__mmask16 m, __m512 a, __m512 b) {
   // CHECK-LABEL: @test_mm512_mask_cmp_round_ps_mask
-  // CHECK: [[CMP:%.*]] = call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512
+  // CHECK: [[CMP:%.*]] = fcmp oeq <16 x float> %{{.*}}, %{{.*}}
   // CHECK: and <16 x i1> [[CMP]], {{.*}}
   return _mm512_mask_cmp_round_ps_mask(m, a, b, 0, _MM_FROUND_CUR_DIRECTION);
 }
 
 __mmask16 test_mm512_cmp_ps_mask(__m512 a, __m512 b) {
   // CHECK-LABEL: @test_mm512_cmp_ps_mask
-  // CHECK: call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512
+  // CHECK: fcmp oeq <16 x float> %{{.*}}, %{{.*}}
   return _mm512_cmp_ps_mask(a, b, 0);
 }
 
+__mmask16 test_mm512_cmp_ps_mask_true_uq(__m512 a, __m512 b) {
+  // CHECK-LABEL: @test_mm512_cmp_ps_mask_true_uq
+  // CHECK-NOT: call
+  // CHECK: fcmp true <16 x float> %{{.*}}, %{{.*}}
+  return _mm512_cmp_ps_mask(a, b, _CMP_TRUE_UQ);
+}
+
+__mmask16 test_mm512_cmp_ps_mask_true_us(__m512 a, __m512 b) {
+  // CHECK-LABEL: @test_mm512_cmp_ps_mask_true_us
+  // CHECK-NOT: call
+  // CHECK: fcmp true <16 x float> %{{.*}}, %{{.*}}
+  return _mm512_cmp_ps_mask(a, b, _CMP_TRUE_US);
+}
+
+__mmask16 test_mm512_cmp_ps_mask_false_oq(__m512 a, __m512 b) {
+  // CHECK-LABEL: @test_mm512_cmp_ps_mask_false_oq
+  // CHECK-NOT: call
+  // CHECK: fcmp false <16 x float> %{{.*}}, %{{.*}}
+  return _mm512_cmp_ps_mask(a, b, _CMP_FALSE_OQ);
+}
+
+__mmask16 test_mm512_cmp_ps_mask_false_os(__m512 a, __m512 b) {
+  // CHECK-LABEL: @test_mm512_cmp_ps_mask_false_os
+  // CHECK-NOT: call
+  // CHECK: fcmp false <16 x float> %{{.*}}, %{{.*}}
+  return _mm512_cmp_ps_mask(a, b, _CMP_FALSE_OS);
+}
+
 __mmask16 test_mm512_mask_cmp_ps_mask(__mmask16 m, __m512 a, __m512 b) {
   // CHECK-LABEL: @test_mm512_mask_cmp_ps_mask
-  // CHECK: [[CMP:%.*]] = call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512
+  // CHECK: [[CMP:%.*]] = fcmp oeq <16 x float> %{{.*}}, %{{.*}}
   // CHECK: and <16 x i1> [[CMP]], {{.*}}
   return _mm512_mask_cmp_ps_mask(m, a, b, 0);
 }
 
 __mmask8 test_mm512_cmp_round_pd_mask(__m512d a, __m512d b) {
   // CHECK-LABEL: @test_mm512_cmp_round_pd_mask
-  // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512
+  // CHECK: [[CMP:%.*]] = fcmp oeq <8 x double> %{{.*}}, %{{.*}}
   return _mm512_cmp_round_pd_mask(a, b, 0, _MM_FROUND_CUR_DIRECTION);
 }
 
 __mmask8 test_mm512_mask_cmp_round_pd_mask(__mmask8 m, __m512d a, __m512d b) {
   // CHECK-LABEL: @test_mm512_mask_cmp_round_pd_mask
-  // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512
+  // CHECK: [[CMP:%.*]] = fcmp oeq <8 x double> %{{.*}}, %{{.*}}
   // CHECK: and <8 x i1> [[CMP]], {{.*}}
   return _mm512_mask_cmp_round_pd_mask(m, a, b, 0, _MM_FROUND_CUR_DIRECTION);
 }
 
 __mmask8 test_mm512_cmp_pd_mask(__m512d a, __m512d b) {
   // CHECK-LABEL: @test_mm512_cmp_pd_mask
-  // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512
+  // CHECK: fcmp oeq <8 x double> %{{.*}}, %{{.*}}
   return _mm512_cmp_pd_mask(a, b, 0);
 }
 
+__mmask8 test_mm512_cmp_pd_mask_true_uq(__m512d a, __m512d b) {
+  // CHECK-LABEL: @test_mm512_cmp_pd_mask_true_uq
+  // CHECK-NOT: call
+  // CHECK: fcmp true <8 x double> %{{.*}}, %{{.*}}
+  return _mm512_cmp_pd_mask(a, b, _CMP_TRUE_UQ);
+}
+
+__mmask8 test_mm512_cmp_pd_mask_true_us(__m512d a, __m512d b) {
+  // CHECK-LABEL: @test_mm512_cmp_pd_mask_true_us
+  // CHECK-NOT: call
+  // CHECK: fcmp true <8 x double> %{{.*}}, %{{.*}}
+  return _mm512_cmp_pd_mask(a, b, _CMP_TRUE_US);
+}
+
+__mmask8 test_mm512_cmp_pd_mask_false_oq(__m512d a, __m512d b) {
+  // CHECK-LABEL: @test_mm512_cmp_pd_mask_false_oq
+  // CHECK-NOT: call
+  // CHECK: fcmp false <8 x double> %{{.*}}, %{{.*}}
+  return _mm512_cmp_pd_mask(a, b, _CMP_FALSE_OQ);
+}
+
+__mmask8 test_mm512_cmp_pd_mask_false_os(__m512d a, __m512d b) {
+  // CHECK-LABEL: @test_mm512_cmp_pd_mask_false_os
+  // CHECK-NOT: call
+  // CHECK: fcmp false <8 x double> %{{.*}}, %{{.*}}
+  return _mm512_cmp_pd_mask(a, b, _CMP_FALSE_OS);
+}
+
 __mmask8 test_mm512_mask_cmp_pd_mask(__mmask8 m, __m512d a, __m512d b) {
   // CHECK-LABEL: @test_mm512_mask_cmp_pd_mask
-  // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512
+  // CHECK: [[CMP:%.*]] = fcmp oeq <8 x double> %{{.*}}, %{{.*}}
   // CHECK: and <8 x i1> [[CMP]], {{.*}}
   return _mm512_mask_cmp_pd_mask(m, a, b, 0);
 }
 
 __mmask8 test_mm512_cmpeq_pd_mask(__m512d a, __m512d b) {
   // CHECK-LABEL: @test_mm512_cmpeq_pd_mask
-  // CHECK: @llvm.x86.avx512.mask.cmp.pd.512
+  // CHECK: fcmp oeq <8 x double> %{{.*}}, %{{.*}}
   return _mm512_cmpeq_pd_mask(a, b);
 }
 
 __mmask8 test_mm512_cmpeq_ps_mask(__m512 a, __m512 b) {
   // CHECK-LABEL: @test_mm512_cmpeq_ps_mask
-  // CHECK: @llvm.x86.avx512.mask.cmp.ps.512
+  // CHECK: fcmp oeq <16 x float> %{{.*}}, %{{.*}}
   return _mm512_cmpeq_ps_mask(a, b);
 }
 
 __mmask8 test_mm512_mask_cmpeq_pd_mask(__mmask8 k, __m512d a, __m512d b) {
   // CHECK-LABEL: @test_mm512_mask_cmpeq_pd_mask
-  // CHECK: @llvm.x86.avx512.mask.cmp.pd.512
+  // CHECK: [[CMP:%.*]] = fcmp oeq <8 x double> %{{.*}}, %{{.*}}
+  // CHECK: and <8 x i1> [[CMP]], {{.*}}
   return _mm512_mask_cmpeq_pd_mask(k, a, b);
 }
 
 __mmask8 test_mm512_mask_cmpeq_ps_mask(__mmask8 k, __m512 a, __m512 b) {
   // CHECK-LABEL: @test_mm512_mask_cmpeq_ps_mask
-  // CHECK: @llvm.x86.avx512.mask.cmp.ps.512
+  // CHECK: [[CMP:%.*]] = fcmp oeq <16 x float> %{{.*}}, %{{.*}}
+  // CHECK: and <16 x i1> [[CMP]], {{.*}}
   return _mm512_mask_cmpeq_ps_mask(k, a, b);
 }
 
 __mmask8 test_mm512_cmple_pd_mask(__m512d a, __m512d b) {
   // CHECK-LABEL: @test_mm512_cmple_pd_mask
-  // CHECK: @llvm.x86.avx512.mask.cmp.pd.512
+  // CHECK: fcmp ole <8 x double> %{{.*}}, %{{.*}}
   return _mm512_cmple_pd_mask(a, b);
 }
 
 __mmask8 test_mm512_cmple_ps_mask(__m512 a, __m512 b) {
   // CHECK-LABEL: @test_mm512_cmple_ps_mask
-  // CHECK: @llvm.x86.avx512.mask.cmp.ps.512
+  // CHECK: fcmp ole <16 x float> %{{.*}}, %{{.*}}
   return _mm512_cmple_ps_mask(a, b);
 }
 
 __mmask8 test_mm512_mask_cmple_pd_mask(__mmask8 k, __m512d a, __m512d b) {
   // CHECK-LABEL: @test_mm512_mask_cmple_pd_mask
-  // CHECK: @llvm.x86.avx512.mask.cmp.pd.512
+  // CHECK: [[CMP:%.*]] = fcmp ole <8 x double> %{{.*}}, %{{.*}}
+  // CHECK: and <8 x i1> [[CMP]], {{.*}}
   return _mm512_mask_cmple_pd_mask(k, a, b);
 }
 
 __mmask8 test_mm512_mask_cmple_ps_mask(__mmask8 k, __m512 a, __m512 b) {
   // CHECK-LABEL: @test_mm512_mask_cmple_ps_mask
-  // CHECK: @llvm.x86.avx512.mask.cmp.ps.512
+  // CHECK: [[CMP:%.*]] = fcmp ole <16 x float> %{{.*}}, %{{.*}}
+  // CHECK: and <16 x i1> [[CMP]], {{.*}}
   return _mm512_mask_cmple_ps_mask(k, a, b);
 }
 
 __mmask8 test_mm512_cmplt_pd_mask(__m512d a, __m512d b) {
   // CHECK-LABEL: @test_mm512_cmplt_pd_mask
-  // CHECK: @llvm.x86.avx512.mask.cmp.pd.512
+  // CHECK: fcmp olt <8 x double> %{{.*}}, %{{.*}}
   return _mm512_cmplt_pd_mask(a, b);
 }
 
 __mmask8 test_mm512_cmplt_ps_mask(__m512 a, __m512 b) {
   // CHECK-LABEL: @test_mm512_cmplt_ps_mask
-  // CHECK: @llvm.x86.avx512.mask.cmp.ps.512
+  // CHECK: fcmp olt <16 x float> %{{.*}}, %{{.*}}
   return _mm512_cmplt_ps_mask(a, b);
 }
 
 __mmask8 test_mm512_mask_cmplt_pd_mask(__mmask8 k, __m512d a, __m512d b) {
   // CHECK-LABEL: @test_mm512_mask_cmplt_pd_mask
-  // CHECK: @llvm.x86.avx512.mask.cmp.pd.512
+  // CHECK: [[CMP:%.*]] = fcmp olt <8 x double> %{{.*}}, %{{.*}}
+  // CHECK: and <8 x i1> [[CMP]], {{.*}}
   return _mm512_mask_cmplt_pd_mask(k, a, b);
 }
 
 __mmask8 test_mm512_mask_cmplt_ps_mask(__mmask8 k, __m512 a, __m512 b) {
   // CHECK-LABEL: @test_mm512_mask_cmplt_ps_mask
-  // CHECK: @llvm.x86.avx512.mask.cmp.ps.512
+  // CHECK: [[CMP:%.*]] = fcmp olt <16 x float> %{{.*}}, %{{.*}}
+  // CHECK: and <16 x i1> [[CMP]], {{.*}}
   return _mm512_mask_cmplt_ps_mask(k, a, b);
 }
 
 __mmask8 test_mm512_cmpneq_pd_mask(__m512d a, __m512d b) {
   // CHECK-LABEL: @test_mm512_cmpneq_pd_mask
-  // CHECK: @llvm.x86.avx512.mask.cmp.pd.512
+  // CHECK: fcmp une <8 x double> %{{.*}}, %{{.*}}
   return _mm512_cmpneq_pd_mask(a, b);
 }
 
 __mmask8 test_mm512_cmpneq_ps_mask(__m512 a, __m512 b) {
   // CHECK-LABEL: @test_mm512_cmpneq_ps_mask
-  // CHECK: @llvm.x86.avx512.mask.cmp.ps.512
+  // CHECK: fcmp une <16 x float> %{{.*}}, %{{.*}}
   return _mm512_cmpneq_ps_mask(a, b);
 }
 
 __mmask8 test_mm512_mask_cmpneq_pd_mask(__mmask8 k, __m512d a, __m512d b) {
   // CHECK-LABEL: @test_mm512_mask_cmpneq_pd_mask
-  // CHECK: @llvm.x86.avx512.mask.cmp.pd.512
+  // CHECK: [[CMP:%.*]] = fcmp une <8 x double> %{{.*}}, %{{.*}}
+  // CHECK: and <8 x i1> [[CMP]], {{.*}}
   return _mm512_mask_cmpneq_pd_mask(k, a, b);
 }
 
 __mmask8 test_mm512_mask_cmpneq_ps_mask(__mmask8 k, __m512 a, __m512 b) {
   // CHECK-LABEL: @test_mm512_mask_cmpneq_ps_mask
-  // CHECK: @llvm.x86.avx512.mask.cmp.ps.512
+  // CHECK: [[CMP:%.*]] = fcmp une <16 x float> %{{.*}}, %{{.*}}
+  // CHECK: and <16 x i1> [[CMP]], {{.*}}
   return _mm512_mask_cmpneq_ps_mask(k, a, b);
 }
 
 __mmask8 test_mm512_cmpnle_pd_mask(__m512d a, __m512d b) {
   // CHECK-LABEL: @test_mm512_cmpnle_pd_mask
-  // CHECK: @llvm.x86.avx512.mask.cmp.pd.512
+  // CHECK: fcmp ugt <8 x double> %{{.*}}, %{{.*}}
   return _mm512_cmpnle_pd_mask(a, b);
 }
 
 __mmask8 test_mm512_cmpnle_ps_mask(__m512 a, __m512 b) {
   // CHECK-LABEL: @test_mm512_cmpnle_ps_mask
-  // CHECK: @llvm.x86.avx512.mask.cmp.ps.512
+  // CHECK: fcmp ugt <16 x float> %{{.*}}, %{{.*}}
   return _mm512_cmpnle_ps_mask(a, b);
 }
 
 __mmask8 test_mm512_mask_cmpnle_pd_mask(__mmask8 k, __m512d a, __m512d b) {
   // CHECK-LABEL: @test_mm512_mask_cmpnle_pd_mask
-  // CHECK: @llvm.x86.avx512.mask.cmp.pd.512
+  // CHECK: [[CMP:%.*]] = fcmp ugt <8 x double> %{{.*}}, %{{.*}}
+  // CHECK: and <8 x i1> [[CMP]], {{.*}}
   return _mm512_mask_cmpnle_pd_mask(k, a, b);
 }
 
 __mmask8 test_mm512_mask_cmpnle_ps_mask(__mmask8 k, __m512 a, __m512 b) {
   // CHECK-LABEL: @test_mm512_mask_cmpnle_ps_mask
-  // CHECK: @llvm.x86.avx512.mask.cmp.ps.512
+  // CHECK: [[CMP:%.*]] = fcmp ugt <16 x float> %{{.*}}, %{{.*}}
+  // CHECK: and <16 x i1> [[CMP]], {{.*}}
   return _mm512_mask_cmpnle_ps_mask(k, a, b);
 }
 
 __mmask8 test_mm512_cmpnlt_pd_mask(__m512d a, __m512d b) {
   // CHECK-LABEL: @test_mm512_cmpnlt_pd_mask
-  // CHECK: @llvm.x86.avx512.mask.cmp.pd.512
+  // CHECK: fcmp uge <8 x double> %{{.*}}, %{{.*}}
   return _mm512_cmpnlt_pd_mask(a, b);
 }
 
 __mmask8 test_mm512_cmpnlt_ps_mask(__m512 a, __m512 b) {
   // CHECK-LABEL: @test_mm512_cmpnlt_ps_mask
-  // CHECK: @llvm.x86.avx512.mask.cmp.ps.512
+  // CHECK: fcmp uge <16 x float> %{{.*}}, %{{.*}}
   return _mm512_cmpnlt_ps_mask(a, b);
 }
 
 __mmask8 test_mm512_mask_cmpnlt_pd_mask(__mmask8 k, __m512d a, __m512d b) {
   // CHECK-LABEL: @test_mm512_mask_cmpnlt_pd_mask
-  // CHECK: @llvm.x86.avx512.mask.cmp.pd.512
+  // CHECK: [[CMP:%.*]] = fcmp uge <8 x double> %{{.*}}, %{{.*}}
+  // CHECK: and <8 x i1> [[CMP]], {{.*}}
   return _mm512_mask_cmpnlt_pd_mask(k, a, b);
 }
 
 __mmask8 test_mm512_mask_cmpnlt_ps_mask(__mmask8 k, __m512 a, __m512 b) {
   // CHECK-LABEL: @test_mm512_mask_cmpnlt_ps_mask
-  // CHECK: @llvm.x86.avx512.mask.cmp.ps.512
+  // CHECK: [[CMP:%.*]] = fcmp uge <16 x float> %{{.*}}, %{{.*}}
+  // CHECK: and <16 x i1> [[CMP]], {{.*}}
   return _mm512_mask_cmpnlt_ps_mask(k, a, b);
 }
 
 __mmask8 test_mm512_cmpord_pd_mask(__m512d a, __m512d b) {
   // CHECK-LABEL: @test_mm512_cmpord_pd_mask
-  // CHECK: @llvm.x86.avx512.mask.cmp.pd.512
+  // CHECK: fcmp ord <8 x double> %{{.*}}, %{{.*}}
   return _mm512_cmpord_pd_mask(a, b);
 }
 
 __mmask8 test_mm512_cmpord_ps_mask(__m512 a, __m512 b) {
   // CHECK-LABEL: @test_mm512_cmpord_ps_mask
-  // CHECK: @llvm.x86.avx512.mask.cmp.ps.512
+  // CHECK: fcmp ord <16 x float> %{{.*}}, %{{.*}}
   return _mm512_cmpord_ps_mask(a, b);
 }
 
 __mmask8 test_mm512_mask_cmpord_pd_mask(__mmask8 k, __m512d a, __m512d b) {
   // CHECK-LABEL: @test_mm512_mask_cmpord_pd_mask
-  // CHECK: @llvm.x86.avx512.mask.cmp.pd.512
+  // CHECK: [[CMP:%.*]] = fcmp ord <8 x double> %{{.*}}, %{{.*}}
+  // CHECK: and <8 x i1> [[CMP]], {{.*}}
   return _mm512_mask_cmpord_pd_mask(k, a, b);
 }
 
 __mmask8 test_mm512_mask_cmpord_ps_mask(__mmask8 k, __m512 a, __m512 b) {
   // CHECK-LABEL: @test_mm512_mask_cmpord_ps_mask
-  // CHECK: @llvm.x86.avx512.mask.cmp.ps.512
+  // CHECK: [[CMP:%.*]] = fcmp ord <16 x float> %{{.*}}, %{{.*}}
+  // CHECK: and <16 x i1> [[CMP]], {{.*}}
   return _mm512_mask_cmpord_ps_mask(k, a, b);
 }
 
 __mmask8 test_mm512_cmpunord_pd_mask(__m512d a, __m512d b) {
   // CHECK-LABEL: @test_mm512_cmpunord_pd_mask
-  // CHECK: @llvm.x86.avx512.mask.cmp.pd.512
+  // CHECK: fcmp uno <8 x double> %{{.*}}, %{{.*}}
   return _mm512_cmpunord_pd_mask(a, b);
 }
 
 __mmask8 test_mm512_cmpunord_ps_mask(__m512 a, __m512 b) {
   // CHECK-LABEL: @test_mm512_cmpunord_ps_mask
-  // CHECK: @llvm.x86.avx512.mask.cmp.ps.512
+  // CHECK: fcmp uno <16 x float> %{{.*}}, %{{.*}}
   return _mm512_cmpunord_ps_mask(a, b);
 }
 
 __mmask8 test_mm512_mask_cmpunord_pd_mask(__mmask8 k, __m512d a, __m512d b) {
   // CHECK-LABEL: @test_mm512_mask_cmpunord_pd_mask
-  // CHECK: @llvm.x86.avx512.mask.cmp.pd.512
+  // CHECK: [[CMP:%.*]] = fcmp uno <8 x double> %{{.*}}, %{{.*}}
+  // CHECK: and <8 x i1> [[CMP]], {{.*}}
   return _mm512_mask_cmpunord_pd_mask(k, a, b);
 }
 
 __mmask8 test_mm512_mask_cmpunord_ps_mask(__mmask8 k, __m512 a, __m512 b) {
   // CHECK-LABEL: @test_mm512_mask_cmpunord_ps_mask
-  // CHECK: @llvm.x86.avx512.mask.cmp.ps.512
+  // CHECK: [[CMP:%.*]] = fcmp uno <16 x float> %{{.*}}, %{{.*}}
+  // CHECK: and <16 x i1> [[CMP]], {{.*}}
   return _mm512_mask_cmpunord_ps_mask(k, a, b);
 }
 
Index: test/CodeGen/avx512vl-builtins.c
===================================================================
--- test/CodeGen/avx512vl-builtins.c
+++ test/CodeGen/avx512vl-builtins.c
@@ -1073,53 +1073,168 @@
 
 __mmask8 test_mm256_cmp_ps_mask(__m256 __A, __m256 __B) {
   // CHECK-LABEL: @test_mm256_cmp_ps_mask
-  // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.ps.256
+  // CHECK: fcmp oeq <8 x float> %{{.*}}, %{{.*}}
   return (__mmask8)_mm256_cmp_ps_mask(__A, __B, 0);
 }
 
+__mmask8 test_mm256_cmp_ps_mask_true_uq(__m256 __A, __m256 __B) {
+  // CHECK-LABEL: @test_mm256_cmp_ps_mask_true_uq
+  // CHECK-NOT: call
+  // CHECK: fcmp true <8 x float> %{{.*}}, %{{.*}}
+  return (__mmask8)_mm256_cmp_ps_mask(__A, __B, _CMP_TRUE_UQ);
+}
+
+__mmask8 test_mm256_cmp_ps_mask_true_us(__m256 __A, __m256 __B) {
+  // CHECK-LABEL: @test_mm256_cmp_ps_mask_true_us
+  // CHECK-NOT: call
+  // CHECK: fcmp true <8 x float> %{{.*}}, %{{.*}}
+  return (__mmask8)_mm256_cmp_ps_mask(__A, __B, _CMP_TRUE_US);
+}
+
+__mmask8 test_mm256_cmp_ps_mask_false_oq(__m256 __A, __m256 __B) {
+  // CHECK-LABEL: @test_mm256_cmp_ps_mask_false_oq
+  // CHECK-NOT: call
+  // CHECK: fcmp false <8 x float> %{{.*}}, %{{.*}}
+  return (__mmask8)_mm256_cmp_ps_mask(__A, __B, _CMP_FALSE_OQ);
+}
+
+__mmask8 test_mm256_cmp_ps_mask_false_os(__m256 __A, __m256 __B) {
+  // CHECK-LABEL: @test_mm256_cmp_ps_mask_false_os
+  // CHECK-NOT: call
+  // CHECK: fcmp false <8 x float> %{{.*}}, %{{.*}}
+  return (__mmask8)_mm256_cmp_ps_mask(__A, __B, _CMP_FALSE_OS);
+}
+
 __mmask8 test_mm256_mask_cmp_ps_mask(__mmask8 m, __m256 __A, __m256 __B) {
   // CHECK-LABEL: @test_mm256_mask_cmp_ps_mask
-  // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.x86.avx512.mask.cmp.ps.256
-  // CHECK: and <8 x i1> [[CMP]], {{.*}}
+  // CHECK: fcmp oeq <8 x float> %{{.*}}, %{{.*}}
+  // CHECK: and <8 x i1> %{{.*}}, %{{.*}}
   return _mm256_mask_cmp_ps_mask(m, __A, __B, 0);
 }
 
 __mmask8 test_mm_cmp_ps_mask(__m128 __A, __m128 __B) {
   // CHECK-LABEL: @test_mm_cmp_ps_mask
-  // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.ps.128
+  // CHECK: fcmp oeq <4 x float> %{{.*}}, %{{.*}}
   return (__mmask8)_mm_cmp_ps_mask(__A, __B, 0);
 }
 
+__mmask8 test_mm_cmp_ps_mask_true_uq(__m128 __A, __m128 __B) {
+  // CHECK-LABEL: @test_mm_cmp_ps_mask_true_uq
+  // CHECK-NOT: call
+  // CHECK: fcmp true <4 x float> %{{.*}}, %{{.*}}
+  return (__mmask8)_mm_cmp_ps_mask(__A, __B, _CMP_TRUE_UQ);
+}
+
+__mmask8 test_mm_cmp_ps_mask_true_us(__m128 __A, __m128 __B) {
+  // CHECK-LABEL: @test_mm_cmp_ps_mask_true_us
+  // CHECK-NOT: call
+  // CHECK: fcmp true <4 x float> %{{.*}}, %{{.*}}
+  return (__mmask8)_mm_cmp_ps_mask(__A, __B, _CMP_TRUE_US);
+}
+
+__mmask8 test_mm_cmp_ps_mask_false_oq(__m128 __A, __m128 __B) {
+  // CHECK-LABEL: @test_mm_cmp_ps_mask_false_oq
+  // CHECK-NOT: call
+  // CHECK: fcmp false <4 x float> %{{.*}}, %{{.*}}
+  return (__mmask8)_mm_cmp_ps_mask(__A, __B, _CMP_FALSE_OQ);
+}
+
+__mmask8 test_mm_cmp_ps_mask_false_os(__m128 __A, __m128 __B) {
+  // CHECK-LABEL: @test_mm_cmp_ps_mask_false_os
+  // CHECK-NOT: call
+  // CHECK: fcmp false <4 x float> %{{.*}}, %{{.*}}
+  return (__mmask8)_mm_cmp_ps_mask(__A, __B, _CMP_FALSE_OS);
+}
+
 __mmask8 test_mm_mask_cmp_ps_mask(__mmask8 m, __m128 __A, __m128 __B) {
   // CHECK-LABEL: @test_mm_mask_cmp_ps_mask
-  // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.x86.avx512.mask.cmp.ps.128
-  // CHECK: and <4 x i1> [[CMP]], {{.*}}
+  // CHECK: fcmp oeq <4 x float> %{{.*}}, %{{.*}}
+  // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32>
+  // CHECK: and <4 x i1> %{{.*}}, %{{.*}}
   return _mm_mask_cmp_ps_mask(m, __A, __B, 0);
 }
 
 __mmask8 test_mm256_cmp_pd_mask(__m256d __A, __m256d __B) {
   // CHECK-LABEL: @test_mm256_cmp_pd_mask
-  // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.pd.256
+  // CHECK: fcmp oeq <4 x double> %{{.*}}, %{{.*}}
   return (__mmask8)_mm256_cmp_pd_mask(__A, __B, 0);
 }
 
+__mmask8 test_mm256_cmp_pd_mask_true_uq(__m256d __A, __m256d __B) {
+  // CHECK-LABEL: @test_mm256_cmp_pd_mask_true_uq
+  // CHECK-NOT: call
+  // CHECK: fcmp true <4 x double> %{{.*}}, %{{.*}}
+  return (__mmask8)_mm256_cmp_pd_mask(__A, __B, _CMP_TRUE_UQ);
+}
+
+__mmask8 test_mm256_cmp_pd_mask_true_us(__m256d __A, __m256d __B) {
+  // CHECK-LABEL: @test_mm256_cmp_pd_mask_true_us
+  // CHECK-NOT: call
+  // CHECK: fcmp true <4 x double> %{{.*}}, %{{.*}}
+  return (__mmask8)_mm256_cmp_pd_mask(__A, __B, _CMP_TRUE_US);
+}
+
+__mmask8 test_mm256_cmp_pd_mask_false_oq(__m256d __A, __m256d __B) {
+  // CHECK-LABEL: @test_mm256_cmp_pd_mask_false_oq
+  // CHECK-NOT: call
+  // CHECK: fcmp false <4 x double> %{{.*}}, %{{.*}}
+  return (__mmask8)_mm256_cmp_pd_mask(__A, __B, _CMP_FALSE_OQ);
+}
+
+__mmask8 test_mm256_cmp_pd_mask_false_os(__m256d __A, __m256d __B) {
+  // CHECK-LABEL: @test_mm256_cmp_pd_mask_false_os
+  // CHECK-NOT: call
+  // CHECK: fcmp false <4 x double> %{{.*}}, %{{.*}}
+  return (__mmask8)_mm256_cmp_pd_mask(__A, __B, _CMP_FALSE_OS);
+}
+
 __mmask8 test_mm256_mask_cmp_pd_mask(__mmask8 m, __m256d __A, __m256d __B) {
   // CHECK-LABEL: @test_mm256_mask_cmp_pd_mask
-  // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.x86.avx512.mask.cmp.pd.256
-  // CHECK: and <4 x i1> [[CMP]], {{.*}}
+  // CHECK: fcmp oeq <4 x double> %{{.*}}, %{{.*}}
+  // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32>
+  // CHECK: and <4 x i1> %{{.*}}, %{{.*}}
   return _mm256_mask_cmp_pd_mask(m, __A, __B, 0);
 }
 
 __mmask8 test_mm_cmp_pd_mask(__m128d __A, __m128d __B) {
   // CHECK-LABEL: @test_mm_cmp_pd_mask
-  // CHECK: call <2 x i1> @llvm.x86.avx512.mask.cmp.pd.128
+  // CHECK: fcmp oeq <2 x double> %{{.*}}, %{{.*}}
   return (__mmask8)_mm_cmp_pd_mask(__A, __B, 0);
 }
 
+__mmask8 test_mm_cmp_pd_mask_true_uq(__m128d __A, __m128d __B) {
+  // CHECK-LABEL: @test_mm_cmp_pd_mask_true_uq
+  // CHECK-NOT: call
+  // CHECK: fcmp true <2 x double> %{{.*}}, %{{.*}}
+  return (__mmask8)_mm_cmp_pd_mask(__A, __B, _CMP_TRUE_UQ);
+}
+
+__mmask8 test_mm_cmp_pd_mask_true_us(__m128d __A, __m128d __B) {
+  // CHECK-LABEL: @test_mm_cmp_pd_mask_true_us
+  // CHECK-NOT: call
+  // CHECK: fcmp true <2 x double> %{{.*}}, %{{.*}}
+  return (__mmask8)_mm_cmp_pd_mask(__A, __B, _CMP_TRUE_US);
+}
+
+__mmask8 test_mm_cmp_pd_mask_false_oq(__m128d __A, __m128d __B) {
+  // CHECK-LABEL: @test_mm_cmp_pd_mask_false_oq
+  // CHECK-NOT: call
+  // CHECK: fcmp false <2 x double> %{{.*}}, %{{.*}}
+  return (__mmask8)_mm_cmp_pd_mask(__A, __B, _CMP_FALSE_OQ);
+}
+
+__mmask8 test_mm_cmp_pd_mask_false_os(__m128d __A, __m128d __B) {
+  // CHECK-LABEL: @test_mm_cmp_pd_mask_false_os
+  // CHECK-NOT: call
+  // CHECK: fcmp false <2 x double> %{{.*}}, %{{.*}}
+  return (__mmask8)_mm_cmp_pd_mask(__A, __B, _CMP_FALSE_OS);
+}
+
 __mmask8 test_mm_mask_cmp_pd_mask(__mmask8 m, __m128d __A, __m128d __B) {
   // CHECK-LABEL: @test_mm_mask_cmp_pd_mask
-  // CHECK: [[CMP:%.*]] = call <2 x i1> @llvm.x86.avx512.mask.cmp.pd.128
-  // CHECK: and <2 x i1> [[CMP]], {{.*}}
+  // CHECK: fcmp oeq <2 x double> %{{.*}}, %{{.*}}
+  // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <2 x i32>
+  // CHECK: and <2 x i1> %{{.*}}, %{{.*}}
   return _mm_mask_cmp_pd_mask(m, __A, __B, 0);
 }
 