Index: lib/CodeGen/CGBuiltin.cpp =================================================================== --- lib/CodeGen/CGBuiltin.cpp +++ lib/CodeGen/CGBuiltin.cpp @@ -8060,6 +8060,29 @@ return CGF.Builder.CreateSelect(Mask, Op0, Op1); } +static Value *EmitX86MaskedCompareResult(CodeGenFunction &CGF, Value *Cmp, + unsigned NumElts, Value *MaskIn) { + if (MaskIn) { + const auto *C = dyn_cast(MaskIn); + if (!C || !C->isAllOnesValue()) + Cmp = CGF.Builder.CreateAnd(Cmp, getMaskVecValue(CGF, MaskIn, NumElts)); + } + + if (NumElts < 8) { + uint32_t Indices[8]; + for (unsigned i = 0; i != NumElts; ++i) + Indices[i] = i; + for (unsigned i = NumElts; i != 8; ++i) + Indices[i] = i % NumElts + NumElts; + Cmp = CGF.Builder.CreateShuffleVector( + Cmp, llvm::Constant::getNullValue(Cmp->getType()), Indices); + } + + return CGF.Builder.CreateBitCast(Cmp, + IntegerType::get(CGF.getLLVMContext(), + std::max(NumElts, 8U))); +} + static Value *EmitX86MaskedCompare(CodeGenFunction &CGF, unsigned CC, bool Signed, ArrayRef Ops) { assert((Ops.size() == 2 || Ops.size() == 4) && @@ -8087,24 +8110,11 @@ Cmp = CGF.Builder.CreateICmp(Pred, Ops[0], Ops[1]); } - if (Ops.size() == 4) { - const auto *C = dyn_cast(Ops[3]); - if (!C || !C->isAllOnesValue()) - Cmp = CGF.Builder.CreateAnd(Cmp, getMaskVecValue(CGF, Ops[3], NumElts)); - } + Value *MaskIn = nullptr; + if (Ops.size() == 4) + MaskIn = Ops[3]; - if (NumElts < 8) { - uint32_t Indices[8]; - for (unsigned i = 0; i != NumElts; ++i) - Indices[i] = i; - for (unsigned i = NumElts; i != 8; ++i) - Indices[i] = i % NumElts + NumElts; - Cmp = CGF.Builder.CreateShuffleVector( - Cmp, llvm::Constant::getNullValue(Cmp->getType()), Indices); - } - return CGF.Builder.CreateBitCast(Cmp, - IntegerType::get(CGF.getLLVMContext(), - std::max(NumElts, 8U))); + return EmitX86MaskedCompareResult(CGF, Cmp, NumElts, MaskIn); } static Value *EmitX86ConvertToMask(CodeGenFunction &CGF, Value *In) { @@ -8882,6 +8892,43 @@ return Builder.CreateExtractValue(Call, 1); } + case X86::BI__builtin_ia32_cmpps128_mask: + case X86::BI__builtin_ia32_cmpps256_mask: + case X86::BI__builtin_ia32_cmpps512_mask: + case X86::BI__builtin_ia32_cmppd128_mask: + case X86::BI__builtin_ia32_cmppd256_mask: + case X86::BI__builtin_ia32_cmppd512_mask: { + unsigned NumElts = Ops[0]->getType()->getVectorNumElements(); + Value *MaskIn = Ops[3]; + Ops.erase(&Ops[3]); + + Intrinsic::ID ID; + switch (BuiltinID) { + default: llvm_unreachable("Unsupported intrinsic!"); + case X86::BI__builtin_ia32_cmpps128_mask: + ID = Intrinsic::x86_avx512_mask_cmp_ps_128; + break; + case X86::BI__builtin_ia32_cmpps256_mask: + ID = Intrinsic::x86_avx512_mask_cmp_ps_256; + break; + case X86::BI__builtin_ia32_cmpps512_mask: + ID = Intrinsic::x86_avx512_mask_cmp_ps_512; + break; + case X86::BI__builtin_ia32_cmppd128_mask: + ID = Intrinsic::x86_avx512_mask_cmp_pd_128; + break; + case X86::BI__builtin_ia32_cmppd256_mask: + ID = Intrinsic::x86_avx512_mask_cmp_pd_256; + break; + case X86::BI__builtin_ia32_cmppd512_mask: + ID = Intrinsic::x86_avx512_mask_cmp_pd_512; + break; + } + + Value *Cmp = Builder.CreateCall(CGM.getIntrinsic(ID), Ops); + return EmitX86MaskedCompareResult(*this, Cmp, NumElts, MaskIn); + } + // SSE packed comparison intrinsics case X86::BI__builtin_ia32_cmpeqps: case X86::BI__builtin_ia32_cmpeqpd: Index: test/CodeGen/avx512f-builtins.c =================================================================== --- test/CodeGen/avx512f-builtins.c +++ test/CodeGen/avx512f-builtins.c @@ -1001,49 +1001,53 @@ __mmask16 test_mm512_cmp_round_ps_mask(__m512 a, __m512 b) { // CHECK-LABEL: @test_mm512_cmp_round_ps_mask - // CHECK: @llvm.x86.avx512.mask.cmp.ps.512 + // CHECK: call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512 return _mm512_cmp_round_ps_mask(a, b, 0, _MM_FROUND_CUR_DIRECTION); } __mmask16 test_mm512_mask_cmp_round_ps_mask(__mmask16 m, __m512 a, __m512 b) { // CHECK-LABEL: @test_mm512_mask_cmp_round_ps_mask - // CHECK: @llvm.x86.avx512.mask.cmp.ps.512 + // CHECK: [[CMP:%.*]] = call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512 + // CHECK: and <16 x i1> [[CMP]], {{.*}} return _mm512_mask_cmp_round_ps_mask(m, a, b, 0, _MM_FROUND_CUR_DIRECTION); } __mmask16 test_mm512_cmp_ps_mask(__m512 a, __m512 b) { // CHECK-LABEL: @test_mm512_cmp_ps_mask - // CHECKn: @llvm.x86.avx512.mask.cmp.ps.512 + // CHECK: call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512 return _mm512_cmp_ps_mask(a, b, 0); } __mmask16 test_mm512_mask_cmp_ps_mask(__mmask16 m, __m512 a, __m512 b) { // CHECK-LABEL: @test_mm512_mask_cmp_ps_mask - // CHECK: @llvm.x86.avx512.mask.cmp.ps.512 + // CHECK: [[CMP:%.*]] = call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512 + // CHECK: and <16 x i1> [[CMP]], {{.*}} return _mm512_mask_cmp_ps_mask(m, a, b, 0); } __mmask8 test_mm512_cmp_round_pd_mask(__m512d a, __m512d b) { // CHECK-LABEL: @test_mm512_cmp_round_pd_mask - // CHECK: @llvm.x86.avx512.mask.cmp.pd.512 + // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512 return _mm512_cmp_round_pd_mask(a, b, 0, _MM_FROUND_CUR_DIRECTION); } __mmask8 test_mm512_mask_cmp_round_pd_mask(__mmask8 m, __m512d a, __m512d b) { // CHECK-LABEL: @test_mm512_mask_cmp_round_pd_mask - // CHECK: @llvm.x86.avx512.mask.cmp.pd.512 + // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512 + // CHECK: and <8 x i1> [[CMP]], {{.*}} return _mm512_mask_cmp_round_pd_mask(m, a, b, 0, _MM_FROUND_CUR_DIRECTION); } __mmask8 test_mm512_cmp_pd_mask(__m512d a, __m512d b) { // CHECK-LABEL: @test_mm512_cmp_pd_mask - // CHECK: @llvm.x86.avx512.mask.cmp.pd.512 + // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512 return _mm512_cmp_pd_mask(a, b, 0); } __mmask8 test_mm512_mask_cmp_pd_mask(__mmask8 m, __m512d a, __m512d b) { // CHECK-LABEL: @test_mm512_mask_cmp_pd_mask - // CHECK: @llvm.x86.avx512.mask.cmp.pd.512 + // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512 + // CHECK: and <8 x i1> [[CMP]], {{.*}} return _mm512_mask_cmp_pd_mask(m, a, b, 0); } Index: test/CodeGen/avx512vl-builtins.c =================================================================== --- test/CodeGen/avx512vl-builtins.c +++ test/CodeGen/avx512vl-builtins.c @@ -1049,49 +1049,53 @@ __mmask8 test_mm256_cmp_ps_mask(__m256 __A, __m256 __B) { // CHECK-LABEL: @test_mm256_cmp_ps_mask - // CHECK: @llvm.x86.avx512.mask.cmp.ps.256 + // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.ps.256 return (__mmask8)_mm256_cmp_ps_mask(__A, __B, 0); } __mmask8 test_mm256_mask_cmp_ps_mask(__mmask8 m, __m256 __A, __m256 __B) { // CHECK-LABEL: @test_mm256_mask_cmp_ps_mask - // CHECK: @llvm.x86.avx512.mask.cmp.ps.256 + // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.x86.avx512.mask.cmp.ps.256 + // CHECK: and <8 x i1> [[CMP]], {{.*}} return _mm256_mask_cmp_ps_mask(m, __A, __B, 0); } __mmask8 test_mm_cmp_ps_mask(__m128 __A, __m128 __B) { // CHECK-LABEL: @test_mm_cmp_ps_mask - // CHECK: @llvm.x86.avx512.mask.cmp.ps.128 + // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.ps.128 return (__mmask8)_mm_cmp_ps_mask(__A, __B, 0); } __mmask8 test_mm_mask_cmp_ps_mask(__mmask8 m, __m128 __A, __m128 __B) { // CHECK-LABEL: @test_mm_mask_cmp_ps_mask - // CHECK: @llvm.x86.avx512.mask.cmp.ps.128 + // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.x86.avx512.mask.cmp.ps.128 + // CHECK: and <4 x i1> [[CMP]], {{.*}} return _mm_mask_cmp_ps_mask(m, __A, __B, 0); } __mmask8 test_mm256_cmp_pd_mask(__m256d __A, __m256d __B) { // CHECK-LABEL: @test_mm256_cmp_pd_mask - // CHECK: @llvm.x86.avx512.mask.cmp.pd.256 + // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.pd.256 return (__mmask8)_mm256_cmp_pd_mask(__A, __B, 0); } __mmask8 test_mm256_mask_cmp_pd_mask(__mmask8 m, __m256d __A, __m256d __B) { // CHECK-LABEL: @test_mm256_mask_cmp_pd_mask - // CHECK: @llvm.x86.avx512.mask.cmp.pd.256 + // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.x86.avx512.mask.cmp.pd.256 + // CHECK: and <4 x i1> [[CMP]], {{.*}} return _mm256_mask_cmp_pd_mask(m, __A, __B, 0); } __mmask8 test_mm_cmp_pd_mask(__m128d __A, __m128d __B) { // CHECK-LABEL: @test_mm_cmp_pd_mask - // CHECK: @llvm.x86.avx512.mask.cmp.pd.128 + // CHECK: call <2 x i1> @llvm.x86.avx512.mask.cmp.pd.128 return (__mmask8)_mm_cmp_pd_mask(__A, __B, 0); } __mmask8 test_mm_mask_cmp_pd_mask(__mmask8 m, __m128d __A, __m128d __B) { // CHECK-LABEL: @test_mm_mask_cmp_pd_mask - // CHECK: @llvm.x86.avx512.mask.cmp.pd.128 + // CHECK: [[CMP:%.*]] = call <2 x i1> @llvm.x86.avx512.mask.cmp.pd.128 + // CHECK: and <2 x i1> [[CMP]], {{.*}} return _mm_mask_cmp_pd_mask(m, __A, __B, 0); }