Index: include/llvm/IR/IntrinsicsX86.td =================================================================== --- include/llvm/IR/IntrinsicsX86.td +++ include/llvm/IR/IntrinsicsX86.td @@ -1439,80 +1439,6 @@ def int_x86_avx_ptestnzc_256 : GCCBuiltin<"__builtin_ia32_ptestnzc256">, Intrinsic<[llvm_i32_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; - def int_x86_avx512_ptestm_d_512 : GCCBuiltin<"__builtin_ia32_ptestmd512">, - Intrinsic<[llvm_i16_ty], [llvm_v16i32_ty, llvm_v16i32_ty, - llvm_i16_ty], [IntrNoMem]>; - def int_x86_avx512_ptestm_q_512 : GCCBuiltin<"__builtin_ia32_ptestmq512">, - Intrinsic<[llvm_i8_ty], [llvm_v8i64_ty, llvm_v8i64_ty, - llvm_i8_ty], [IntrNoMem]>; - - def int_x86_avx512_ptestm_b_128 : GCCBuiltin<"__builtin_ia32_ptestmb128">, - Intrinsic<[llvm_i16_ty], [llvm_v16i8_ty, - llvm_v16i8_ty, llvm_i16_ty], [IntrNoMem]>; - def int_x86_avx512_ptestm_b_256 : GCCBuiltin<"__builtin_ia32_ptestmb256">, - Intrinsic<[llvm_i32_ty], [llvm_v32i8_ty, - llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_ptestm_b_512 : GCCBuiltin<"__builtin_ia32_ptestmb512">, - Intrinsic<[llvm_i64_ty], [llvm_v64i8_ty, - llvm_v64i8_ty, llvm_i64_ty], [IntrNoMem]>; - def int_x86_avx512_ptestm_d_128 : GCCBuiltin<"__builtin_ia32_ptestmd128">, - Intrinsic<[llvm_i8_ty], [llvm_v4i32_ty, - llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx512_ptestm_d_256 : GCCBuiltin<"__builtin_ia32_ptestmd256">, - Intrinsic<[llvm_i8_ty], [llvm_v8i32_ty, - llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx512_ptestm_q_128 : GCCBuiltin<"__builtin_ia32_ptestmq128">, - Intrinsic<[llvm_i8_ty], [llvm_v2i64_ty, - llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx512_ptestm_q_256 : GCCBuiltin<"__builtin_ia32_ptestmq256">, - Intrinsic<[llvm_i8_ty], [llvm_v4i64_ty, - llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx512_ptestm_w_128 : GCCBuiltin<"__builtin_ia32_ptestmw128">, - Intrinsic<[llvm_i8_ty], [llvm_v8i16_ty, - llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx512_ptestm_w_256 : GCCBuiltin<"__builtin_ia32_ptestmw256">, - Intrinsic<[llvm_i16_ty], [llvm_v16i16_ty, - llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>; - def int_x86_avx512_ptestm_w_512 : GCCBuiltin<"__builtin_ia32_ptestmw512">, - Intrinsic<[llvm_i32_ty], [llvm_v32i16_ty, - llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>; - - def int_x86_avx512_ptestnm_b_128 : GCCBuiltin<"__builtin_ia32_ptestnmb128">, - Intrinsic<[llvm_i16_ty], [llvm_v16i8_ty, - llvm_v16i8_ty, llvm_i16_ty], [IntrNoMem]>; - def int_x86_avx512_ptestnm_b_256 : GCCBuiltin<"__builtin_ia32_ptestnmb256">, - Intrinsic<[llvm_i32_ty], [llvm_v32i8_ty, - llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_ptestnm_b_512 : GCCBuiltin<"__builtin_ia32_ptestnmb512">, - Intrinsic<[llvm_i64_ty], [llvm_v64i8_ty, - llvm_v64i8_ty, llvm_i64_ty], [IntrNoMem]>; - def int_x86_avx512_ptestnm_d_128 : GCCBuiltin<"__builtin_ia32_ptestnmd128">, - Intrinsic<[llvm_i8_ty], [llvm_v4i32_ty, - llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx512_ptestnm_d_256 : GCCBuiltin<"__builtin_ia32_ptestnmd256">, - Intrinsic<[llvm_i8_ty], [llvm_v8i32_ty, - llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx512_ptestnm_d_512 : GCCBuiltin<"__builtin_ia32_ptestnmd512">, - Intrinsic<[llvm_i16_ty], [llvm_v16i32_ty, - llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>; - def int_x86_avx512_ptestnm_q_128 : GCCBuiltin<"__builtin_ia32_ptestnmq128">, - Intrinsic<[llvm_i8_ty], [llvm_v2i64_ty, - llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx512_ptestnm_q_256 : 
GCCBuiltin<"__builtin_ia32_ptestnmq256">, - Intrinsic<[llvm_i8_ty], [llvm_v4i64_ty, - llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx512_ptestnm_q_512 : GCCBuiltin<"__builtin_ia32_ptestnmq512">, - Intrinsic<[llvm_i8_ty], [llvm_v8i64_ty, - llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx512_ptestnm_w_128 : GCCBuiltin<"__builtin_ia32_ptestnmw128">, - Intrinsic<[llvm_i8_ty], [llvm_v8i16_ty, - llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx512_ptestnm_w_256 : GCCBuiltin<"__builtin_ia32_ptestnmw256">, - Intrinsic<[llvm_i16_ty], [llvm_v16i16_ty, - llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>; - def int_x86_avx512_ptestnm_w_512 : GCCBuiltin<"__builtin_ia32_ptestnmw512">, - Intrinsic<[llvm_i32_ty], [llvm_v32i16_ty, - llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_mask_fpclass_pd_128 : GCCBuiltin<"__builtin_ia32_fpclasspd128_mask">, Index: lib/IR/AutoUpgrade.cpp =================================================================== --- lib/IR/AutoUpgrade.cpp +++ lib/IR/AutoUpgrade.cpp @@ -256,6 +256,8 @@ Name.startswith("avx512.cvtmask2") || // Added in 5.0 (Name.startswith("xop.vpcom") && // Added in 3.2 F->arg_size() == 2) || + Name.startswith("avx512.ptestm") || //Added in 6.0 + Name.startswith("avx512.ptestnm") || //Added in 6.0 Name.startswith("sse2.pavg") || // Added in 6.0 Name.startswith("avx2.pavg") || // Added in 6.0 Name.startswith("avx512.mask.pavg")) // Added in 6.0 @@ -823,6 +825,26 @@ return Res; } +// Applying mask on vector of i1's and make sure result is atleast 8 bits wide. +static Value *ApplyX86MaskOn1BitsVec(IRBuilder<> &Builder,Value *Vec, Value *Mask, + unsigned NumElts) { + const auto *C = dyn_cast(Mask); + if (!C || !C->isAllOnesValue()) + Vec = Builder.CreateAnd(Vec, getX86MaskVec(Builder, Mask, NumElts)); + + if (NumElts < 8) { + uint32_t Indices[8]; + for (unsigned i = 0; i != NumElts; ++i) + Indices[i] = i; + for (unsigned i = NumElts; i != 8; ++i) + Indices[i] = NumElts + i % NumElts; + Vec = Builder.CreateShuffleVector(Vec, + Constant::getNullValue(Vec->getType()), + Indices); + } + return Vec; +} + static Value *upgradeMaskedCompare(IRBuilder<> &Builder, CallInst &CI, unsigned CC, bool Signed) { Value *Op0 = CI.getArgOperand(0); @@ -848,20 +870,8 @@ } Value *Mask = CI.getArgOperand(CI.getNumArgOperands() - 1); - const auto *C = dyn_cast(Mask); - if (!C || !C->isAllOnesValue()) - Cmp = Builder.CreateAnd(Cmp, getX86MaskVec(Builder, Mask, NumElts)); + Cmp = ApplyX86MaskOn1BitsVec(Builder, Cmp, Mask, NumElts); - if (NumElts < 8) { - uint32_t Indices[8]; - for (unsigned i = 0; i != NumElts; ++i) - Indices[i] = i; - for (unsigned i = NumElts; i != 8; ++i) - Indices[i] = NumElts + i % NumElts; - Cmp = Builder.CreateShuffleVector(Cmp, - Constant::getNullValue(Cmp->getType()), - Indices); - } return Builder.CreateBitCast(Cmp, IntegerType::get(CI.getContext(), std::max(NumElts, 8U))); } @@ -1027,6 +1037,21 @@ Rep = Builder.CreateICmp(CmpEq ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_SGT, CI->getArgOperand(0), CI->getArgOperand(1)); Rep = Builder.CreateSExt(Rep, CI->getType(), ""); + } else if (IsX86 && (Name.startswith("avx512.ptestm") || + Name.startswith("avx512.ptestnm"))) { + Value *Op0 = CI->getArgOperand(0); + Value *Op1 = CI->getArgOperand(1); + Value *Mask = CI->getArgOperand(2); + Rep = Builder.CreateAnd(Op0, Op1); + llvm::Type *Ty = Op0->getType(); + Value *Zero = llvm::Constant::getNullValue(Ty); + ICmpInst::Predicate Pred = + Name.startswith("avx512.ptestm") ? 
ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ; + Rep = Builder.CreateICmp(Pred , Rep, Zero); + unsigned NumElts = Op0->getType()->getVectorNumElements(); + Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, Mask, NumElts); + Rep = Builder.CreateBitCast(Rep, IntegerType::get(CI->getContext(), + std::max(NumElts, 8U))); } else if (IsX86 && (Name.startswith("avx512.mask.pbroadcast"))){ unsigned NumElts = CI->getArgOperand(1)->getType()->getVectorNumElements(); Index: lib/Target/X86/X86IntrinsicsInfo.h =================================================================== --- lib/Target/X86/X86IntrinsicsInfo.h +++ lib/Target/X86/X86IntrinsicsInfo.h @@ -1404,30 +1404,6 @@ X86_INTRINSIC_DATA(avx512_psrlv_w_128, INTR_TYPE_2OP, ISD::SRL, 0), X86_INTRINSIC_DATA(avx512_psrlv_w_256, INTR_TYPE_2OP, ISD::SRL, 0), X86_INTRINSIC_DATA(avx512_psrlv_w_512, INTR_TYPE_2OP, ISD::SRL, 0), - X86_INTRINSIC_DATA(avx512_ptestm_b_128, CMP_MASK, X86ISD::TESTM, 0), - X86_INTRINSIC_DATA(avx512_ptestm_b_256, CMP_MASK, X86ISD::TESTM, 0), - X86_INTRINSIC_DATA(avx512_ptestm_b_512, CMP_MASK, X86ISD::TESTM, 0), - X86_INTRINSIC_DATA(avx512_ptestm_d_128, CMP_MASK, X86ISD::TESTM, 0), - X86_INTRINSIC_DATA(avx512_ptestm_d_256, CMP_MASK, X86ISD::TESTM, 0), - X86_INTRINSIC_DATA(avx512_ptestm_d_512, CMP_MASK, X86ISD::TESTM, 0), - X86_INTRINSIC_DATA(avx512_ptestm_q_128, CMP_MASK, X86ISD::TESTM, 0), - X86_INTRINSIC_DATA(avx512_ptestm_q_256, CMP_MASK, X86ISD::TESTM, 0), - X86_INTRINSIC_DATA(avx512_ptestm_q_512, CMP_MASK, X86ISD::TESTM, 0), - X86_INTRINSIC_DATA(avx512_ptestm_w_128, CMP_MASK, X86ISD::TESTM, 0), - X86_INTRINSIC_DATA(avx512_ptestm_w_256, CMP_MASK, X86ISD::TESTM, 0), - X86_INTRINSIC_DATA(avx512_ptestm_w_512, CMP_MASK, X86ISD::TESTM, 0), - X86_INTRINSIC_DATA(avx512_ptestnm_b_128, CMP_MASK, X86ISD::TESTNM, 0), - X86_INTRINSIC_DATA(avx512_ptestnm_b_256, CMP_MASK, X86ISD::TESTNM, 0), - X86_INTRINSIC_DATA(avx512_ptestnm_b_512, CMP_MASK, X86ISD::TESTNM, 0), - X86_INTRINSIC_DATA(avx512_ptestnm_d_128, CMP_MASK, X86ISD::TESTNM, 0), - X86_INTRINSIC_DATA(avx512_ptestnm_d_256, CMP_MASK, X86ISD::TESTNM, 0), - X86_INTRINSIC_DATA(avx512_ptestnm_d_512, CMP_MASK, X86ISD::TESTNM, 0), - X86_INTRINSIC_DATA(avx512_ptestnm_q_128, CMP_MASK, X86ISD::TESTNM, 0), - X86_INTRINSIC_DATA(avx512_ptestnm_q_256, CMP_MASK, X86ISD::TESTNM, 0), - X86_INTRINSIC_DATA(avx512_ptestnm_q_512, CMP_MASK, X86ISD::TESTNM, 0), - X86_INTRINSIC_DATA(avx512_ptestnm_w_128, CMP_MASK, X86ISD::TESTNM, 0), - X86_INTRINSIC_DATA(avx512_ptestnm_w_256, CMP_MASK, X86ISD::TESTNM, 0), - X86_INTRINSIC_DATA(avx512_ptestnm_w_512, CMP_MASK, X86ISD::TESTNM, 0), X86_INTRINSIC_DATA(avx512_rcp14_pd_128, INTR_TYPE_1OP_MASK, X86ISD::FRCP, 0), X86_INTRINSIC_DATA(avx512_rcp14_pd_256, INTR_TYPE_1OP_MASK, X86ISD::FRCP, 0), X86_INTRINSIC_DATA(avx512_rcp14_pd_512, INTR_TYPE_1OP_MASK, X86ISD::FRCP, 0), Index: test/CodeGen/X86/avx512-intrinsics-upgrade.ll =================================================================== --- test/CodeGen/X86/avx512-intrinsics-upgrade.ll +++ test/CodeGen/X86/avx512-intrinsics-upgrade.ll @@ -3654,3 +3654,77 @@ ret <8 x i64> %res2 } +define i8 @test_vptestmq(<8 x i64> %a0, <8 x i64> %a1, i8 %m) { +; CHECK-LABEL: test_vptestmq: +; CHECK: ## BB#0: +; CHECK-NEXT: vptestmq %zmm1, %zmm0, %k0 +; CHECK-NEXT: kmovw %k0, %ecx +; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: vptestmq %zmm1, %zmm0, %k0 {%k1} +; CHECK-NEXT: kmovw %k0, %eax +; CHECK-NEXT: addb %cl, %al +; CHECK-NEXT: ## kill: %AL %AL %EAX +; CHECK-NEXT: retq + %res = call i8 @llvm.x86.avx512.ptestm.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 
-1) + %res1 = call i8 @llvm.x86.avx512.ptestm.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 %m) + %res2 = add i8 %res1, %res + ret i8 %res2 +} +declare i8 @llvm.x86.avx512.ptestm.q.512(<8 x i64>, <8 x i64>, i8) + +define i16 @test_vptestmd(<16 x i32> %a0, <16 x i32> %a1, i16 %m) { +; CHECK-LABEL: test_vptestmd: +; CHECK: ## BB#0: +; CHECK-NEXT: vptestmd %zmm1, %zmm0, %k0 +; CHECK-NEXT: kmovw %k0, %ecx +; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: vptestmd %zmm1, %zmm0, %k0 {%k1} +; CHECK-NEXT: kmovw %k0, %eax +; CHECK-NEXT: addl %ecx, %eax +; CHECK-NEXT: ## kill: %AX %AX %EAX +; CHECK-NEXT: retq + %res = call i16 @llvm.x86.avx512.ptestm.d.512(<16 x i32> %a0, <16 x i32> %a1, i16 -1) + %res1 = call i16 @llvm.x86.avx512.ptestm.d.512(<16 x i32> %a0, <16 x i32> %a1, i16 %m) + %res2 = add i16 %res1, %res + ret i16 %res2 +} +declare i16 @llvm.x86.avx512.ptestm.d.512(<16 x i32>, <16 x i32>, i16) + +declare i16 @llvm.x86.avx512.ptestnm.d.512(<16 x i32>, <16 x i32>, i16 %x2) + +define i16@test_int_x86_avx512_ptestnm_d_512(<16 x i32> %x0, <16 x i32> %x1, i16 %x2) { +; CHECK-LABEL: test_int_x86_avx512_ptestnm_d_512: +; CHECK: ## BB#0: +; CHECK-NEXT: vptestnmd %zmm1, %zmm0, %k0 +; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: vptestnmd %zmm1, %zmm0, %k1 {%k1} +; CHECK-NEXT: kmovw %k1, %ecx +; CHECK-NEXT: kmovw %k0, %eax +; CHECK-NEXT: addl %ecx, %eax +; CHECK-NEXT: ## kill: %AX %AX %EAX +; CHECK-NEXT: retq + %res = call i16 @llvm.x86.avx512.ptestnm.d.512(<16 x i32> %x0, <16 x i32> %x1, i16 %x2) + %res1 = call i16 @llvm.x86.avx512.ptestnm.d.512(<16 x i32> %x0, <16 x i32> %x1, i16-1) + %res2 = add i16 %res, %res1 + ret i16 %res2 +} + +declare i8 @llvm.x86.avx512.ptestnm.q.512(<8 x i64>, <8 x i64>, i8 %x2) + +define i8@test_int_x86_avx512_ptestnm_q_512(<8 x i64> %x0, <8 x i64> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_ptestnm_q_512: +; CHECK: ## BB#0: +; CHECK-NEXT: vptestnmq %zmm1, %zmm0, %k0 +; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: vptestnmq %zmm1, %zmm0, %k1 {%k1} +; CHECK-NEXT: kmovw %k1, %ecx +; CHECK-NEXT: kmovw %k0, %eax +; CHECK-NEXT: addb %cl, %al +; CHECK-NEXT: ## kill: %AL %AL %EAX +; CHECK-NEXT: retq + %res = call i8 @llvm.x86.avx512.ptestnm.q.512(<8 x i64> %x0, <8 x i64> %x1, i8 %x2) + %res1 = call i8 @llvm.x86.avx512.ptestnm.q.512(<8 x i64> %x0, <8 x i64> %x1, i8-1) + %res2 = add i8 %res, %res1 + ret i8 %res2 +} + Index: test/CodeGen/X86/avx512-intrinsics.ll =================================================================== --- test/CodeGen/X86/avx512-intrinsics.ll +++ test/CodeGen/X86/avx512-intrinsics.ll @@ -766,42 +766,6 @@ declare <8 x double> @llvm.x86.avx512.mask.min.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32) -define i8 @test_vptestmq(<8 x i64> %a0, <8 x i64> %a1, i8 %m) { -; CHECK-LABEL: test_vptestmq: -; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 -; CHECK-NEXT: vptestmq %zmm1, %zmm0, %k0 -; CHECK-NEXT: kmovw %k0, %ecx -; CHECK-NEXT: vptestmq %zmm1, %zmm0, %k0 {%k1} -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: addb %cl, %al -; CHECK-NEXT: ## kill: %AL %AL %EAX -; CHECK-NEXT: retq - %res = call i8 @llvm.x86.avx512.ptestm.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 -1) - %res1 = call i8 @llvm.x86.avx512.ptestm.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 %m) - %res2 = add i8 %res1, %res - ret i8 %res2 -} -declare i8 @llvm.x86.avx512.ptestm.q.512(<8 x i64>, <8 x i64>, i8) - -define i16 @test_vptestmd(<16 x i32> %a0, <16 x i32> %a1, i16 %m) { -; CHECK-LABEL: test_vptestmd: -; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 -; CHECK-NEXT: vptestmd %zmm1, %zmm0, %k0 -; 
CHECK-NEXT: kmovw %k0, %ecx -; CHECK-NEXT: vptestmd %zmm1, %zmm0, %k0 {%k1} -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: addl %ecx, %eax -; CHECK-NEXT: ## kill: %AX %AX %EAX -; CHECK-NEXT: retq - %res = call i16 @llvm.x86.avx512.ptestm.d.512(<16 x i32> %a0, <16 x i32> %a1, i16 -1) - %res1 = call i16 @llvm.x86.avx512.ptestm.d.512(<16 x i32> %a0, <16 x i32> %a1, i16 %m) - %res2 = add i16 %res1, %res - ret i16 %res2 -} -declare i16 @llvm.x86.avx512.ptestm.d.512(<16 x i32>, <16 x i32>, i16) - define void @test_mask_store_ss(i8* %ptr, <4 x float> %data, i8 %mask) { ; CHECK-LABEL: test_mask_store_ss: ; CHECK: ## BB#0: @@ -4133,47 +4097,6 @@ ret <2 x double> %res4 } -declare i16 @llvm.x86.avx512.ptestnm.d.512(<16 x i32>, <16 x i32>, i16 %x2) - -define i16@test_int_x86_avx512_ptestnm_d_512(<16 x i32> %x0, <16 x i32> %x1, i16 %x2) { -; CHECK-LABEL: test_int_x86_avx512_ptestnm_d_512: -; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 -; CHECK-NEXT: vptestnmd %zmm1, %zmm0, %k0 -; CHECK-NEXT: kmovw %k0, %ecx -; CHECK-NEXT: vptestnmd %zmm1, %zmm0, %k0 {%k1} -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: addl %ecx, %eax -; CHECK-NEXT: ## kill: %AX %AX %EAX -; CHECK-NEXT: retq - %res = call i16 @llvm.x86.avx512.ptestnm.d.512(<16 x i32> %x0, <16 x i32> %x1, i16 %x2) - %res1 = call i16 @llvm.x86.avx512.ptestnm.d.512(<16 x i32> %x0, <16 x i32> %x1, i16-1) - %res2 = add i16 %res, %res1 - ret i16 %res2 -} - -declare i8 @llvm.x86.avx512.ptestnm.q.512(<8 x i64>, <8 x i64>, i8 %x2) - -define i8@test_int_x86_avx512_ptestnm_q_512(<8 x i64> %x0, <8 x i64> %x1, i8 %x2) { -; CHECK-LABEL: test_int_x86_avx512_ptestnm_q_512: -; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 -; CHECK-NEXT: vptestnmq %zmm1, %zmm0, %k0 -; CHECK-NEXT: kmovw %k0, %ecx -; CHECK-NEXT: vptestnmq %zmm1, %zmm0, %k0 {%k1} -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: addb %cl, %al -; CHECK-NEXT: ## kill: %AL %AL %EAX -; CHECK-NEXT: retq - %res = call i8 @llvm.x86.avx512.ptestnm.q.512(<8 x i64> %x0, <8 x i64> %x1, i8 %x2) - %res1 = call i8 @llvm.x86.avx512.ptestnm.q.512(<8 x i64> %x0, <8 x i64> %x1, i8-1) - %res2 = add i8 %res, %res1 - ret i8 %res2 -} - - - - declare <2 x double> @llvm.x86.avx512.mask.vfmadd.sd(<2 x double>, <2 x double>, <2 x double>, i8, i32) define <2 x double>@test_int_x86_avx512_mask_vfmadd_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3,i32 %x4 ){ Index: test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll =================================================================== --- test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll +++ test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll @@ -3795,3 +3795,135 @@ ret <64 x i8> %res2 } +declare i64 @llvm.x86.avx512.ptestm.b.512(<64 x i8>, <64 x i8>, i64) + +define i64@test_int_x86_avx512_ptestm_b_512(<64 x i8> %x0, <64 x i8> %x1, i64 %x2) { +; AVX512BW-LABEL: test_int_x86_avx512_ptestm_b_512: +; AVX512BW: ## BB#0: +; AVX512BW-NEXT: vptestmb %zmm1, %zmm0, %k0 +; AVX512BW-NEXT: kmovq %rdi, %k1 +; AVX512BW-NEXT: vptestmb %zmm1, %zmm0, %k1 {%k1} +; AVX512BW-NEXT: kmovq %k1, %rcx +; AVX512BW-NEXT: kmovq %k0, %rax +; AVX512BW-NEXT: addq %rcx, %rax +; AVX512BW-NEXT: vzeroupper +; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_int_x86_avx512_ptestm_b_512: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: subl $20, %esp +; AVX512F-32-NEXT: .cfi_def_cfa_offset 24 +; AVX512F-32-NEXT: vptestmb %zmm1, %zmm0, %k0 +; AVX512F-32-NEXT: kmovq {{[0-9]+}}(%esp), %k1 +; AVX512F-32-NEXT: vptestmb %zmm1, %zmm0, %k1 {%k1} +; AVX512F-32-NEXT: kmovq %k1, (%esp) +; AVX512F-32-NEXT: movl (%esp), %eax +; 
AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx +; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) +; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax +; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx +; AVX512F-32-NEXT: addl $20, %esp +; AVX512F-32-NEXT: vzeroupper +; AVX512F-32-NEXT: retl + %res = call i64 @llvm.x86.avx512.ptestm.b.512(<64 x i8> %x0, <64 x i8> %x1, i64 %x2) + %res1 = call i64 @llvm.x86.avx512.ptestm.b.512(<64 x i8> %x0, <64 x i8> %x1, i64-1) + %res2 = add i64 %res, %res1 + ret i64 %res2 +} + +declare i32 @llvm.x86.avx512.ptestm.w.512(<32 x i16>, <32 x i16>, i32) + +define i32@test_int_x86_avx512_ptestm_w_512(<32 x i16> %x0, <32 x i16> %x1, i32 %x2) { +; AVX512BW-LABEL: test_int_x86_avx512_ptestm_w_512: +; AVX512BW: ## BB#0: +; AVX512BW-NEXT: vptestmw %zmm1, %zmm0, %k0 +; AVX512BW-NEXT: kmovd %edi, %k1 +; AVX512BW-NEXT: vptestmw %zmm1, %zmm0, %k1 {%k1} +; AVX512BW-NEXT: kmovd %k1, %ecx +; AVX512BW-NEXT: kmovd %k0, %eax +; AVX512BW-NEXT: addl %ecx, %eax +; AVX512BW-NEXT: vzeroupper +; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_int_x86_avx512_ptestm_w_512: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: vptestmw %zmm1, %zmm0, %k0 +; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512F-32-NEXT: vptestmw %zmm1, %zmm0, %k1 {%k1} +; AVX512F-32-NEXT: kmovd %k1, %ecx +; AVX512F-32-NEXT: kmovd %k0, %eax +; AVX512F-32-NEXT: addl %ecx, %eax +; AVX512F-32-NEXT: vzeroupper +; AVX512F-32-NEXT: retl + %res = call i32 @llvm.x86.avx512.ptestm.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 %x2) + %res1 = call i32 @llvm.x86.avx512.ptestm.w.512(<32 x i16> %x0, <32 x i16> %x1, i32-1) + %res2 = add i32 %res, %res1 + ret i32 %res2 +} + +declare i64 @llvm.x86.avx512.ptestnm.b.512(<64 x i8>, <64 x i8>, i64 %x2) + +define i64@test_int_x86_avx512_ptestnm_b_512(<64 x i8> %x0, <64 x i8> %x1, i64 %x2) { +; AVX512BW-LABEL: test_int_x86_avx512_ptestnm_b_512: +; AVX512BW: ## BB#0: +; AVX512BW-NEXT: vptestnmb %zmm1, %zmm0, %k0 +; AVX512BW-NEXT: kmovq %rdi, %k1 +; AVX512BW-NEXT: vptestnmb %zmm1, %zmm0, %k1 {%k1} +; AVX512BW-NEXT: kmovq %k1, %rcx +; AVX512BW-NEXT: kmovq %k0, %rax +; AVX512BW-NEXT: addq %rcx, %rax +; AVX512BW-NEXT: vzeroupper +; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_int_x86_avx512_ptestnm_b_512: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: subl $20, %esp +; AVX512F-32-NEXT: .cfi_def_cfa_offset 24 +; AVX512F-32-NEXT: vptestnmb %zmm1, %zmm0, %k0 +; AVX512F-32-NEXT: kmovq {{[0-9]+}}(%esp), %k1 +; AVX512F-32-NEXT: vptestnmb %zmm1, %zmm0, %k1 {%k1} +; AVX512F-32-NEXT: kmovq %k1, (%esp) +; AVX512F-32-NEXT: movl (%esp), %eax +; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx +; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) +; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax +; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx +; AVX512F-32-NEXT: addl $20, %esp +; AVX512F-32-NEXT: vzeroupper +; AVX512F-32-NEXT: retl + %res = call i64 @llvm.x86.avx512.ptestnm.b.512(<64 x i8> %x0, <64 x i8> %x1, i64 %x2) + %res1 = call i64 @llvm.x86.avx512.ptestnm.b.512(<64 x i8> %x0, <64 x i8> %x1, i64-1) + %res2 = add i64 %res, %res1 + ret i64 %res2 +} + +declare i32 @llvm.x86.avx512.ptestnm.w.512(<32 x i16>, <32 x i16>, i32 %x2) + +define i32@test_int_x86_avx512_ptestnm_w_512(<32 x i16> %x0, <32 x i16> %x1, i32 %x2) { +; AVX512BW-LABEL: test_int_x86_avx512_ptestnm_w_512: +; AVX512BW: ## BB#0: +; AVX512BW-NEXT: vptestnmw %zmm1, %zmm0, %k0 +; AVX512BW-NEXT: kmovd %edi, %k1 +; AVX512BW-NEXT: vptestnmw %zmm1, %zmm0, %k1 {%k1} +; AVX512BW-NEXT: kmovd %k1, %ecx +; AVX512BW-NEXT: kmovd %k0, %eax +; AVX512BW-NEXT: addl %ecx, %eax +; 
AVX512BW-NEXT: vzeroupper +; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_int_x86_avx512_ptestnm_w_512: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: vptestnmw %zmm1, %zmm0, %k0 +; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512F-32-NEXT: vptestnmw %zmm1, %zmm0, %k1 {%k1} +; AVX512F-32-NEXT: kmovd %k1, %ecx +; AVX512F-32-NEXT: kmovd %k0, %eax +; AVX512F-32-NEXT: addl %ecx, %eax +; AVX512F-32-NEXT: vzeroupper +; AVX512F-32-NEXT: retl + %res = call i32 @llvm.x86.avx512.ptestnm.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 %x2) + %res1 = call i32 @llvm.x86.avx512.ptestnm.w.512(<32 x i16> %x0, <32 x i16> %x1, i32-1) + %res2 = add i32 %res, %res1 + ret i32 %res2 +} + Index: test/CodeGen/X86/avx512bw-intrinsics.ll =================================================================== --- test/CodeGen/X86/avx512bw-intrinsics.ll +++ test/CodeGen/X86/avx512bw-intrinsics.ll @@ -1683,134 +1683,6 @@ ret <32 x i16> %res4 } -declare i64 @llvm.x86.avx512.ptestm.b.512(<64 x i8>, <64 x i8>, i64) - -define i64@test_int_x86_avx512_ptestm_b_512(<64 x i8> %x0, <64 x i8> %x1, i64 %x2) { -; AVX512BW-LABEL: test_int_x86_avx512_ptestm_b_512: -; AVX512BW: ## BB#0: -; AVX512BW-NEXT: kmovq %rdi, %k1 -; AVX512BW-NEXT: vptestmb %zmm1, %zmm0, %k0 {%k1} -; AVX512BW-NEXT: kmovq %k0, %rcx -; AVX512BW-NEXT: vptestmb %zmm1, %zmm0, %k0 -; AVX512BW-NEXT: kmovq %k0, %rax -; AVX512BW-NEXT: addq %rcx, %rax -; AVX512BW-NEXT: retq -; -; AVX512F-32-LABEL: test_int_x86_avx512_ptestm_b_512: -; AVX512F-32: # BB#0: -; AVX512F-32-NEXT: subl $20, %esp -; AVX512F-32-NEXT: .cfi_def_cfa_offset 24 -; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0 -; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 -; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1 -; AVX512F-32-NEXT: vptestmb %zmm1, %zmm0, %k0 {%k1} -; AVX512F-32-NEXT: kmovq %k0, (%esp) -; AVX512F-32-NEXT: vptestmb %zmm1, %zmm0, %k0 -; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) -; AVX512F-32-NEXT: movl (%esp), %eax -; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx -; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax -; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx -; AVX512F-32-NEXT: addl $20, %esp -; AVX512F-32-NEXT: retl - %res = call i64 @llvm.x86.avx512.ptestm.b.512(<64 x i8> %x0, <64 x i8> %x1, i64 %x2) - %res1 = call i64 @llvm.x86.avx512.ptestm.b.512(<64 x i8> %x0, <64 x i8> %x1, i64-1) - %res2 = add i64 %res, %res1 - ret i64 %res2 -} - -declare i32 @llvm.x86.avx512.ptestm.w.512(<32 x i16>, <32 x i16>, i32) - -define i32@test_int_x86_avx512_ptestm_w_512(<32 x i16> %x0, <32 x i16> %x1, i32 %x2) { -; AVX512BW-LABEL: test_int_x86_avx512_ptestm_w_512: -; AVX512BW: ## BB#0: -; AVX512BW-NEXT: kmovd %edi, %k1 -; AVX512BW-NEXT: vptestmw %zmm1, %zmm0, %k0 {%k1} -; AVX512BW-NEXT: kmovd %k0, %ecx -; AVX512BW-NEXT: vptestmw %zmm1, %zmm0, %k0 -; AVX512BW-NEXT: kmovd %k0, %eax -; AVX512BW-NEXT: addl %ecx, %eax -; AVX512BW-NEXT: retq -; -; AVX512F-32-LABEL: test_int_x86_avx512_ptestm_w_512: -; AVX512F-32: # BB#0: -; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 -; AVX512F-32-NEXT: vptestmw %zmm1, %zmm0, %k0 {%k1} -; AVX512F-32-NEXT: kmovd %k0, %ecx -; AVX512F-32-NEXT: vptestmw %zmm1, %zmm0, %k0 -; AVX512F-32-NEXT: kmovd %k0, %eax -; AVX512F-32-NEXT: addl %ecx, %eax -; AVX512F-32-NEXT: retl - %res = call i32 @llvm.x86.avx512.ptestm.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 %x2) - %res1 = call i32 @llvm.x86.avx512.ptestm.w.512(<32 x i16> %x0, <32 x i16> %x1, i32-1) - %res2 = add i32 %res, %res1 - ret i32 %res2 -} - -declare i64 @llvm.x86.avx512.ptestnm.b.512(<64 x i8>, <64 x i8>, i64 %x2) - -define 
i64@test_int_x86_avx512_ptestnm_b_512(<64 x i8> %x0, <64 x i8> %x1, i64 %x2) { -; AVX512BW-LABEL: test_int_x86_avx512_ptestnm_b_512: -; AVX512BW: ## BB#0: -; AVX512BW-NEXT: kmovq %rdi, %k1 -; AVX512BW-NEXT: vptestnmb %zmm1, %zmm0, %k0 {%k1} -; AVX512BW-NEXT: kmovq %k0, %rcx -; AVX512BW-NEXT: vptestnmb %zmm1, %zmm0, %k0 -; AVX512BW-NEXT: kmovq %k0, %rax -; AVX512BW-NEXT: addq %rcx, %rax -; AVX512BW-NEXT: retq -; -; AVX512F-32-LABEL: test_int_x86_avx512_ptestnm_b_512: -; AVX512F-32: # BB#0: -; AVX512F-32-NEXT: subl $20, %esp -; AVX512F-32-NEXT: .cfi_def_cfa_offset 24 -; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0 -; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 -; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1 -; AVX512F-32-NEXT: vptestnmb %zmm1, %zmm0, %k0 {%k1} -; AVX512F-32-NEXT: kmovq %k0, (%esp) -; AVX512F-32-NEXT: vptestnmb %zmm1, %zmm0, %k0 -; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) -; AVX512F-32-NEXT: movl (%esp), %eax -; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx -; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax -; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx -; AVX512F-32-NEXT: addl $20, %esp -; AVX512F-32-NEXT: retl - %res = call i64 @llvm.x86.avx512.ptestnm.b.512(<64 x i8> %x0, <64 x i8> %x1, i64 %x2) - %res1 = call i64 @llvm.x86.avx512.ptestnm.b.512(<64 x i8> %x0, <64 x i8> %x1, i64-1) - %res2 = add i64 %res, %res1 - ret i64 %res2 -} - -declare i32 @llvm.x86.avx512.ptestnm.w.512(<32 x i16>, <32 x i16>, i32 %x2) - -define i32@test_int_x86_avx512_ptestnm_w_512(<32 x i16> %x0, <32 x i16> %x1, i32 %x2) { -; AVX512BW-LABEL: test_int_x86_avx512_ptestnm_w_512: -; AVX512BW: ## BB#0: -; AVX512BW-NEXT: kmovd %edi, %k1 -; AVX512BW-NEXT: vptestnmw %zmm1, %zmm0, %k0 {%k1} -; AVX512BW-NEXT: kmovd %k0, %ecx -; AVX512BW-NEXT: vptestnmw %zmm1, %zmm0, %k0 -; AVX512BW-NEXT: kmovd %k0, %eax -; AVX512BW-NEXT: addl %ecx, %eax -; AVX512BW-NEXT: retq -; -; AVX512F-32-LABEL: test_int_x86_avx512_ptestnm_w_512: -; AVX512F-32: # BB#0: -; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 -; AVX512F-32-NEXT: vptestnmw %zmm1, %zmm0, %k0 {%k1} -; AVX512F-32-NEXT: kmovd %k0, %ecx -; AVX512F-32-NEXT: vptestnmw %zmm1, %zmm0, %k0 -; AVX512F-32-NEXT: kmovd %k0, %eax -; AVX512F-32-NEXT: addl %ecx, %eax -; AVX512F-32-NEXT: retl - %res = call i32 @llvm.x86.avx512.ptestnm.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 %x2) - %res1 = call i32 @llvm.x86.avx512.ptestnm.w.512(<32 x i16> %x0, <32 x i16> %x1, i32-1) - %res2 = add i32 %res, %res1 - ret i32 %res2 -} - define <32 x i16> @test_x86_avx512_psll_w_512(<32 x i16> %a0, <8 x i16> %a1) { ; AVX512BW-LABEL: test_x86_avx512_psll_w_512: ; AVX512BW: ## BB#0: Index: test/CodeGen/X86/avx512bwvl-intrinsics-upgrade.ll =================================================================== --- test/CodeGen/X86/avx512bwvl-intrinsics-upgrade.ll +++ test/CodeGen/X86/avx512bwvl-intrinsics-upgrade.ll @@ -3672,3 +3672,157 @@ declare <16 x i16> @llvm.x86.avx512.mask.pavg.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16) +declare i16 @llvm.x86.avx512.ptestm.b.128(<16 x i8>, <16 x i8>, i16) + +define i16@test_int_x86_avx512_ptestm_b_128(<16 x i8> %x0, <16 x i8> %x1, i16 %x2) { +; CHECK-LABEL: test_int_x86_avx512_ptestm_b_128: +; CHECK: ## BB#0: +; CHECK-NEXT: vptestmb %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf2,0x7d,0x08,0x26,0xc1] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] +; CHECK-NEXT: vptestmb %xmm1, %xmm0, %k1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x26,0xc9] +; CHECK-NEXT: kmovd %k1, %ecx ## encoding: [0xc5,0xfb,0x93,0xc9] +; CHECK-NEXT: kmovd %k0, %eax ## encoding: 
[0xc5,0xfb,0x93,0xc0] +; CHECK-NEXT: addl %ecx, %eax ## encoding: [0x01,0xc8] +; CHECK-NEXT: ## kill: %AX %AX %EAX +; CHECK-NEXT: retq ## encoding: [0xc3] + %res = call i16 @llvm.x86.avx512.ptestm.b.128(<16 x i8> %x0, <16 x i8> %x1, i16 %x2) + %res1 = call i16 @llvm.x86.avx512.ptestm.b.128(<16 x i8> %x0, <16 x i8> %x1, i16-1) + %res2 = add i16 %res, %res1 + ret i16 %res2 +} + +declare i32 @llvm.x86.avx512.ptestm.b.256(<32 x i8>, <32 x i8>, i32) + +define i32@test_int_x86_avx512_ptestm_b_256(<32 x i8> %x0, <32 x i8> %x1, i32 %x2) { +; CHECK-LABEL: test_int_x86_avx512_ptestm_b_256: +; CHECK: ## BB#0: +; CHECK-NEXT: vptestmb %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf2,0x7d,0x28,0x26,0xc1] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] +; CHECK-NEXT: vptestmb %ymm1, %ymm0, %k1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x26,0xc9] +; CHECK-NEXT: kmovd %k1, %ecx ## encoding: [0xc5,0xfb,0x93,0xc9] +; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0] +; CHECK-NEXT: addl %ecx, %eax ## encoding: [0x01,0xc8] +; CHECK-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] +; CHECK-NEXT: retq ## encoding: [0xc3] + %res = call i32 @llvm.x86.avx512.ptestm.b.256(<32 x i8> %x0, <32 x i8> %x1, i32 %x2) + %res1 = call i32 @llvm.x86.avx512.ptestm.b.256(<32 x i8> %x0, <32 x i8> %x1, i32-1) + %res2 = add i32 %res, %res1 + ret i32 %res2 +} + +declare i8 @llvm.x86.avx512.ptestm.w.128(<8 x i16>, <8 x i16>, i8) + +define i8@test_int_x86_avx512_ptestm_w_128(<8 x i16> %x0, <8 x i16> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_ptestm_w_128: +; CHECK: ## BB#0: +; CHECK-NEXT: vptestmw %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x08,0x26,0xc1] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] +; CHECK-NEXT: vptestmw %xmm1, %xmm0, %k1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x26,0xc9] +; CHECK-NEXT: kmovd %k1, %ecx ## encoding: [0xc5,0xfb,0x93,0xc9] +; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0] +; CHECK-NEXT: addb %cl, %al ## encoding: [0x00,0xc8] +; CHECK-NEXT: ## kill: %AL %AL %EAX +; CHECK-NEXT: retq ## encoding: [0xc3] + %res = call i8 @llvm.x86.avx512.ptestm.w.128(<8 x i16> %x0, <8 x i16> %x1, i8 %x2) + %res1 = call i8 @llvm.x86.avx512.ptestm.w.128(<8 x i16> %x0, <8 x i16> %x1, i8-1) + %res2 = add i8 %res, %res1 + ret i8 %res2 +} + +declare i16 @llvm.x86.avx512.ptestm.w.256(<16 x i16>, <16 x i16>, i16) + +define i16@test_int_x86_avx512_ptestm_w_256(<16 x i16> %x0, <16 x i16> %x1, i16 %x2) { +; CHECK-LABEL: test_int_x86_avx512_ptestm_w_256: +; CHECK: ## BB#0: +; CHECK-NEXT: vptestmw %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x28,0x26,0xc1] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] +; CHECK-NEXT: vptestmw %ymm1, %ymm0, %k1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x26,0xc9] +; CHECK-NEXT: kmovd %k1, %ecx ## encoding: [0xc5,0xfb,0x93,0xc9] +; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0] +; CHECK-NEXT: addl %ecx, %eax ## encoding: [0x01,0xc8] +; CHECK-NEXT: ## kill: %AX %AX %EAX +; CHECK-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] +; CHECK-NEXT: retq ## encoding: [0xc3] + %res = call i16 @llvm.x86.avx512.ptestm.w.256(<16 x i16> %x0, <16 x i16> %x1, i16 %x2) + %res1 = call i16 @llvm.x86.avx512.ptestm.w.256(<16 x i16> %x0, <16 x i16> %x1, i16-1) + %res2 = add i16 %res, %res1 + ret i16 %res2 +} + +declare i16 @llvm.x86.avx512.ptestnm.b.128(<16 x i8>, <16 x i8>, i16) + +define i16@test_int_x86_avx512_ptestnm_b_128(<16 x i8> %x0, <16 x i8> %x1, i16 %x2) { +; CHECK-LABEL: test_int_x86_avx512_ptestnm_b_128: +; 
CHECK: ## BB#0: +; CHECK-NEXT: vptestnmb %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf2,0x7e,0x08,0x26,0xc1] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] +; CHECK-NEXT: vptestnmb %xmm1, %xmm0, %k1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x26,0xc9] +; CHECK-NEXT: kmovd %k1, %ecx ## encoding: [0xc5,0xfb,0x93,0xc9] +; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0] +; CHECK-NEXT: addl %ecx, %eax ## encoding: [0x01,0xc8] +; CHECK-NEXT: ## kill: %AX %AX %EAX +; CHECK-NEXT: retq ## encoding: [0xc3] + %res = call i16 @llvm.x86.avx512.ptestnm.b.128(<16 x i8> %x0, <16 x i8> %x1, i16 %x2) + %res1 = call i16 @llvm.x86.avx512.ptestnm.b.128(<16 x i8> %x0, <16 x i8> %x1, i16-1) + %res2 = add i16 %res, %res1 + ret i16 %res2 +} + +declare i32 @llvm.x86.avx512.ptestnm.b.256(<32 x i8>, <32 x i8>, i32) + +define i32@test_int_x86_avx512_ptestnm_b_256(<32 x i8> %x0, <32 x i8> %x1, i32 %x2) { +; CHECK-LABEL: test_int_x86_avx512_ptestnm_b_256: +; CHECK: ## BB#0: +; CHECK-NEXT: vptestnmb %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf2,0x7e,0x28,0x26,0xc1] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] +; CHECK-NEXT: vptestnmb %ymm1, %ymm0, %k1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x26,0xc9] +; CHECK-NEXT: kmovd %k1, %ecx ## encoding: [0xc5,0xfb,0x93,0xc9] +; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0] +; CHECK-NEXT: addl %ecx, %eax ## encoding: [0x01,0xc8] +; CHECK-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] +; CHECK-NEXT: retq ## encoding: [0xc3] + %res = call i32 @llvm.x86.avx512.ptestnm.b.256(<32 x i8> %x0, <32 x i8> %x1, i32 %x2) + %res1 = call i32 @llvm.x86.avx512.ptestnm.b.256(<32 x i8> %x0, <32 x i8> %x1, i32-1) + %res2 = add i32 %res, %res1 + ret i32 %res2 +} + +declare i8 @llvm.x86.avx512.ptestnm.w.128(<8 x i16>, <8 x i16>, i8 %x2) + +define i8@test_int_x86_avx512_ptestnm_w_128(<8 x i16> %x0, <8 x i16> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_ptestnm_w_128: +; CHECK: ## BB#0: +; CHECK-NEXT: vptestnmw %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf2,0xfe,0x08,0x26,0xc1] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] +; CHECK-NEXT: vptestnmw %xmm1, %xmm0, %k1 {%k1} ## encoding: [0x62,0xf2,0xfe,0x09,0x26,0xc9] +; CHECK-NEXT: kmovd %k1, %ecx ## encoding: [0xc5,0xfb,0x93,0xc9] +; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0] +; CHECK-NEXT: addb %cl, %al ## encoding: [0x00,0xc8] +; CHECK-NEXT: ## kill: %AL %AL %EAX +; CHECK-NEXT: retq ## encoding: [0xc3] + %res = call i8 @llvm.x86.avx512.ptestnm.w.128(<8 x i16> %x0, <8 x i16> %x1, i8 %x2) + %res1 = call i8 @llvm.x86.avx512.ptestnm.w.128(<8 x i16> %x0, <8 x i16> %x1, i8-1) + %res2 = add i8 %res, %res1 + ret i8 %res2 +} + +declare i16 @llvm.x86.avx512.ptestnm.w.256(<16 x i16>, <16 x i16>, i16 %x2) + +define i16@test_int_x86_avx512_ptestnm_w_256(<16 x i16> %x0, <16 x i16> %x1, i16 %x2) { +; CHECK-LABEL: test_int_x86_avx512_ptestnm_w_256: +; CHECK: ## BB#0: +; CHECK-NEXT: vptestnmw %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf2,0xfe,0x28,0x26,0xc1] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] +; CHECK-NEXT: vptestnmw %ymm1, %ymm0, %k1 {%k1} ## encoding: [0x62,0xf2,0xfe,0x29,0x26,0xc9] +; CHECK-NEXT: kmovd %k1, %ecx ## encoding: [0xc5,0xfb,0x93,0xc9] +; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0] +; CHECK-NEXT: addl %ecx, %eax ## encoding: [0x01,0xc8] +; CHECK-NEXT: ## kill: %AX %AX %EAX +; CHECK-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] +; CHECK-NEXT: retq ## encoding: [0xc3] + %res = call i16 
@llvm.x86.avx512.ptestnm.w.256(<16 x i16> %x0, <16 x i16> %x1, i16 %x2) + %res1 = call i16 @llvm.x86.avx512.ptestnm.w.256(<16 x i16> %x0, <16 x i16> %x1, i16-1) + %res2 = add i16 %res, %res1 + ret i16 %res2 +} + Index: test/CodeGen/X86/avx512bwvl-intrinsics.ll =================================================================== --- test/CodeGen/X86/avx512bwvl-intrinsics.ll +++ test/CodeGen/X86/avx512bwvl-intrinsics.ll @@ -2515,154 +2515,3 @@ ret <16 x i16> %res4 } -declare i16 @llvm.x86.avx512.ptestm.b.128(<16 x i8>, <16 x i8>, i16) - -define i16@test_int_x86_avx512_ptestm_b_128(<16 x i8> %x0, <16 x i8> %x1, i16 %x2) { -; CHECK-LABEL: test_int_x86_avx512_ptestm_b_128: -; CHECK: ## BB#0: -; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] -; CHECK-NEXT: vptestmb %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf2,0x7d,0x08,0x26,0xc1] -; CHECK-NEXT: kmovd %k0, %ecx ## encoding: [0xc5,0xfb,0x93,0xc8] -; CHECK-NEXT: vptestmb %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x26,0xc1] -; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0] -; CHECK-NEXT: addl %ecx, %eax ## encoding: [0x01,0xc8] -; CHECK-NEXT: ## kill: %AX %AX %EAX -; CHECK-NEXT: retq ## encoding: [0xc3] - %res = call i16 @llvm.x86.avx512.ptestm.b.128(<16 x i8> %x0, <16 x i8> %x1, i16 %x2) - %res1 = call i16 @llvm.x86.avx512.ptestm.b.128(<16 x i8> %x0, <16 x i8> %x1, i16-1) - %res2 = add i16 %res, %res1 - ret i16 %res2 -} - -declare i32 @llvm.x86.avx512.ptestm.b.256(<32 x i8>, <32 x i8>, i32) - -define i32@test_int_x86_avx512_ptestm_b_256(<32 x i8> %x0, <32 x i8> %x1, i32 %x2) { -; CHECK-LABEL: test_int_x86_avx512_ptestm_b_256: -; CHECK: ## BB#0: -; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] -; CHECK-NEXT: vptestmb %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x26,0xc1] -; CHECK-NEXT: kmovd %k0, %ecx ## encoding: [0xc5,0xfb,0x93,0xc8] -; CHECK-NEXT: vptestmb %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf2,0x7d,0x28,0x26,0xc1] -; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0] -; CHECK-NEXT: addl %ecx, %eax ## encoding: [0x01,0xc8] -; CHECK-NEXT: retq ## encoding: [0xc3] - %res = call i32 @llvm.x86.avx512.ptestm.b.256(<32 x i8> %x0, <32 x i8> %x1, i32 %x2) - %res1 = call i32 @llvm.x86.avx512.ptestm.b.256(<32 x i8> %x0, <32 x i8> %x1, i32-1) - %res2 = add i32 %res, %res1 - ret i32 %res2 -} - -declare i8 @llvm.x86.avx512.ptestm.w.128(<8 x i16>, <8 x i16>, i8) - -define i8@test_int_x86_avx512_ptestm_w_128(<8 x i16> %x0, <8 x i16> %x1, i8 %x2) { -; CHECK-LABEL: test_int_x86_avx512_ptestm_w_128: -; CHECK: ## BB#0: -; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] -; CHECK-NEXT: vptestmw %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x08,0x26,0xc1] -; CHECK-NEXT: kmovd %k0, %ecx ## encoding: [0xc5,0xfb,0x93,0xc8] -; CHECK-NEXT: vptestmw %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x26,0xc1] -; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0] -; CHECK-NEXT: addb %cl, %al ## encoding: [0x00,0xc8] -; CHECK-NEXT: ## kill: %AL %AL %EAX -; CHECK-NEXT: retq ## encoding: [0xc3] - %res = call i8 @llvm.x86.avx512.ptestm.w.128(<8 x i16> %x0, <8 x i16> %x1, i8 %x2) - %res1 = call i8 @llvm.x86.avx512.ptestm.w.128(<8 x i16> %x0, <8 x i16> %x1, i8-1) - %res2 = add i8 %res, %res1 - ret i8 %res2 -} - -declare i16 @llvm.x86.avx512.ptestm.w.256(<16 x i16>, <16 x i16>, i16) - -define i16@test_int_x86_avx512_ptestm_w_256(<16 x i16> %x0, <16 x i16> %x1, i16 %x2) { -; CHECK-LABEL: test_int_x86_avx512_ptestm_w_256: -; CHECK: ## BB#0: -; CHECK-NEXT: 
kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] -; CHECK-NEXT: vptestmw %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x28,0x26,0xc1] -; CHECK-NEXT: kmovd %k0, %ecx ## encoding: [0xc5,0xfb,0x93,0xc8] -; CHECK-NEXT: vptestmw %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x26,0xc1] -; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0] -; CHECK-NEXT: addl %ecx, %eax ## encoding: [0x01,0xc8] -; CHECK-NEXT: ## kill: %AX %AX %EAX -; CHECK-NEXT: retq ## encoding: [0xc3] - %res = call i16 @llvm.x86.avx512.ptestm.w.256(<16 x i16> %x0, <16 x i16> %x1, i16 %x2) - %res1 = call i16 @llvm.x86.avx512.ptestm.w.256(<16 x i16> %x0, <16 x i16> %x1, i16-1) - %res2 = add i16 %res, %res1 - ret i16 %res2 -} - -declare i16 @llvm.x86.avx512.ptestnm.b.128(<16 x i8>, <16 x i8>, i16) - -define i16@test_int_x86_avx512_ptestnm_b_128(<16 x i8> %x0, <16 x i8> %x1, i16 %x2) { -; CHECK-LABEL: test_int_x86_avx512_ptestnm_b_128: -; CHECK: ## BB#0: -; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] -; CHECK-NEXT: vptestnmb %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf2,0x7e,0x08,0x26,0xc1] -; CHECK-NEXT: kmovd %k0, %ecx ## encoding: [0xc5,0xfb,0x93,0xc8] -; CHECK-NEXT: vptestnmb %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x26,0xc1] -; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0] -; CHECK-NEXT: addl %ecx, %eax ## encoding: [0x01,0xc8] -; CHECK-NEXT: ## kill: %AX %AX %EAX -; CHECK-NEXT: retq ## encoding: [0xc3] - %res = call i16 @llvm.x86.avx512.ptestnm.b.128(<16 x i8> %x0, <16 x i8> %x1, i16 %x2) - %res1 = call i16 @llvm.x86.avx512.ptestnm.b.128(<16 x i8> %x0, <16 x i8> %x1, i16-1) - %res2 = add i16 %res, %res1 - ret i16 %res2 -} - -declare i32 @llvm.x86.avx512.ptestnm.b.256(<32 x i8>, <32 x i8>, i32) - -define i32@test_int_x86_avx512_ptestnm_b_256(<32 x i8> %x0, <32 x i8> %x1, i32 %x2) { -; CHECK-LABEL: test_int_x86_avx512_ptestnm_b_256: -; CHECK: ## BB#0: -; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] -; CHECK-NEXT: vptestnmb %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x26,0xc1] -; CHECK-NEXT: kmovd %k0, %ecx ## encoding: [0xc5,0xfb,0x93,0xc8] -; CHECK-NEXT: vptestnmb %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf2,0x7e,0x28,0x26,0xc1] -; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0] -; CHECK-NEXT: addl %ecx, %eax ## encoding: [0x01,0xc8] -; CHECK-NEXT: retq ## encoding: [0xc3] - %res = call i32 @llvm.x86.avx512.ptestnm.b.256(<32 x i8> %x0, <32 x i8> %x1, i32 %x2) - %res1 = call i32 @llvm.x86.avx512.ptestnm.b.256(<32 x i8> %x0, <32 x i8> %x1, i32-1) - %res2 = add i32 %res, %res1 - ret i32 %res2 -} - -declare i8 @llvm.x86.avx512.ptestnm.w.128(<8 x i16>, <8 x i16>, i8 %x2) - -define i8@test_int_x86_avx512_ptestnm_w_128(<8 x i16> %x0, <8 x i16> %x1, i8 %x2) { -; CHECK-LABEL: test_int_x86_avx512_ptestnm_w_128: -; CHECK: ## BB#0: -; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] -; CHECK-NEXT: vptestnmw %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf2,0xfe,0x08,0x26,0xc1] -; CHECK-NEXT: kmovd %k0, %ecx ## encoding: [0xc5,0xfb,0x93,0xc8] -; CHECK-NEXT: vptestnmw %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfe,0x09,0x26,0xc1] -; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0] -; CHECK-NEXT: addb %cl, %al ## encoding: [0x00,0xc8] -; CHECK-NEXT: ## kill: %AL %AL %EAX -; CHECK-NEXT: retq ## encoding: [0xc3] - %res = call i8 @llvm.x86.avx512.ptestnm.w.128(<8 x i16> %x0, <8 x i16> %x1, i8 %x2) - %res1 = call i8 @llvm.x86.avx512.ptestnm.w.128(<8 x i16> %x0, <8 x i16> %x1, i8-1) - %res2 = add i8 
%res, %res1 - ret i8 %res2 -} - -declare i16 @llvm.x86.avx512.ptestnm.w.256(<16 x i16>, <16 x i16>, i16 %x2) - -define i16@test_int_x86_avx512_ptestnm_w_256(<16 x i16> %x0, <16 x i16> %x1, i16 %x2) { -; CHECK-LABEL: test_int_x86_avx512_ptestnm_w_256: -; CHECK: ## BB#0: -; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] -; CHECK-NEXT: vptestnmw %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf2,0xfe,0x28,0x26,0xc1] -; CHECK-NEXT: kmovd %k0, %ecx ## encoding: [0xc5,0xfb,0x93,0xc8] -; CHECK-NEXT: vptestnmw %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfe,0x29,0x26,0xc1] -; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0] -; CHECK-NEXT: addl %ecx, %eax ## encoding: [0x01,0xc8] -; CHECK-NEXT: ## kill: %AX %AX %EAX -; CHECK-NEXT: retq ## encoding: [0xc3] - %res = call i16 @llvm.x86.avx512.ptestnm.w.256(<16 x i16> %x0, <16 x i16> %x1, i16 %x2) - %res1 = call i16 @llvm.x86.avx512.ptestnm.w.256(<16 x i16> %x0, <16 x i16> %x1, i16-1) - %res2 = add i16 %res, %res1 - ret i16 %res2 -} - - Index: test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll =================================================================== --- test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll +++ test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll @@ -5916,3 +5916,155 @@ ret <8 x i32> %res2 } +declare i8 @llvm.x86.avx512.ptestm.d.128(<4 x i32>, <4 x i32>,i8) + +define i8@test_int_x86_avx512_ptestm_d_128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_ptestm_d_128: +; CHECK: ## BB#0: +; CHECK-NEXT: vptestmd %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf2,0x7d,0x08,0x27,0xc1] +; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: vptestmd %xmm1, %xmm0, %k1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x27,0xc9] +; CHECK-NEXT: kmovw %k1, %ecx ## encoding: [0xc5,0xf8,0x93,0xc9] +; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] +; CHECK-NEXT: addb %cl, %al ## encoding: [0x00,0xc8] +; CHECK-NEXT: ## kill: %AL %AL %EAX +; CHECK-NEXT: retq ## encoding: [0xc3] + %res = call i8 @llvm.x86.avx512.ptestm.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2) + %res1 = call i8 @llvm.x86.avx512.ptestm.d.128(<4 x i32> %x0, <4 x i32> %x1, i8-1) + %res2 = add i8 %res, %res1 + ret i8 %res2 +} + +declare i8 @llvm.x86.avx512.ptestm.d.256(<8 x i32>, <8 x i32>, i8) + +define i8@test_int_x86_avx512_ptestm_d_256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_ptestm_d_256: +; CHECK: ## BB#0: +; CHECK-NEXT: vptestmd %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf2,0x7d,0x28,0x27,0xc1] +; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: vptestmd %ymm1, %ymm0, %k1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x27,0xc9] +; CHECK-NEXT: kmovw %k1, %ecx ## encoding: [0xc5,0xf8,0x93,0xc9] +; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] +; CHECK-NEXT: addb %cl, %al ## encoding: [0x00,0xc8] +; CHECK-NEXT: ## kill: %AL %AL %EAX +; CHECK-NEXT: retq ## encoding: [0xc3] + %res = call i8 @llvm.x86.avx512.ptestm.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2) + %res1 = call i8 @llvm.x86.avx512.ptestm.d.256(<8 x i32> %x0, <8 x i32> %x1, i8-1) + %res2 = add i8 %res, %res1 + ret i8 %res2 +} + +declare i8 @llvm.x86.avx512.ptestm.q.128(<2 x i64>, <2 x i64>, i8) + +define i8@test_int_x86_avx512_ptestm_q_128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_ptestm_q_128: +; CHECK: ## BB#0: +; CHECK-NEXT: vptestmq %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x08,0x27,0xc1] +; CHECK-NEXT: kmovw %edi, %k1 ## encoding: 
[0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: vptestmq %xmm1, %xmm0, %k1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x27,0xc9] +; CHECK-NEXT: kmovw %k1, %ecx ## encoding: [0xc5,0xf8,0x93,0xc9] +; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] +; CHECK-NEXT: addb %cl, %al ## encoding: [0x00,0xc8] +; CHECK-NEXT: ## kill: %AL %AL %EAX +; CHECK-NEXT: retq ## encoding: [0xc3] + %res = call i8 @llvm.x86.avx512.ptestm.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2) + %res1 = call i8 @llvm.x86.avx512.ptestm.q.128(<2 x i64> %x0, <2 x i64> %x1, i8-1) + %res2 = add i8 %res, %res1 + ret i8 %res2 +} + +declare i8 @llvm.x86.avx512.ptestm.q.256(<4 x i64>, <4 x i64>, i8) + +define i8@test_int_x86_avx512_ptestm_q_256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_ptestm_q_256: +; CHECK: ## BB#0: +; CHECK-NEXT: vptestmq %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x28,0x27,0xc1] +; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: vptestmq %ymm1, %ymm0, %k1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x27,0xc9] +; CHECK-NEXT: kmovw %k1, %ecx ## encoding: [0xc5,0xf8,0x93,0xc9] +; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] +; CHECK-NEXT: addb %cl, %al ## encoding: [0x00,0xc8] +; CHECK-NEXT: ## kill: %AL %AL %EAX +; CHECK-NEXT: retq ## encoding: [0xc3] + %res = call i8 @llvm.x86.avx512.ptestm.q.256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2) + %res1 = call i8 @llvm.x86.avx512.ptestm.q.256(<4 x i64> %x0, <4 x i64> %x1, i8-1) + %res2 = add i8 %res, %res1 + ret i8 %res2 +} + +declare i8 @llvm.x86.avx512.ptestnm.d.128(<4 x i32>, <4 x i32>, i8 %x2) + +define i8@test_int_x86_avx512_ptestnm_d_128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_ptestnm_d_128: +; CHECK: ## BB#0: +; CHECK-NEXT: vptestnmd %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf2,0x7e,0x08,0x27,0xc1] +; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: vptestnmd %xmm1, %xmm0, %k1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x27,0xc9] +; CHECK-NEXT: kmovw %k1, %ecx ## encoding: [0xc5,0xf8,0x93,0xc9] +; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] +; CHECK-NEXT: addb %cl, %al ## encoding: [0x00,0xc8] +; CHECK-NEXT: ## kill: %AL %AL %EAX +; CHECK-NEXT: retq ## encoding: [0xc3] + %res = call i8 @llvm.x86.avx512.ptestnm.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2) + %res1 = call i8 @llvm.x86.avx512.ptestnm.d.128(<4 x i32> %x0, <4 x i32> %x1, i8-1) + %res2 = add i8 %res, %res1 + ret i8 %res2 +} + +declare i8 @llvm.x86.avx512.ptestnm.d.256(<8 x i32>, <8 x i32>, i8 %x2) + +define i8@test_int_x86_avx512_ptestnm_d_256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_ptestnm_d_256: +; CHECK: ## BB#0: +; CHECK-NEXT: vptestnmd %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf2,0x7e,0x28,0x27,0xc1] +; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: vptestnmd %ymm1, %ymm0, %k1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x27,0xc9] +; CHECK-NEXT: kmovw %k1, %ecx ## encoding: [0xc5,0xf8,0x93,0xc9] +; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] +; CHECK-NEXT: addb %cl, %al ## encoding: [0x00,0xc8] +; CHECK-NEXT: ## kill: %AL %AL %EAX +; CHECK-NEXT: retq ## encoding: [0xc3] + %res = call i8 @llvm.x86.avx512.ptestnm.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2) + %res1 = call i8 @llvm.x86.avx512.ptestnm.d.256(<8 x i32> %x0, <8 x i32> %x1, i8-1) + %res2 = add i8 %res, %res1 + ret i8 %res2 +} + +declare i8 @llvm.x86.avx512.ptestnm.q.128(<2 x i64>, <2 x i64>, i8 %x2) + +define 
i8@test_int_x86_avx512_ptestnm_q_128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_ptestnm_q_128: +; CHECK: ## BB#0: +; CHECK-NEXT: vptestnmq %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf2,0xfe,0x08,0x27,0xc1] +; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: vptestnmq %xmm1, %xmm0, %k1 {%k1} ## encoding: [0x62,0xf2,0xfe,0x09,0x27,0xc9] +; CHECK-NEXT: kmovw %k1, %ecx ## encoding: [0xc5,0xf8,0x93,0xc9] +; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] +; CHECK-NEXT: addb %cl, %al ## encoding: [0x00,0xc8] +; CHECK-NEXT: ## kill: %AL %AL %EAX +; CHECK-NEXT: retq ## encoding: [0xc3] + %res = call i8 @llvm.x86.avx512.ptestnm.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2) + %res1 = call i8 @llvm.x86.avx512.ptestnm.q.128(<2 x i64> %x0, <2 x i64> %x1, i8-1) + %res2 = add i8 %res, %res1 + ret i8 %res2 +} + +declare i8 @llvm.x86.avx512.ptestnm.q.256(<4 x i64>, <4 x i64>, i8 %x2) + +define i8@test_int_x86_avx512_ptestnm_q_256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_ptestnm_q_256: +; CHECK: ## BB#0: +; CHECK-NEXT: vptestnmq %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf2,0xfe,0x28,0x27,0xc1] +; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: vptestnmq %ymm1, %ymm0, %k1 {%k1} ## encoding: [0x62,0xf2,0xfe,0x29,0x27,0xc9] +; CHECK-NEXT: kmovw %k1, %ecx ## encoding: [0xc5,0xf8,0x93,0xc9] +; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] +; CHECK-NEXT: addb %cl, %al ## encoding: [0x00,0xc8] +; CHECK-NEXT: ## kill: %AL %AL %EAX +; CHECK-NEXT: retq ## encoding: [0xc3] + %res = call i8 @llvm.x86.avx512.ptestnm.q.256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2) + %res1 = call i8 @llvm.x86.avx512.ptestnm.q.256(<4 x i64> %x0, <4 x i64> %x1, i8-1) + %res2 = add i8 %res, %res1 + ret i8 %res2 +} + Index: test/CodeGen/X86/avx512vl-intrinsics.ll =================================================================== --- test/CodeGen/X86/avx512vl-intrinsics.ll +++ test/CodeGen/X86/avx512vl-intrinsics.ll @@ -3941,160 +3941,6 @@ ret <8 x float> %res4 } -declare i8 @llvm.x86.avx512.ptestm.d.128(<4 x i32>, <4 x i32>,i8) - -define i8@test_int_x86_avx512_ptestm_d_128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2) { -; CHECK-LABEL: test_int_x86_avx512_ptestm_d_128: -; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vptestmd %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x27,0xc1] -; CHECK-NEXT: kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8] -; CHECK-NEXT: vptestmd %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf2,0x7d,0x08,0x27,0xc1] -; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] -; CHECK-NEXT: addb %cl, %al ## encoding: [0x00,0xc8] -; CHECK-NEXT: ## kill: %AL %AL %EAX -; CHECK-NEXT: retq ## encoding: [0xc3] - %res = call i8 @llvm.x86.avx512.ptestm.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2) - %res1 = call i8 @llvm.x86.avx512.ptestm.d.128(<4 x i32> %x0, <4 x i32> %x1, i8-1) - %res2 = add i8 %res, %res1 - ret i8 %res2 -} - -declare i8 @llvm.x86.avx512.ptestm.d.256(<8 x i32>, <8 x i32>, i8) - -define i8@test_int_x86_avx512_ptestm_d_256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2) { -; CHECK-LABEL: test_int_x86_avx512_ptestm_d_256: -; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vptestmd %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf2,0x7d,0x28,0x27,0xc1] -; CHECK-NEXT: kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8] -; CHECK-NEXT: vptestmd %ymm1, %ymm0, %k0 {%k1} ## encoding: 
[0x62,0xf2,0x7d,0x29,0x27,0xc1] -; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] -; CHECK-NEXT: addb %cl, %al ## encoding: [0x00,0xc8] -; CHECK-NEXT: ## kill: %AL %AL %EAX -; CHECK-NEXT: retq ## encoding: [0xc3] - %res = call i8 @llvm.x86.avx512.ptestm.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2) - %res1 = call i8 @llvm.x86.avx512.ptestm.d.256(<8 x i32> %x0, <8 x i32> %x1, i8-1) - %res2 = add i8 %res, %res1 - ret i8 %res2 -} - -declare i8 @llvm.x86.avx512.ptestm.q.128(<2 x i64>, <2 x i64>, i8) - -define i8@test_int_x86_avx512_ptestm_q_128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2) { -; CHECK-LABEL: test_int_x86_avx512_ptestm_q_128: -; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vptestmq %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x27,0xc1] -; CHECK-NEXT: kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8] -; CHECK-NEXT: vptestmq %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x08,0x27,0xc1] -; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] -; CHECK-NEXT: addb %cl, %al ## encoding: [0x00,0xc8] -; CHECK-NEXT: ## kill: %AL %AL %EAX -; CHECK-NEXT: retq ## encoding: [0xc3] - %res = call i8 @llvm.x86.avx512.ptestm.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2) - %res1 = call i8 @llvm.x86.avx512.ptestm.q.128(<2 x i64> %x0, <2 x i64> %x1, i8-1) - %res2 = add i8 %res, %res1 - ret i8 %res2 -} - -declare i8 @llvm.x86.avx512.ptestm.q.256(<4 x i64>, <4 x i64>, i8) - -define i8@test_int_x86_avx512_ptestm_q_256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2) { -; CHECK-LABEL: test_int_x86_avx512_ptestm_q_256: -; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vptestmq %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x27,0xc1] -; CHECK-NEXT: kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8] -; CHECK-NEXT: vptestmq %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x28,0x27,0xc1] -; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] -; CHECK-NEXT: addb %cl, %al ## encoding: [0x00,0xc8] -; CHECK-NEXT: ## kill: %AL %AL %EAX -; CHECK-NEXT: retq ## encoding: [0xc3] - %res = call i8 @llvm.x86.avx512.ptestm.q.256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2) - %res1 = call i8 @llvm.x86.avx512.ptestm.q.256(<4 x i64> %x0, <4 x i64> %x1, i8-1) - %res2 = add i8 %res, %res1 - ret i8 %res2 -} - -declare i8 @llvm.x86.avx512.ptestnm.d.128(<4 x i32>, <4 x i32>, i8 %x2) - -define i8@test_int_x86_avx512_ptestnm_d_128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2) { -; CHECK-LABEL: test_int_x86_avx512_ptestnm_d_128: -; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vptestnmd %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x27,0xc1] -; CHECK-NEXT: kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8] -; CHECK-NEXT: vptestnmd %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf2,0x7e,0x08,0x27,0xc1] -; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] -; CHECK-NEXT: addb %cl, %al ## encoding: [0x00,0xc8] -; CHECK-NEXT: ## kill: %AL %AL %EAX -; CHECK-NEXT: retq ## encoding: [0xc3] - %res = call i8 @llvm.x86.avx512.ptestnm.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2) - %res1 = call i8 @llvm.x86.avx512.ptestnm.d.128(<4 x i32> %x0, <4 x i32> %x1, i8-1) - %res2 = add i8 %res, %res1 - ret i8 %res2 -} - -declare i8 @llvm.x86.avx512.ptestnm.d.256(<8 x i32>, <8 x i32>, i8 %x2) - -define i8@test_int_x86_avx512_ptestnm_d_256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2) { -; CHECK-LABEL: test_int_x86_avx512_ptestnm_d_256: -; CHECK: ## BB#0: -; CHECK-NEXT: 
kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vptestnmd %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf2,0x7e,0x28,0x27,0xc1] -; CHECK-NEXT: kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8] -; CHECK-NEXT: vptestnmd %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x27,0xc1] -; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] -; CHECK-NEXT: addb %cl, %al ## encoding: [0x00,0xc8] -; CHECK-NEXT: ## kill: %AL %AL %EAX -; CHECK-NEXT: retq ## encoding: [0xc3] - %res = call i8 @llvm.x86.avx512.ptestnm.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2) - %res1 = call i8 @llvm.x86.avx512.ptestnm.d.256(<8 x i32> %x0, <8 x i32> %x1, i8-1) - %res2 = add i8 %res, %res1 - ret i8 %res2 -} - -declare i8 @llvm.x86.avx512.ptestnm.q.128(<2 x i64>, <2 x i64>, i8 %x2) - -define i8@test_int_x86_avx512_ptestnm_q_128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2) { -; CHECK-LABEL: test_int_x86_avx512_ptestnm_q_128: -; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vptestnmq %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfe,0x09,0x27,0xc1] -; CHECK-NEXT: kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8] -; CHECK-NEXT: vptestnmq %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf2,0xfe,0x08,0x27,0xc1] -; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] -; CHECK-NEXT: addb %cl, %al ## encoding: [0x00,0xc8] -; CHECK-NEXT: ## kill: %AL %AL %EAX -; CHECK-NEXT: retq ## encoding: [0xc3] - %res = call i8 @llvm.x86.avx512.ptestnm.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2) - %res1 = call i8 @llvm.x86.avx512.ptestnm.q.128(<2 x i64> %x0, <2 x i64> %x1, i8-1) - %res2 = add i8 %res, %res1 - ret i8 %res2 -} - -declare i8 @llvm.x86.avx512.ptestnm.q.256(<4 x i64>, <4 x i64>, i8 %x2) - -define i8@test_int_x86_avx512_ptestnm_q_256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2) { -; CHECK-LABEL: test_int_x86_avx512_ptestnm_q_256: -; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vptestnmq %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfe,0x29,0x27,0xc1] -; CHECK-NEXT: kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8] -; CHECK-NEXT: vptestnmq %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf2,0xfe,0x28,0x27,0xc1] -; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] -; CHECK-NEXT: addb %cl, %al ## encoding: [0x00,0xc8] -; CHECK-NEXT: ## kill: %AL %AL %EAX -; CHECK-NEXT: retq ## encoding: [0xc3] - %res = call i8 @llvm.x86.avx512.ptestnm.q.256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2) - %res1 = call i8 @llvm.x86.avx512.ptestnm.q.256(<4 x i64> %x0, <4 x i64> %x1, i8-1) - %res2 = add i8 %res, %res1 - ret i8 %res2 -} - - - define <2 x i64> @test_x86_avx512_psra_q_128(<2 x i64> %a0, <2 x i64> %a1) { ; CHECK-LABEL: test_x86_avx512_psra_q_128: ; CHECK: ## BB#0: