Index: lib/Target/X86/X86ISelLowering.cpp =================================================================== --- lib/Target/X86/X86ISelLowering.cpp +++ lib/Target/X86/X86ISelLowering.cpp @@ -7844,6 +7844,34 @@ return concat256BitVectors(V1, V2, ResVT, NumElems, DAG, dl); } +// Return true if the instruction zeroes the unused upper part of the +// destination. +static bool isZeroUpperBitsvXi1(unsigned int Opcode) { + switch (Opcode) { + default: + return false; + case X86ISD::PCMPEQM: + case X86ISD::PCMPGTM: + case X86ISD::CMPM: + case X86ISD::CMPMU: + return true; + } +} + +// Return true if all the operands of the given CONCAT_VECTORS node are zeros +// except for the first one. (CONCAT_VECTORS Op, 0, 0,...,0) +static bool isExpandWithZeros(const SDValue &Op) { + assert(Op.getOpcode() == ISD::CONCAT_VECTORS && + "Expand with zeros only possible in CONCAT_VECTORS nodes!"); + + unsigned NumOfOperands = Op.getNumOperands(); + bool AllZeros = true; + for (unsigned i = 1; i < NumOfOperands; i++) + if (!ISD::isBuildVectorAllZeros(Op.getOperand(i).getNode())) + AllZeros = false; + return AllZeros; +} + static SDValue LowerCONCAT_VECTORSvXi1(SDValue Op, const X86Subtarget &Subtarget, SelectionDAG & DAG) { @@ -7854,6 +7882,38 @@ assert(isPowerOf2_32(NumOfOperands) && "Unexpected number of operands in CONCAT_VECTORS"); + // If this node promotes - by concatenating zeroes - the type of the result + // of a node with instruction that zeroes all upper (irrelevant) bits of the + // output register, replace the CONCAT_VECTORS node with the v8i1 version of + // the previous instruction. 
+ const SDValue &Op0 = Op.getOperand(0); + if (Op0.getSimpleValueType().getVectorNumElements() < 8 && + ResVT.getVectorNumElements() == 8 && isExpandWithZeros(Op)) { + if (isZeroUpperBitsvXi1(Op0.getOpcode())) { + // Unmasked case + SmallVector Operands(Op0.getNode()->op_begin(), + Op0.getNode()->op_end()); + return DAG.getNode(Op0.getOpcode(), SDLoc(Op0), MVT::v8i1, Operands); + } else if (Op0.getOpcode() == ISD::AND && + isZeroUpperBitsvXi1(Op0.getOperand(0).getOpcode())) { + // Masked case + const SDValue &ZerroUp = Op0.getOperand(0); + + SDValue Mask = Op0.getOperand(1); + while (Mask.getSimpleValueType() != MVT::v8i1 && + Mask.getOpcode() != ISD::EntryToken) + Mask = Mask.getOperand(0); + + if (Mask.getOpcode() != ISD::EntryToken) { + SmallVector Operands(ZerroUp.getNode()->op_begin(), + ZerroUp.getNode()->op_end()); + const SDValue NewZerroUp = DAG.getNode( + ZerroUp.getOpcode(), SDLoc(ZerroUp), MVT::v8i1, Operands); + return DAG.getNode(ISD::AND, SDLoc(Op0), MVT::v8i1, NewZerroUp, Mask); + } + } + } + SDValue Undef = DAG.getUNDEF(ResVT); if (NumOfOperands > 2) { // Specialize the cases when all, or all but one, of the operands are undef. 
Index: lib/Target/X86/X86InstrAVX512.td =================================================================== --- lib/Target/X86/X86InstrAVX512.td +++ lib/Target/X86/X86InstrAVX512.td @@ -1619,31 +1619,32 @@ } multiclass avx512_icmp_packed opc, string OpcodeStr, SDNode OpNode, - X86VectorVTInfo _, bit IsCommutable> { + X86VectorVTInfo _, bit IsCommutable, RegisterClass KRC = _.KRC, + RegisterClass KRCWM = _.KRCWM> { let isCommutable = IsCommutable in def rr : AVX512BI, EVEX_4V; def rm : AVX512BI, EVEX_4V; def rrk : AVX512BI, EVEX_4V, EVEX_K; def rmk : AVX512BI opc, string OpcodeStr, SDNode OpNode, - X86VectorVTInfo _, bit IsCommutable> : - avx512_icmp_packed { + X86VectorVTInfo _, bit IsCommutable, RegisterClass KRC = _.KRC, + RegisterClass KRCWM = _.KRCWM> : + avx512_icmp_packed { def rmb : AVX512BI, EVEX_4V, EVEX_B; def rmbk : AVX512BI, T8PD, VEX_W, EVEX_CD8<64, CD8VF>; +let Predicates = [HasAVX512, HasVLX], isCodeGenOnly = 1 in { +defm VPCMPEQDZ128v8 : avx512_icmp_packed_rmb<0x76, "vpcmpeqd", X86pcmpeqm, + avx512vl_i32_info.info128, 1, VK8, VK8WM>, + EVEX_CD8<32, CD8VF>, EVEX_V128; + +defm VPCMPEQQZ128v8 : avx512_icmp_packed_rmb<0x29, "vpcmpeqq", X86pcmpeqm, + avx512vl_i64_info.info128, 1, VK8, VK8WM>, + T8PD, VEX_W, EVEX_CD8<64, CD8VF>, EVEX_V128; + +defm VPCMPEQQZ256v8 : avx512_icmp_packed_rmb<0x29, "vpcmpeqq", X86pcmpeqm, + avx512vl_i64_info.info256, 1, VK8, VK8WM>, + T8PD, VEX_W, EVEX_CD8<64, CD8VF>, EVEX_V256; +} + defm VPCMPGTB : avx512_icmp_packed_vl<0x64, "vpcmpgtb", X86pcmpgtm, avx512vl_i8_info, HasBWI>, EVEX_CD8<8, CD8VF>; @@ -1735,6 +1751,20 @@ avx512vl_i64_info, HasAVX512>, T8PD, VEX_W, EVEX_CD8<64, CD8VF>; +let Predicates = [HasAVX512, HasVLX], isCodeGenOnly = 1 in { +defm VPCMPGTDZ128v8 : avx512_icmp_packed_rmb<0x66, "vpcmpgtd", X86pcmpgtm, + avx512vl_i32_info.info128, 0, VK8, VK8WM>, + EVEX_CD8<32, CD8VF>, EVEX_V128; + +defm VPCMPGTQZ128v8 : avx512_icmp_packed_rmb<0x37, "vpcmpgtq", X86pcmpgtm, + avx512vl_i64_info.info128, 0, VK8, VK8WM>, + T8PD, 
VEX_W, EVEX_CD8<64, CD8VF>, EVEX_V128; + +defm VPCMPGTQZ256v8 : avx512_icmp_packed_rmb<0x37, "vpcmpgtq", X86pcmpgtm, + avx512vl_i64_info.info256, 0, VK8, VK8WM>, + T8PD, VEX_W, EVEX_CD8<64, CD8VF>, EVEX_V256; +} + let Predicates = [HasAVX512, NoVLX] in { def : Pat<(v8i1 (X86pcmpgtm (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))), (COPY_TO_REGCLASS (VPCMPGTDZrr @@ -1748,40 +1778,41 @@ } multiclass avx512_icmp_cc opc, string Suffix, SDNode OpNode, - X86VectorVTInfo _> { + X86VectorVTInfo _, RegisterClass KRC = _.KRC, + RegisterClass KRCWM = _.KRCWM> { let isCommutable = 1 in def rri : AVX512AIi8, EVEX_4V; def rmi : AVX512AIi8, EVEX_4V; def rrik : AVX512AIi8, EVEX_4V, EVEX_K; def rmik : AVX512AIi8, EVEX_4V; let mayLoad = 1 in def rmi_alt : AVX512AIi8, EVEX_4V; def rrik_alt : AVX512AIi8, EVEX_4V, EVEX_K; let mayLoad = 1 in def rmik_alt : AVX512AIi8 opc, string Suffix, SDNode OpNode, - X86VectorVTInfo _> : - avx512_icmp_cc { + X86VectorVTInfo _, RegisterClass KRC = _.KRC, + RegisterClass KRCWM = _.KRCWM> : + avx512_icmp_cc { def rmib : AVX512AIi8, EVEX_4V, EVEX_B; def rmibk : AVX512AIi8, EVEX_4V, EVEX_B; def rmibk_alt : AVX512AIi8, VEX_W, EVEX_CD8<64, CD8VF>; +let Predicates = [HasAVX512, HasVLX], isCodeGenOnly = 1 in { + defm VPCMPDZ128v8 : avx512_icmp_cc_rmb<0x1F, "d", X86cmpm, + avx512vl_i32_info.info128, VK8, VK8WM>, + EVEX_CD8<32, CD8VF>, EVEX_V128; + defm VPCMPUDZ128v8 : avx512_icmp_cc_rmb<0x1E, "ud", X86cmpmu, + avx512vl_i32_info.info128, VK8, VK8WM>, + EVEX_CD8<32, CD8VF>, EVEX_V128; + + defm VPCMPQZ128v8 : avx512_icmp_cc_rmb<0x1F, "q", X86cmpm, + avx512vl_i64_info.info128, VK8, VK8WM>, + VEX_W, EVEX_CD8<64, CD8VF>, EVEX_V128; + defm VPCMPQZ256v8 : avx512_icmp_cc_rmb<0x1F, "q", X86cmpm, + avx512vl_i64_info.info256, VK8, VK8WM>, + VEX_W, EVEX_CD8<64, CD8VF>, EVEX_V256; + + defm VPCMPUQZ128v8 : avx512_icmp_cc_rmb<0x1E, "uq", X86cmpmu, + avx512vl_i64_info.info128, VK8, VK8WM>, + VEX_W, EVEX_CD8<64, CD8VF>, EVEX_V128; + defm VPCMPUQZ256v8 : avx512_icmp_cc_rmb<0x1E, 
"uq", X86cmpmu, + avx512vl_i64_info.info256, VK8, VK8WM>, + VEX_W, EVEX_CD8<64, CD8VF>, EVEX_V256; +} + multiclass avx512_vcmp_common { defm rri : AVX512_maskable_cmp<0xC2, MRMSrcReg, _, Index: lib/Target/X86/X86InstrFragmentsSIMD.td =================================================================== --- lib/Target/X86/X86InstrFragmentsSIMD.td +++ lib/Target/X86/X86InstrFragmentsSIMD.td @@ -153,15 +153,14 @@ def X86pcmpgt : SDNode<"X86ISD::PCMPGT", SDTIntBinOp>; def X86IntCmpMask : SDTypeProfile<1, 2, - [SDTCisVec<0>, SDTCVecEltisVT<0, i1>, SDTCisSameAs<1, 2>, SDTCisInt<1>, - SDTCisSameNumEltsAs<0, 1>]>; + [SDTCisVec<0>, SDTCVecEltisVT<0, i1>, SDTCisSameAs<1, 2>, SDTCisInt<1>]>; def X86pcmpeqm : SDNode<"X86ISD::PCMPEQM", X86IntCmpMask, [SDNPCommutative]>; def X86pcmpgtm : SDNode<"X86ISD::PCMPGTM", X86IntCmpMask>; def X86CmpMaskCC : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCVecEltisVT<0, i1>, SDTCisVec<1>, SDTCisSameAs<2, 1>, - SDTCisSameNumEltsAs<0, 1>, SDTCisVT<3, i8>]>; + SDTCisVT<3, i8>]>; def X86CmpMaskCCRound : SDTypeProfile<1, 4, [SDTCisVec<0>,SDTCVecEltisVT<0, i1>, SDTCisVec<1>, SDTCisSameAs<2, 1>, Index: test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll =================================================================== --- test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll +++ test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll @@ -1004,8 +1004,6 @@ ; CHECK-LABEL: test_pcmpeq_q_256: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x28,0x29,0xc1] -; CHECK-NEXT: kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c] -; CHECK-NEXT: kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c] ; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] ; CHECK-NEXT: ## kill: %AL %AL %EAX ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -1018,8 +1016,6 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 {%k1} ## encoding: 
[0x62,0xf2,0xfd,0x29,0x29,0xc1] -; CHECK-NEXT: kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c] -; CHECK-NEXT: kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c] ; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] ; CHECK-NEXT: ## kill: %AL %AL %EAX ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -1058,8 +1054,6 @@ ; CHECK-LABEL: test_pcmpgt_q_256: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x28,0x37,0xc1] -; CHECK-NEXT: kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c] -; CHECK-NEXT: kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c] ; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] ; CHECK-NEXT: ## kill: %AL %AL %EAX ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -1072,8 +1066,6 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x37,0xc1] -; CHECK-NEXT: kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c] -; CHECK-NEXT: kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c] ; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] ; CHECK-NEXT: ## kill: %AL %AL %EAX ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -1087,8 +1079,6 @@ ; CHECK-LABEL: test_pcmpeq_d_128: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x08,0x76,0xc1] -; CHECK-NEXT: kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c] -; CHECK-NEXT: kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c] ; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] ; CHECK-NEXT: ## kill: %AL %AL %EAX ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -1101,8 +1091,6 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x76,0xc1] -; CHECK-NEXT: kshiftlw $12, 
%k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c] -; CHECK-NEXT: kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c] ; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] ; CHECK-NEXT: ## kill: %AL %AL %EAX ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -1116,10 +1104,6 @@ ; CHECK-LABEL: test_pcmpeq_q_128: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x08,0x29,0xc1] -; CHECK-NEXT: kshiftlw $14, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0e] -; CHECK-NEXT: kshiftrw $14, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0e] -; CHECK-NEXT: kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c] -; CHECK-NEXT: kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c] ; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] ; CHECK-NEXT: ## kill: %AL %AL %EAX ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -1132,10 +1116,6 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x29,0xc1] -; CHECK-NEXT: kshiftlw $14, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0e] -; CHECK-NEXT: kshiftrw $14, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0e] -; CHECK-NEXT: kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c] -; CHECK-NEXT: kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c] ; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] ; CHECK-NEXT: ## kill: %AL %AL %EAX ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -1149,8 +1129,6 @@ ; CHECK-LABEL: test_pcmpgt_d_128: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpgtd %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x08,0x66,0xc1] -; CHECK-NEXT: kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c] -; CHECK-NEXT: kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c] ; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] ; CHECK-NEXT: ## kill: %AL 
%AL %EAX ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -1163,8 +1141,6 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpcmpgtd %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x66,0xc1] -; CHECK-NEXT: kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c] -; CHECK-NEXT: kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c] ; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] ; CHECK-NEXT: ## kill: %AL %AL %EAX ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -1178,10 +1154,6 @@ ; CHECK-LABEL: test_pcmpgt_q_128: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpgtq %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x08,0x37,0xc1] -; CHECK-NEXT: kshiftlw $14, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0e] -; CHECK-NEXT: kshiftrw $14, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0e] -; CHECK-NEXT: kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c] -; CHECK-NEXT: kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c] ; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] ; CHECK-NEXT: ## kill: %AL %AL %EAX ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -1194,10 +1166,6 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpcmpgtq %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x37,0xc1] -; CHECK-NEXT: kshiftlw $14, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0e] -; CHECK-NEXT: kshiftrw $14, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0e] -; CHECK-NEXT: kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c] -; CHECK-NEXT: kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c] ; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] ; CHECK-NEXT: ## kill: %AL %AL %EAX ; CHECK-NEXT: retq ## encoding: [0xc3] Index: test/CodeGen/X86/avx512vl-vec-masked-cmp.ll =================================================================== --- 
test/CodeGen/X86/avx512vl-vec-masked-cmp.ll +++ test/CodeGen/X86/avx512vl-vec-masked-cmp.ll @@ -0,0 +1,1580 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s + +; Tests for masked compare intrinsics that clang lowers to the following IR + +; Function Attrs: norecurse nounwind readnone +define zeroext i8 @test_mm_cmpeq_epu32_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr #0 { +; CHECK-LABEL: test_mm_cmpeq_epu32_mask: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 +; CHECK-NEXT: kmovb %k0, %eax +; CHECK-NEXT: retq +entry: + %0 = bitcast <2 x i64> %__a to <4 x i32> + %1 = bitcast <2 x i64> %__b to <4 x i32> + %2 = icmp eq <4 x i32> %0, %1 + %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> + %4 = bitcast <8 x i1> %3 to i8 + ret i8 %4 +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i8 @test_mm_mask_cmpeq_epu32_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr #0 { +; CHECK-LABEL: test_mm_mask_cmpeq_epu32_mask: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: kmovd %edi, %k1 +; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 {%k1} +; CHECK-NEXT: kmovb %k0, %eax +; CHECK-NEXT: retq +entry: + %0 = bitcast <2 x i64> %__a to <4 x i32> + %1 = bitcast <2 x i64> %__b to <4 x i32> + %2 = icmp eq <4 x i32> %0, %1 + %3 = bitcast i8 %__u to <8 x i1> + %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> + %4 = and <4 x i1> %2, %extract.i + %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> + %6 = bitcast <8 x i1> %5 to i8 + ret i8 %6 +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i8 @test_mm_cmpeq_epu64_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr #0 { +; CHECK-LABEL: test_mm_cmpeq_epu64_mask: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 +; CHECK-NEXT: kmovb %k0, %eax +; CHECK-NEXT: retq +entry: + %0 = icmp eq <2 x 
i64> %__a, %__b + %1 = shufflevector <2 x i1> %0, <2 x i1> zeroinitializer, <8 x i32> + %2 = bitcast <8 x i1> %1 to i8 + ret i8 %2 +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i8 @test_mm_mask_cmpeq_epu64_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr #0 { +; CHECK-LABEL: test_mm_mask_cmpeq_epu64_mask: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: kmovd %edi, %k1 +; CHECK-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k1} +; CHECK-NEXT: kmovb %k0, %eax +; CHECK-NEXT: retq +entry: + %0 = icmp eq <2 x i64> %__a, %__b + %1 = bitcast i8 %__u to <8 x i1> + %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + %2 = and <2 x i1> %0, %extract.i + %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> + %4 = bitcast <8 x i1> %3 to i8 + ret i8 %4 +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i8 @test_mm_cmpge_epi32_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr #0 { +; CHECK-LABEL: test_mm_cmpge_epi32_mask: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: vpcmpled %xmm0, %xmm1, %k0 +; CHECK-NEXT: kmovb %k0, %eax +; CHECK-NEXT: retq +entry: + %0 = bitcast <2 x i64> %__a to <4 x i32> + %1 = bitcast <2 x i64> %__b to <4 x i32> + %2 = icmp sge <4 x i32> %0, %1 + %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> + %4 = bitcast <8 x i1> %3 to i8 + ret i8 %4 +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i8 @test_mm_mask_cmpge_epi32_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr #0 { +; CHECK-LABEL: test_mm_mask_cmpge_epi32_mask: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: kmovd %edi, %k1 +; CHECK-NEXT: vpcmpled %xmm0, %xmm1, %k0 {%k1} +; CHECK-NEXT: kmovb %k0, %eax +; CHECK-NEXT: retq +entry: + %0 = bitcast <2 x i64> %__a to <4 x i32> + %1 = bitcast <2 x i64> %__b to <4 x i32> + %2 = icmp sge <4 x i32> %0, %1 + %3 = bitcast i8 %__u to <8 x i1> + %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> + %4 = and <4 x i1> 
%2, %extract.i + %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> + %6 = bitcast <8 x i1> %5 to i8 + ret i8 %6 +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i8 @test_mm_cmpge_epi64_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr #0 { +; CHECK-LABEL: test_mm_cmpge_epi64_mask: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: vpcmpleq %xmm0, %xmm1, %k0 +; CHECK-NEXT: kmovb %k0, %eax +; CHECK-NEXT: retq +entry: + %0 = icmp sge <2 x i64> %__a, %__b + %1 = shufflevector <2 x i1> %0, <2 x i1> zeroinitializer, <8 x i32> + %2 = bitcast <8 x i1> %1 to i8 + ret i8 %2 +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i8 @test_mm_mask_cmpge_epi64_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr #0 { +; CHECK-LABEL: test_mm_mask_cmpge_epi64_mask: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: kmovd %edi, %k1 +; CHECK-NEXT: vpcmpleq %xmm0, %xmm1, %k0 {%k1} +; CHECK-NEXT: kmovb %k0, %eax +; CHECK-NEXT: retq +entry: + %0 = icmp sge <2 x i64> %__a, %__b + %1 = bitcast i8 %__u to <8 x i1> + %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + %2 = and <2 x i1> %0, %extract.i + %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> + %4 = bitcast <8 x i1> %3 to i8 + ret i8 %4 +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i8 @test_mm256_cmpge_epi32_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr #0 { +; CHECK-LABEL: test_mm256_cmpge_epi32_mask: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: vpcmpled %ymm0, %ymm1, %k0 +; CHECK-NEXT: kmovb %k0, %eax +; CHECK-NEXT: vzeroupper +; CHECK-NEXT: retq +entry: + %0 = bitcast <4 x i64> %__a to <8 x i32> + %1 = bitcast <4 x i64> %__b to <8 x i32> + %2 = icmp sge <8 x i32> %0, %1 + %3 = bitcast <8 x i1> %2 to i8 + ret i8 %3 +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i8 @test_mm256_mask_cmpge_epi32_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr #0 { +; 
CHECK-LABEL: test_mm256_mask_cmpge_epi32_mask: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: kmovd %edi, %k1 +; CHECK-NEXT: vpcmpled %ymm0, %ymm1, %k0 {%k1} +; CHECK-NEXT: kmovb %k0, %eax +; CHECK-NEXT: vzeroupper +; CHECK-NEXT: retq +entry: + %0 = bitcast <4 x i64> %__a to <8 x i32> + %1 = bitcast <4 x i64> %__b to <8 x i32> + %2 = icmp sge <8 x i32> %0, %1 + %3 = bitcast i8 %__u to <8 x i1> + %4 = and <8 x i1> %2, %3 + %5 = bitcast <8 x i1> %4 to i8 + ret i8 %5 +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i8 @test_mm256_cmpge_epi64_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr #0 { +; CHECK-LABEL: test_mm256_cmpge_epi64_mask: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: vpcmpleq %ymm0, %ymm1, %k0 +; CHECK-NEXT: kmovb %k0, %eax +; CHECK-NEXT: vzeroupper +; CHECK-NEXT: retq +entry: + %0 = icmp sge <4 x i64> %__a, %__b + %1 = shufflevector <4 x i1> %0, <4 x i1> zeroinitializer, <8 x i32> + %2 = bitcast <8 x i1> %1 to i8 + ret i8 %2 +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i8 @test_mm256_mask_cmpge_epi64_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr #0 { +; CHECK-LABEL: test_mm256_mask_cmpge_epi64_mask: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: kmovd %edi, %k1 +; CHECK-NEXT: vpcmpleq %ymm0, %ymm1, %k0 {%k1} +; CHECK-NEXT: kmovb %k0, %eax +; CHECK-NEXT: vzeroupper +; CHECK-NEXT: retq +entry: + %0 = icmp sge <4 x i64> %__a, %__b + %1 = bitcast i8 %__u to <8 x i1> + %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + %2 = and <4 x i1> %0, %extract.i + %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> + %4 = bitcast <8 x i1> %3 to i8 + ret i8 %4 +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i8 @test_mm_cmpge_epu32_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr #0 { +; CHECK-LABEL: test_mm_cmpge_epu32_mask: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: vpcmpnltud %xmm1, %xmm0, %k0 +; CHECK-NEXT: kmovb %k0, %eax +; CHECK-NEXT: 
retq +entry: + %0 = bitcast <2 x i64> %__a to <4 x i32> + %1 = bitcast <2 x i64> %__b to <4 x i32> + %2 = icmp uge <4 x i32> %0, %1 + %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> + %4 = bitcast <8 x i1> %3 to i8 + ret i8 %4 +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i8 @test_mm_mask_cmpge_epu32_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr #0 { +; CHECK-LABEL: test_mm_mask_cmpge_epu32_mask: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: kmovd %edi, %k1 +; CHECK-NEXT: vpcmpnltud %xmm1, %xmm0, %k0 {%k1} +; CHECK-NEXT: kmovb %k0, %eax +; CHECK-NEXT: retq +entry: + %0 = bitcast <2 x i64> %__a to <4 x i32> + %1 = bitcast <2 x i64> %__b to <4 x i32> + %2 = icmp uge <4 x i32> %0, %1 + %3 = bitcast i8 %__u to <8 x i1> + %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> + %4 = and <4 x i1> %2, %extract.i + %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> + %6 = bitcast <8 x i1> %5 to i8 + ret i8 %6 +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i8 @test_mm_cmpge_epu64_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr #0 { +; CHECK-LABEL: test_mm_cmpge_epu64_mask: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: vpcmpnltuq %xmm1, %xmm0, %k0 +; CHECK-NEXT: kmovb %k0, %eax +; CHECK-NEXT: retq +entry: + %0 = icmp uge <2 x i64> %__a, %__b + %1 = shufflevector <2 x i1> %0, <2 x i1> zeroinitializer, <8 x i32> + %2 = bitcast <8 x i1> %1 to i8 + ret i8 %2 +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i8 @test_mm_mask_cmpge_epu64_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr #0 { +; CHECK-LABEL: test_mm_mask_cmpge_epu64_mask: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: kmovd %edi, %k1 +; CHECK-NEXT: vpcmpnltuq %xmm1, %xmm0, %k0 {%k1} +; CHECK-NEXT: kmovb %k0, %eax +; CHECK-NEXT: retq +entry: + %0 = icmp uge <2 x i64> %__a, %__b + %1 = bitcast i8 %__u to <8 x i1> + %extract.i = shufflevector <8 x i1> %1, <8 x 
i1> undef, <2 x i32> + %2 = and <2 x i1> %0, %extract.i + %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> + %4 = bitcast <8 x i1> %3 to i8 + ret i8 %4 +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i8 @test_mm256_cmpge_epu32_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr #0 { +; CHECK-LABEL: test_mm256_cmpge_epu32_mask: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: vpcmpnltud %ymm1, %ymm0, %k0 +; CHECK-NEXT: kmovb %k0, %eax +; CHECK-NEXT: vzeroupper +; CHECK-NEXT: retq +entry: + %0 = bitcast <4 x i64> %__a to <8 x i32> + %1 = bitcast <4 x i64> %__b to <8 x i32> + %2 = icmp uge <8 x i32> %0, %1 + %3 = bitcast <8 x i1> %2 to i8 + ret i8 %3 +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i8 @test_mm256_mask_cmpge_epu32_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr #0 { +; CHECK-LABEL: test_mm256_mask_cmpge_epu32_mask: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: kmovd %edi, %k1 +; CHECK-NEXT: vpcmpnltud %ymm1, %ymm0, %k0 {%k1} +; CHECK-NEXT: kmovb %k0, %eax +; CHECK-NEXT: vzeroupper +; CHECK-NEXT: retq +entry: + %0 = bitcast <4 x i64> %__a to <8 x i32> + %1 = bitcast <4 x i64> %__b to <8 x i32> + %2 = icmp uge <8 x i32> %0, %1 + %3 = bitcast i8 %__u to <8 x i1> + %4 = and <8 x i1> %2, %3 + %5 = bitcast <8 x i1> %4 to i8 + ret i8 %5 +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i8 @test_mm256_cmpge_epu64_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr #0 { +; CHECK-LABEL: test_mm256_cmpge_epu64_mask: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: vpcmpnltuq %ymm1, %ymm0, %k0 +; CHECK-NEXT: kmovb %k0, %eax +; CHECK-NEXT: vzeroupper +; CHECK-NEXT: retq +entry: + %0 = icmp uge <4 x i64> %__a, %__b + %1 = shufflevector <4 x i1> %0, <4 x i1> zeroinitializer, <8 x i32> + %2 = bitcast <8 x i1> %1 to i8 + ret i8 %2 +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i8 @test_mm256_mask_cmpge_epu64_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x 
i64> %__b) local_unnamed_addr #0 { +; CHECK-LABEL: test_mm256_mask_cmpge_epu64_mask: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: kmovd %edi, %k1 +; CHECK-NEXT: vpcmpnltuq %ymm1, %ymm0, %k0 {%k1} +; CHECK-NEXT: kmovb %k0, %eax +; CHECK-NEXT: vzeroupper +; CHECK-NEXT: retq +entry: + %0 = icmp uge <4 x i64> %__a, %__b + %1 = bitcast i8 %__u to <8 x i1> + %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + %2 = and <4 x i1> %0, %extract.i + %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> + %4 = bitcast <8 x i1> %3 to i8 + ret i8 %4 +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i8 @test_mm_cmpgt_epu32_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr #0 { +; CHECK-LABEL: test_mm_cmpgt_epu32_mask: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: vpcmpnleud %xmm1, %xmm0, %k0 +; CHECK-NEXT: kmovb %k0, %eax +; CHECK-NEXT: retq +entry: + %0 = bitcast <2 x i64> %__a to <4 x i32> + %1 = bitcast <2 x i64> %__b to <4 x i32> + %2 = icmp ugt <4 x i32> %0, %1 + %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> + %4 = bitcast <8 x i1> %3 to i8 + ret i8 %4 +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i8 @test_mm_mask_cmpgt_epu32_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr #0 { +; CHECK-LABEL: test_mm_mask_cmpgt_epu32_mask: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: kmovd %edi, %k1 +; CHECK-NEXT: vpcmpnleud %xmm1, %xmm0, %k0 {%k1} +; CHECK-NEXT: kmovb %k0, %eax +; CHECK-NEXT: retq +entry: + %0 = bitcast <2 x i64> %__a to <4 x i32> + %1 = bitcast <2 x i64> %__b to <4 x i32> + %2 = icmp ugt <4 x i32> %0, %1 + %3 = bitcast i8 %__u to <8 x i1> + %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> + %4 = and <4 x i1> %2, %extract.i + %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> + %6 = bitcast <8 x i1> %5 to i8 + ret i8 %6 +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i8 @test_mm_cmpgt_epu64_mask(<2 x 
i64> %__a, <2 x i64> %__b) local_unnamed_addr #0 { +; CHECK-LABEL: test_mm_cmpgt_epu64_mask: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: vpcmpnleuq %xmm1, %xmm0, %k0 +; CHECK-NEXT: kmovb %k0, %eax +; CHECK-NEXT: retq +entry: + %0 = icmp ugt <2 x i64> %__a, %__b + %1 = shufflevector <2 x i1> %0, <2 x i1> zeroinitializer, <8 x i32> + %2 = bitcast <8 x i1> %1 to i8 + ret i8 %2 +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i8 @test_mm_mask_cmpgt_epu64_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr #0 { +; CHECK-LABEL: test_mm_mask_cmpgt_epu64_mask: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: kmovd %edi, %k1 +; CHECK-NEXT: vpcmpnleuq %xmm1, %xmm0, %k0 {%k1} +; CHECK-NEXT: kmovb %k0, %eax +; CHECK-NEXT: retq +entry: + %0 = icmp ugt <2 x i64> %__a, %__b + %1 = bitcast i8 %__u to <8 x i1> + %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + %2 = and <2 x i1> %0, %extract.i + %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> + %4 = bitcast <8 x i1> %3 to i8 + ret i8 %4 +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i8 @test_mm256_cmpgt_epu32_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr #0 { +; CHECK-LABEL: test_mm256_cmpgt_epu32_mask: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: vpcmpnleud %ymm1, %ymm0, %k0 +; CHECK-NEXT: kmovb %k0, %eax +; CHECK-NEXT: vzeroupper +; CHECK-NEXT: retq +entry: + %0 = bitcast <4 x i64> %__a to <8 x i32> + %1 = bitcast <4 x i64> %__b to <8 x i32> + %2 = icmp ugt <8 x i32> %0, %1 + %3 = bitcast <8 x i1> %2 to i8 + ret i8 %3 +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i8 @test_mm256_mask_cmpgt_epu32_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr #0 { +; CHECK-LABEL: test_mm256_mask_cmpgt_epu32_mask: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: kmovd %edi, %k1 +; CHECK-NEXT: vpcmpnleud %ymm1, %ymm0, %k0 {%k1} +; CHECK-NEXT: kmovb %k0, %eax +; CHECK-NEXT: vzeroupper +; CHECK-NEXT: retq 
+entry: + %0 = bitcast <4 x i64> %__a to <8 x i32> + %1 = bitcast <4 x i64> %__b to <8 x i32> + %2 = icmp ugt <8 x i32> %0, %1 + %3 = bitcast i8 %__u to <8 x i1> + %4 = and <8 x i1> %2, %3 + %5 = bitcast <8 x i1> %4 to i8 + ret i8 %5 +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i8 @test_mm256_cmpgt_epu64_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr #0 { +; CHECK-LABEL: test_mm256_cmpgt_epu64_mask: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: vpcmpnleuq %ymm1, %ymm0, %k0 +; CHECK-NEXT: kmovb %k0, %eax +; CHECK-NEXT: vzeroupper +; CHECK-NEXT: retq +entry: + %0 = icmp ugt <4 x i64> %__a, %__b + %1 = shufflevector <4 x i1> %0, <4 x i1> zeroinitializer, <8 x i32> + %2 = bitcast <8 x i1> %1 to i8 + ret i8 %2 +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i8 @test_mm256_mask_cmpgt_epu64_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr #0 { +; CHECK-LABEL: test_mm256_mask_cmpgt_epu64_mask: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: kmovd %edi, %k1 +; CHECK-NEXT: vpcmpnleuq %ymm1, %ymm0, %k0 {%k1} +; CHECK-NEXT: kmovb %k0, %eax +; CHECK-NEXT: vzeroupper +; CHECK-NEXT: retq +entry: + %0 = icmp ugt <4 x i64> %__a, %__b + %1 = bitcast i8 %__u to <8 x i1> + %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + %2 = and <4 x i1> %0, %extract.i + %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> + %4 = bitcast <8 x i1> %3 to i8 + ret i8 %4 +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i8 @test_mm_cmple_epi32_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr #0 { +; CHECK-LABEL: test_mm_cmple_epi32_mask: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: vpcmpled %xmm1, %xmm0, %k0 +; CHECK-NEXT: kmovb %k0, %eax +; CHECK-NEXT: retq +entry: + %0 = bitcast <2 x i64> %__a to <4 x i32> + %1 = bitcast <2 x i64> %__b to <4 x i32> + %2 = icmp sle <4 x i32> %0, %1 + %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> + %4 = bitcast <8 x i1> %3 
to i8 + ret i8 %4 +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i8 @test_mm_mask_cmple_epi32_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr #0 { +; CHECK-LABEL: test_mm_mask_cmple_epi32_mask: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: kmovd %edi, %k1 +; CHECK-NEXT: vpcmpled %xmm1, %xmm0, %k0 {%k1} +; CHECK-NEXT: kmovb %k0, %eax +; CHECK-NEXT: retq +entry: + %0 = bitcast <2 x i64> %__a to <4 x i32> + %1 = bitcast <2 x i64> %__b to <4 x i32> + %2 = icmp sle <4 x i32> %0, %1 + %3 = bitcast i8 %__u to <8 x i1> + %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> + %4 = and <4 x i1> %2, %extract.i + %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> + %6 = bitcast <8 x i1> %5 to i8 + ret i8 %6 +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i8 @test_mm_cmple_epi64_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr #0 { +; CHECK-LABEL: test_mm_cmple_epi64_mask: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: vpcmpleq %xmm1, %xmm0, %k0 +; CHECK-NEXT: kmovb %k0, %eax +; CHECK-NEXT: retq +entry: + %0 = icmp sle <2 x i64> %__a, %__b + %1 = shufflevector <2 x i1> %0, <2 x i1> zeroinitializer, <8 x i32> + %2 = bitcast <8 x i1> %1 to i8 + ret i8 %2 +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i8 @test_mm_mask_cmple_epi64_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr #0 { +; CHECK-LABEL: test_mm_mask_cmple_epi64_mask: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: kmovd %edi, %k1 +; CHECK-NEXT: vpcmpleq %xmm1, %xmm0, %k0 {%k1} +; CHECK-NEXT: kmovb %k0, %eax +; CHECK-NEXT: retq +entry: + %0 = icmp sle <2 x i64> %__a, %__b + %1 = bitcast i8 %__u to <8 x i1> + %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + %2 = and <2 x i1> %0, %extract.i + %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> + %4 = bitcast <8 x i1> %3 to i8 + ret i8 %4 +} + +; Function Attrs: norecurse nounwind readnone +define 
zeroext i8 @test_mm256_cmple_epi32_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr #0 { +; CHECK-LABEL: test_mm256_cmple_epi32_mask: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: vpcmpled %ymm1, %ymm0, %k0 +; CHECK-NEXT: kmovb %k0, %eax +; CHECK-NEXT: vzeroupper +; CHECK-NEXT: retq +entry: + %0 = bitcast <4 x i64> %__a to <8 x i32> + %1 = bitcast <4 x i64> %__b to <8 x i32> + %2 = icmp sle <8 x i32> %0, %1 + %3 = bitcast <8 x i1> %2 to i8 + ret i8 %3 +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i8 @test_mm256_mask_cmple_epi32_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr #0 { +; CHECK-LABEL: test_mm256_mask_cmple_epi32_mask: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: kmovd %edi, %k1 +; CHECK-NEXT: vpcmpled %ymm1, %ymm0, %k0 {%k1} +; CHECK-NEXT: kmovb %k0, %eax +; CHECK-NEXT: vzeroupper +; CHECK-NEXT: retq +entry: + %0 = bitcast <4 x i64> %__a to <8 x i32> + %1 = bitcast <4 x i64> %__b to <8 x i32> + %2 = icmp sle <8 x i32> %0, %1 + %3 = bitcast i8 %__u to <8 x i1> + %4 = and <8 x i1> %2, %3 + %5 = bitcast <8 x i1> %4 to i8 + ret i8 %5 +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i8 @test_mm256_cmple_epi64_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr #0 { +; CHECK-LABEL: test_mm256_cmple_epi64_mask: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: vpcmpleq %ymm1, %ymm0, %k0 +; CHECK-NEXT: kmovb %k0, %eax +; CHECK-NEXT: vzeroupper +; CHECK-NEXT: retq +entry: + %0 = icmp sle <4 x i64> %__a, %__b + %1 = shufflevector <4 x i1> %0, <4 x i1> zeroinitializer, <8 x i32> + %2 = bitcast <8 x i1> %1 to i8 + ret i8 %2 +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i8 @test_mm256_mask_cmple_epi64_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr #0 { +; CHECK-LABEL: test_mm256_mask_cmple_epi64_mask: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: kmovd %edi, %k1 +; CHECK-NEXT: vpcmpleq %ymm1, %ymm0, %k0 {%k1} +; CHECK-NEXT: kmovb %k0, %eax +; CHECK-NEXT: 
vzeroupper +; CHECK-NEXT: retq +entry: + %0 = icmp sle <4 x i64> %__a, %__b + %1 = bitcast i8 %__u to <8 x i1> + %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + %2 = and <4 x i1> %0, %extract.i + %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> + %4 = bitcast <8 x i1> %3 to i8 + ret i8 %4 +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i8 @test_mm_cmple_epu32_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr #0 { +; CHECK-LABEL: test_mm_cmple_epu32_mask: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: vpcmpleud %xmm1, %xmm0, %k0 +; CHECK-NEXT: kmovb %k0, %eax +; CHECK-NEXT: retq +entry: + %0 = bitcast <2 x i64> %__a to <4 x i32> + %1 = bitcast <2 x i64> %__b to <4 x i32> + %2 = icmp ule <4 x i32> %0, %1 + %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> + %4 = bitcast <8 x i1> %3 to i8 + ret i8 %4 +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i8 @test_mm_mask_cmple_epu32_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr #0 { +; CHECK-LABEL: test_mm_mask_cmple_epu32_mask: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: kmovd %edi, %k1 +; CHECK-NEXT: vpcmpleud %xmm1, %xmm0, %k0 {%k1} +; CHECK-NEXT: kmovb %k0, %eax +; CHECK-NEXT: retq +entry: + %0 = bitcast <2 x i64> %__a to <4 x i32> + %1 = bitcast <2 x i64> %__b to <4 x i32> + %2 = icmp ule <4 x i32> %0, %1 + %3 = bitcast i8 %__u to <8 x i1> + %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> + %4 = and <4 x i1> %2, %extract.i + %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> + %6 = bitcast <8 x i1> %5 to i8 + ret i8 %6 +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i8 @test_mm_cmple_epu64_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr #0 { +; CHECK-LABEL: test_mm_cmple_epu64_mask: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: vpcmpleuq %xmm1, %xmm0, %k0 +; CHECK-NEXT: kmovb %k0, %eax +; CHECK-NEXT: retq +entry: + %0 = icmp ule <2 x 
i64> %__a, %__b + %1 = shufflevector <2 x i1> %0, <2 x i1> zeroinitializer, <8 x i32> + %2 = bitcast <8 x i1> %1 to i8 + ret i8 %2 +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i8 @test_mm_mask_cmple_epu64_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr #0 { +; CHECK-LABEL: test_mm_mask_cmple_epu64_mask: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: kmovd %edi, %k1 +; CHECK-NEXT: vpcmpleuq %xmm1, %xmm0, %k0 {%k1} +; CHECK-NEXT: kmovb %k0, %eax +; CHECK-NEXT: retq +entry: + %0 = icmp ule <2 x i64> %__a, %__b + %1 = bitcast i8 %__u to <8 x i1> + %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + %2 = and <2 x i1> %0, %extract.i + %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> + %4 = bitcast <8 x i1> %3 to i8 + ret i8 %4 +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i8 @test_mm256_cmple_epu32_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr #0 { +; CHECK-LABEL: test_mm256_cmple_epu32_mask: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: vpcmpleud %ymm1, %ymm0, %k0 +; CHECK-NEXT: kmovb %k0, %eax +; CHECK-NEXT: vzeroupper +; CHECK-NEXT: retq +entry: + %0 = bitcast <4 x i64> %__a to <8 x i32> + %1 = bitcast <4 x i64> %__b to <8 x i32> + %2 = icmp ule <8 x i32> %0, %1 + %3 = bitcast <8 x i1> %2 to i8 + ret i8 %3 +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i8 @test_mm256_mask_cmple_epu32_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr #0 { +; CHECK-LABEL: test_mm256_mask_cmple_epu32_mask: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: kmovd %edi, %k1 +; CHECK-NEXT: vpcmpleud %ymm1, %ymm0, %k0 {%k1} +; CHECK-NEXT: kmovb %k0, %eax +; CHECK-NEXT: vzeroupper +; CHECK-NEXT: retq +entry: + %0 = bitcast <4 x i64> %__a to <8 x i32> + %1 = bitcast <4 x i64> %__b to <8 x i32> + %2 = icmp ule <8 x i32> %0, %1 + %3 = bitcast i8 %__u to <8 x i1> + %4 = and <8 x i1> %2, %3 + %5 = bitcast <8 x i1> %4 to i8 + ret i8 %5 +} + +; Function 
Attrs: norecurse nounwind readnone +define zeroext i8 @test_mm256_cmple_epu64_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr #0 { +; CHECK-LABEL: test_mm256_cmple_epu64_mask: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: vpcmpleuq %ymm1, %ymm0, %k0 +; CHECK-NEXT: kmovb %k0, %eax +; CHECK-NEXT: vzeroupper +; CHECK-NEXT: retq +entry: + %0 = icmp ule <4 x i64> %__a, %__b + %1 = shufflevector <4 x i1> %0, <4 x i1> zeroinitializer, <8 x i32> + %2 = bitcast <8 x i1> %1 to i8 + ret i8 %2 +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i8 @test_mm256_mask_cmple_epu64_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr #0 { +; CHECK-LABEL: test_mm256_mask_cmple_epu64_mask: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: kmovd %edi, %k1 +; CHECK-NEXT: vpcmpleuq %ymm1, %ymm0, %k0 {%k1} +; CHECK-NEXT: kmovb %k0, %eax +; CHECK-NEXT: vzeroupper +; CHECK-NEXT: retq +entry: + %0 = icmp ule <4 x i64> %__a, %__b + %1 = bitcast i8 %__u to <8 x i1> + %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + %2 = and <4 x i1> %0, %extract.i + %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> + %4 = bitcast <8 x i1> %3 to i8 + ret i8 %4 +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i8 @test_mm_cmplt_epi32_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr #0 { +; CHECK-LABEL: test_mm_cmplt_epi32_mask: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: vpcmpgtd %xmm0, %xmm1, %k0 +; CHECK-NEXT: kmovb %k0, %eax +; CHECK-NEXT: retq +entry: + %0 = bitcast <2 x i64> %__a to <4 x i32> + %1 = bitcast <2 x i64> %__b to <4 x i32> + %2 = icmp slt <4 x i32> %0, %1 + %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> + %4 = bitcast <8 x i1> %3 to i8 + ret i8 %4 +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i8 @test_mm_mask_cmplt_epi32_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr #0 { +; CHECK-LABEL: test_mm_mask_cmplt_epi32_mask: +; CHECK: ## 
BB#0: ## %entry +; CHECK-NEXT: kmovd %edi, %k1 +; CHECK-NEXT: vpcmpgtd %xmm0, %xmm1, %k0 {%k1} +; CHECK-NEXT: kmovb %k0, %eax +; CHECK-NEXT: retq +entry: + %0 = bitcast <2 x i64> %__a to <4 x i32> + %1 = bitcast <2 x i64> %__b to <4 x i32> + %2 = icmp slt <4 x i32> %0, %1 + %3 = bitcast i8 %__u to <8 x i1> + %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> + %4 = and <4 x i1> %2, %extract.i + %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> + %6 = bitcast <8 x i1> %5 to i8 + ret i8 %6 +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i8 @test_mm_cmplt_epi64_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr #0 { +; CHECK-LABEL: test_mm_cmplt_epi64_mask: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: vpcmpgtq %xmm0, %xmm1, %k0 +; CHECK-NEXT: kmovb %k0, %eax +; CHECK-NEXT: retq +entry: + %0 = icmp slt <2 x i64> %__a, %__b + %1 = shufflevector <2 x i1> %0, <2 x i1> zeroinitializer, <8 x i32> + %2 = bitcast <8 x i1> %1 to i8 + ret i8 %2 +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i8 @test_mm_mask_cmplt_epi64_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr #0 { +; CHECK-LABEL: test_mm_mask_cmplt_epi64_mask: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: kmovd %edi, %k1 +; CHECK-NEXT: vpcmpgtq %xmm0, %xmm1, %k0 {%k1} +; CHECK-NEXT: kmovb %k0, %eax +; CHECK-NEXT: retq +entry: + %0 = icmp slt <2 x i64> %__a, %__b + %1 = bitcast i8 %__u to <8 x i1> + %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + %2 = and <2 x i1> %0, %extract.i + %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> + %4 = bitcast <8 x i1> %3 to i8 + ret i8 %4 +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i8 @test_mm256_cmplt_epi32_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr #0 { +; CHECK-LABEL: test_mm256_cmplt_epi32_mask: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: vpcmpgtd %ymm0, %ymm1, %k0 +; CHECK-NEXT: kmovb %k0, %eax +; CHECK-NEXT: 
vzeroupper +; CHECK-NEXT: retq +entry: + %0 = bitcast <4 x i64> %__a to <8 x i32> + %1 = bitcast <4 x i64> %__b to <8 x i32> + %2 = icmp slt <8 x i32> %0, %1 + %3 = bitcast <8 x i1> %2 to i8 + ret i8 %3 +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i8 @test_mm256_mask_cmplt_epi32_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr #0 { +; CHECK-LABEL: test_mm256_mask_cmplt_epi32_mask: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: kmovd %edi, %k1 +; CHECK-NEXT: vpcmpgtd %ymm0, %ymm1, %k0 {%k1} +; CHECK-NEXT: kmovb %k0, %eax +; CHECK-NEXT: vzeroupper +; CHECK-NEXT: retq +entry: + %0 = bitcast <4 x i64> %__a to <8 x i32> + %1 = bitcast <4 x i64> %__b to <8 x i32> + %2 = icmp slt <8 x i32> %0, %1 + %3 = bitcast i8 %__u to <8 x i1> + %4 = and <8 x i1> %2, %3 + %5 = bitcast <8 x i1> %4 to i8 + ret i8 %5 +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i8 @test_mm256_cmplt_epi64_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr #0 { +; CHECK-LABEL: test_mm256_cmplt_epi64_mask: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: vpcmpgtq %ymm0, %ymm1, %k0 +; CHECK-NEXT: kmovb %k0, %eax +; CHECK-NEXT: vzeroupper +; CHECK-NEXT: retq +entry: + %0 = icmp slt <4 x i64> %__a, %__b + %1 = shufflevector <4 x i1> %0, <4 x i1> zeroinitializer, <8 x i32> + %2 = bitcast <8 x i1> %1 to i8 + ret i8 %2 +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i8 @test_mm256_mask_cmplt_epi64_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr #0 { +; CHECK-LABEL: test_mm256_mask_cmplt_epi64_mask: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: kmovd %edi, %k1 +; CHECK-NEXT: vpcmpgtq %ymm0, %ymm1, %k0 {%k1} +; CHECK-NEXT: kmovb %k0, %eax +; CHECK-NEXT: vzeroupper +; CHECK-NEXT: retq +entry: + %0 = icmp slt <4 x i64> %__a, %__b + %1 = bitcast i8 %__u to <8 x i1> + %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + %2 = and <4 x i1> %0, %extract.i + %3 = shufflevector <4 x i1> %2, <4 x i1> 
zeroinitializer, <8 x i32> + %4 = bitcast <8 x i1> %3 to i8 + ret i8 %4 +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i8 @test_mm_cmplt_epu32_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr #0 { +; CHECK-LABEL: test_mm_cmplt_epu32_mask: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: vpcmpltud %xmm1, %xmm0, %k0 +; CHECK-NEXT: kmovb %k0, %eax +; CHECK-NEXT: retq +entry: + %0 = bitcast <2 x i64> %__a to <4 x i32> + %1 = bitcast <2 x i64> %__b to <4 x i32> + %2 = icmp ult <4 x i32> %0, %1 + %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> + %4 = bitcast <8 x i1> %3 to i8 + ret i8 %4 +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i8 @test_mm_mask_cmplt_epu32_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr #0 { +; CHECK-LABEL: test_mm_mask_cmplt_epu32_mask: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: kmovd %edi, %k1 +; CHECK-NEXT: vpcmpltud %xmm1, %xmm0, %k0 {%k1} +; CHECK-NEXT: kmovb %k0, %eax +; CHECK-NEXT: retq +entry: + %0 = bitcast <2 x i64> %__a to <4 x i32> + %1 = bitcast <2 x i64> %__b to <4 x i32> + %2 = icmp ult <4 x i32> %0, %1 + %3 = bitcast i8 %__u to <8 x i1> + %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> + %4 = and <4 x i1> %2, %extract.i + %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> + %6 = bitcast <8 x i1> %5 to i8 + ret i8 %6 +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i8 @test_mm_cmplt_epu64_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr #0 { +; CHECK-LABEL: test_mm_cmplt_epu64_mask: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: vpcmpltuq %xmm1, %xmm0, %k0 +; CHECK-NEXT: kmovb %k0, %eax +; CHECK-NEXT: retq +entry: + %0 = icmp ult <2 x i64> %__a, %__b + %1 = shufflevector <2 x i1> %0, <2 x i1> zeroinitializer, <8 x i32> + %2 = bitcast <8 x i1> %1 to i8 + ret i8 %2 +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i8 @test_mm_mask_cmplt_epu64_mask(i8 zeroext %__u, <2 x 
i64> %__a, <2 x i64> %__b) local_unnamed_addr #0 { +; CHECK-LABEL: test_mm_mask_cmplt_epu64_mask: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: kmovd %edi, %k1 +; CHECK-NEXT: vpcmpltuq %xmm1, %xmm0, %k0 {%k1} +; CHECK-NEXT: kmovb %k0, %eax +; CHECK-NEXT: retq +entry: + %0 = icmp ult <2 x i64> %__a, %__b + %1 = bitcast i8 %__u to <8 x i1> + %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + %2 = and <2 x i1> %0, %extract.i + %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> + %4 = bitcast <8 x i1> %3 to i8 + ret i8 %4 +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i8 @test_mm256_cmplt_epu32_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr #0 { +; CHECK-LABEL: test_mm256_cmplt_epu32_mask: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: vpcmpltud %ymm1, %ymm0, %k0 +; CHECK-NEXT: kmovb %k0, %eax +; CHECK-NEXT: vzeroupper +; CHECK-NEXT: retq +entry: + %0 = bitcast <4 x i64> %__a to <8 x i32> + %1 = bitcast <4 x i64> %__b to <8 x i32> + %2 = icmp ult <8 x i32> %0, %1 + %3 = bitcast <8 x i1> %2 to i8 + ret i8 %3 +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i8 @test_mm256_mask_cmplt_epu32_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr #0 { +; CHECK-LABEL: test_mm256_mask_cmplt_epu32_mask: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: kmovd %edi, %k1 +; CHECK-NEXT: vpcmpltud %ymm1, %ymm0, %k0 {%k1} +; CHECK-NEXT: kmovb %k0, %eax +; CHECK-NEXT: vzeroupper +; CHECK-NEXT: retq +entry: + %0 = bitcast <4 x i64> %__a to <8 x i32> + %1 = bitcast <4 x i64> %__b to <8 x i32> + %2 = icmp ult <8 x i32> %0, %1 + %3 = bitcast i8 %__u to <8 x i1> + %4 = and <8 x i1> %2, %3 + %5 = bitcast <8 x i1> %4 to i8 + ret i8 %5 +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i8 @test_mm256_cmplt_epu64_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr #0 { +; CHECK-LABEL: test_mm256_cmplt_epu64_mask: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: vpcmpltuq %ymm1, %ymm0, %k0 
+; CHECK-NEXT: kmovb %k0, %eax +; CHECK-NEXT: vzeroupper +; CHECK-NEXT: retq +entry: + %0 = icmp ult <4 x i64> %__a, %__b + %1 = shufflevector <4 x i1> %0, <4 x i1> zeroinitializer, <8 x i32> + %2 = bitcast <8 x i1> %1 to i8 + ret i8 %2 +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i8 @test_mm256_mask_cmplt_epu64_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr #0 { +; CHECK-LABEL: test_mm256_mask_cmplt_epu64_mask: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: kmovd %edi, %k1 +; CHECK-NEXT: vpcmpltuq %ymm1, %ymm0, %k0 {%k1} +; CHECK-NEXT: kmovb %k0, %eax +; CHECK-NEXT: vzeroupper +; CHECK-NEXT: retq +entry: + %0 = icmp ult <4 x i64> %__a, %__b + %1 = bitcast i8 %__u to <8 x i1> + %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + %2 = and <4 x i1> %0, %extract.i + %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> + %4 = bitcast <8 x i1> %3 to i8 + ret i8 %4 +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i8 @test_mm_cmpneq_epi32_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr #0 { +; CHECK-LABEL: test_mm_cmpneq_epi32_mask: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: vpcmpneqd %xmm1, %xmm0, %k0 +; CHECK-NEXT: kmovb %k0, %eax +; CHECK-NEXT: retq +entry: + %0 = bitcast <2 x i64> %__a to <4 x i32> + %1 = bitcast <2 x i64> %__b to <4 x i32> + %2 = icmp ne <4 x i32> %0, %1 + %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> + %4 = bitcast <8 x i1> %3 to i8 + ret i8 %4 +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i8 @test_mm_mask_cmpneq_epi32_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr #0 { +; CHECK-LABEL: test_mm_mask_cmpneq_epi32_mask: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: kmovd %edi, %k1 +; CHECK-NEXT: vpcmpneqd %xmm1, %xmm0, %k0 {%k1} +; CHECK-NEXT: kmovb %k0, %eax +; CHECK-NEXT: retq +entry: + %0 = bitcast <2 x i64> %__a to <4 x i32> + %1 = bitcast <2 x i64> %__b to <4 x i32> + %2 = icmp 
ne <4 x i32> %0, %1 + %3 = bitcast i8 %__u to <8 x i1> + %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> + %4 = and <4 x i1> %2, %extract.i + %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> + %6 = bitcast <8 x i1> %5 to i8 + ret i8 %6 +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i8 @test_mm_cmpneq_epi64_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr #0 { +; CHECK-LABEL: test_mm_cmpneq_epi64_mask: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: vpcmpneqq %xmm1, %xmm0, %k0 +; CHECK-NEXT: kmovb %k0, %eax +; CHECK-NEXT: retq +entry: + %0 = icmp ne <2 x i64> %__a, %__b + %1 = shufflevector <2 x i1> %0, <2 x i1> zeroinitializer, <8 x i32> + %2 = bitcast <8 x i1> %1 to i8 + ret i8 %2 +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i8 @test_mm_mask_cmpneq_epi64_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr #0 { +; CHECK-LABEL: test_mm_mask_cmpneq_epi64_mask: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: kmovd %edi, %k1 +; CHECK-NEXT: vpcmpneqq %xmm1, %xmm0, %k0 {%k1} +; CHECK-NEXT: kmovb %k0, %eax +; CHECK-NEXT: retq +entry: + %0 = icmp ne <2 x i64> %__a, %__b + %1 = bitcast i8 %__u to <8 x i1> + %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + %2 = and <2 x i1> %0, %extract.i + %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> + %4 = bitcast <8 x i1> %3 to i8 + ret i8 %4 +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i8 @test_mm256_cmpneq_epi32_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr #0 { +; CHECK-LABEL: test_mm256_cmpneq_epi32_mask: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: vpcmpneqd %ymm1, %ymm0, %k0 +; CHECK-NEXT: kmovb %k0, %eax +; CHECK-NEXT: vzeroupper +; CHECK-NEXT: retq +entry: + %0 = bitcast <4 x i64> %__a to <8 x i32> + %1 = bitcast <4 x i64> %__b to <8 x i32> + %2 = icmp ne <8 x i32> %0, %1 + %3 = bitcast <8 x i1> %2 to i8 + ret i8 %3 +} + +; Function Attrs: norecurse nounwind 
readnone +define zeroext i8 @test_mm256_mask_cmpneq_epi32_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr #0 { +; CHECK-LABEL: test_mm256_mask_cmpneq_epi32_mask: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: kmovd %edi, %k1 +; CHECK-NEXT: vpcmpneqd %ymm1, %ymm0, %k0 {%k1} +; CHECK-NEXT: kmovb %k0, %eax +; CHECK-NEXT: vzeroupper +; CHECK-NEXT: retq +entry: + %0 = bitcast <4 x i64> %__a to <8 x i32> + %1 = bitcast <4 x i64> %__b to <8 x i32> + %2 = icmp ne <8 x i32> %0, %1 + %3 = bitcast i8 %__u to <8 x i1> + %4 = and <8 x i1> %2, %3 + %5 = bitcast <8 x i1> %4 to i8 + ret i8 %5 +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i8 @test_mm256_cmpneq_epi64_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr #0 { +; CHECK-LABEL: test_mm256_cmpneq_epi64_mask: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: vpcmpneqq %ymm1, %ymm0, %k0 +; CHECK-NEXT: kmovb %k0, %eax +; CHECK-NEXT: vzeroupper +; CHECK-NEXT: retq +entry: + %0 = icmp ne <4 x i64> %__a, %__b + %1 = shufflevector <4 x i1> %0, <4 x i1> zeroinitializer, <8 x i32> + %2 = bitcast <8 x i1> %1 to i8 + ret i8 %2 +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i8 @test_mm256_mask_cmpneq_epi64_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr #0 { +; CHECK-LABEL: test_mm256_mask_cmpneq_epi64_mask: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: kmovd %edi, %k1 +; CHECK-NEXT: vpcmpneqq %ymm1, %ymm0, %k0 {%k1} +; CHECK-NEXT: kmovb %k0, %eax +; CHECK-NEXT: vzeroupper +; CHECK-NEXT: retq +entry: + %0 = icmp ne <4 x i64> %__a, %__b + %1 = bitcast i8 %__u to <8 x i1> + %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + %2 = and <4 x i1> %0, %extract.i + %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> + %4 = bitcast <8 x i1> %3 to i8 + ret i8 %4 +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i8 @test_mm_cmpneq_epu32_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr #0 { +; CHECK-LABEL: 
test_mm_cmpneq_epu32_mask: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: vpcmpneqd %xmm1, %xmm0, %k0 +; CHECK-NEXT: kmovb %k0, %eax +; CHECK-NEXT: retq +entry: + %0 = bitcast <2 x i64> %__a to <4 x i32> + %1 = bitcast <2 x i64> %__b to <4 x i32> + %2 = icmp ne <4 x i32> %0, %1 + %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> + %4 = bitcast <8 x i1> %3 to i8 + ret i8 %4 +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i8 @test_mm_mask_cmpneq_epu32_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr #0 { +; CHECK-LABEL: test_mm_mask_cmpneq_epu32_mask: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: kmovd %edi, %k1 +; CHECK-NEXT: vpcmpneqd %xmm1, %xmm0, %k0 {%k1} +; CHECK-NEXT: kmovb %k0, %eax +; CHECK-NEXT: retq +entry: + %0 = bitcast <2 x i64> %__a to <4 x i32> + %1 = bitcast <2 x i64> %__b to <4 x i32> + %2 = icmp ne <4 x i32> %0, %1 + %3 = bitcast i8 %__u to <8 x i1> + %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> + %4 = and <4 x i1> %2, %extract.i + %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> + %6 = bitcast <8 x i1> %5 to i8 + ret i8 %6 +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i8 @test_mm_cmpneq_epu64_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr #0 { +; CHECK-LABEL: test_mm_cmpneq_epu64_mask: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: vpcmpneqq %xmm1, %xmm0, %k0 +; CHECK-NEXT: kmovb %k0, %eax +; CHECK-NEXT: retq +entry: + %0 = icmp ne <2 x i64> %__a, %__b + %1 = shufflevector <2 x i1> %0, <2 x i1> zeroinitializer, <8 x i32> + %2 = bitcast <8 x i1> %1 to i8 + ret i8 %2 +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i8 @test_mm_mask_cmpneq_epu64_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr #0 { +; CHECK-LABEL: test_mm_mask_cmpneq_epu64_mask: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: kmovd %edi, %k1 +; CHECK-NEXT: vpcmpneqq %xmm1, %xmm0, %k0 {%k1} +; CHECK-NEXT: kmovb %k0, %eax +; 
CHECK-NEXT: retq +entry: + %0 = icmp ne <2 x i64> %__a, %__b + %1 = bitcast i8 %__u to <8 x i1> + %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + %2 = and <2 x i1> %0, %extract.i + %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> + %4 = bitcast <8 x i1> %3 to i8 + ret i8 %4 +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i8 @test_mm256_cmpneq_epu32_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr #0 { +; CHECK-LABEL: test_mm256_cmpneq_epu32_mask: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: vpcmpneqd %ymm1, %ymm0, %k0 +; CHECK-NEXT: kmovb %k0, %eax +; CHECK-NEXT: vzeroupper +; CHECK-NEXT: retq +entry: + %0 = bitcast <4 x i64> %__a to <8 x i32> + %1 = bitcast <4 x i64> %__b to <8 x i32> + %2 = icmp ne <8 x i32> %0, %1 + %3 = bitcast <8 x i1> %2 to i8 + ret i8 %3 +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i8 @test_mm256_mask_cmpneq_epu32_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr #0 { +; CHECK-LABEL: test_mm256_mask_cmpneq_epu32_mask: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: kmovd %edi, %k1 +; CHECK-NEXT: vpcmpneqd %ymm1, %ymm0, %k0 {%k1} +; CHECK-NEXT: kmovb %k0, %eax +; CHECK-NEXT: vzeroupper +; CHECK-NEXT: retq +entry: + %0 = bitcast <4 x i64> %__a to <8 x i32> + %1 = bitcast <4 x i64> %__b to <8 x i32> + %2 = icmp ne <8 x i32> %0, %1 + %3 = bitcast i8 %__u to <8 x i1> + %4 = and <8 x i1> %2, %3 + %5 = bitcast <8 x i1> %4 to i8 + ret i8 %5 +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i8 @test_mm256_cmpneq_epu64_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr #0 { +; CHECK-LABEL: test_mm256_cmpneq_epu64_mask: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: vpcmpneqq %ymm1, %ymm0, %k0 +; CHECK-NEXT: kmovb %k0, %eax +; CHECK-NEXT: vzeroupper +; CHECK-NEXT: retq +entry: + %0 = icmp ne <4 x i64> %__a, %__b + %1 = shufflevector <4 x i1> %0, <4 x i1> zeroinitializer, <8 x i32> + %2 = bitcast <8 x i1> %1 to i8 + ret i8 %2 +} 
+ +; Function Attrs: norecurse nounwind readnone +define zeroext i8 @test_mm256_mask_cmpneq_epu64_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr #0 { +; CHECK-LABEL: test_mm256_mask_cmpneq_epu64_mask: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: kmovd %edi, %k1 +; CHECK-NEXT: vpcmpneqq %ymm1, %ymm0, %k0 {%k1} +; CHECK-NEXT: kmovb %k0, %eax +; CHECK-NEXT: vzeroupper +; CHECK-NEXT: retq +entry: + %0 = icmp ne <4 x i64> %__a, %__b + %1 = bitcast i8 %__u to <8 x i1> + %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + %2 = and <4 x i1> %0, %extract.i + %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> + %4 = bitcast <8 x i1> %3 to i8 + ret i8 %4 +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i8 @test_mm_cmp_eq_epi32_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr #0 { +; CHECK-LABEL: test_mm_cmp_eq_epi32_mask: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 +; CHECK-NEXT: kmovb %k0, %eax +; CHECK-NEXT: retq +entry: + %0 = bitcast <2 x i64> %__a to <4 x i32> + %1 = bitcast <2 x i64> %__b to <4 x i32> + %2 = icmp eq <4 x i32> %0, %1 + %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> + %4 = bitcast <8 x i1> %3 to i8 + ret i8 %4 +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i8 @test_mm_mask_cmp_lt_epi32_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr #0 { +; CHECK-LABEL: test_mm_mask_cmp_lt_epi32_mask: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: kmovd %edi, %k1 +; CHECK-NEXT: vpcmpgtd %xmm0, %xmm1, %k0 {%k1} +; CHECK-NEXT: kmovb %k0, %eax +; CHECK-NEXT: retq +entry: + %0 = bitcast <2 x i64> %__a to <4 x i32> + %1 = bitcast <2 x i64> %__b to <4 x i32> + %2 = icmp slt <4 x i32> %0, %1 + %3 = bitcast i8 %__u to <8 x i1> + %extract = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> + %4 = and <4 x i1> %2, %extract + %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> + %6 = bitcast 
<8 x i1> %5 to i8 + ret i8 %6 +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i8 @test_mm_cmp_lt_epi64_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr #0 { +; CHECK-LABEL: test_mm_cmp_lt_epi64_mask: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: vpcmpgtq %xmm0, %xmm1, %k0 +; CHECK-NEXT: kmovb %k0, %eax +; CHECK-NEXT: retq +entry: + %0 = icmp slt <2 x i64> %__a, %__b + %1 = shufflevector <2 x i1> %0, <2 x i1> zeroinitializer, <8 x i32> + %2 = bitcast <8 x i1> %1 to i8 + ret i8 %2 +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i8 @test_mm_mask_cmp_eq_epi64_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr #0 { +; CHECK-LABEL: test_mm_mask_cmp_eq_epi64_mask: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: kmovd %edi, %k1 +; CHECK-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k1} +; CHECK-NEXT: kmovb %k0, %eax +; CHECK-NEXT: retq +entry: + %0 = icmp eq <2 x i64> %__a, %__b + %1 = bitcast i8 %__u to <8 x i1> + %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + %2 = and <2 x i1> %0, %extract + %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> + %4 = bitcast <8 x i1> %3 to i8 + ret i8 %4 +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i8 @test_mm256_cmp_eq_epi32_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr #0 { +; CHECK-LABEL: test_mm256_cmp_eq_epi32_mask: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 +; CHECK-NEXT: kmovb %k0, %eax +; CHECK-NEXT: vzeroupper +; CHECK-NEXT: retq +entry: + %0 = bitcast <4 x i64> %__a to <8 x i32> + %1 = bitcast <4 x i64> %__b to <8 x i32> + %2 = icmp eq <8 x i32> %0, %1 + %3 = bitcast <8 x i1> %2 to i8 + ret i8 %3 +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i8 @test_mm256_mask_cmp_le_epi32_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr #0 { +; CHECK-LABEL: test_mm256_mask_cmp_le_epi32_mask: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: kmovd %edi, %k1 +; 
CHECK-NEXT: vpcmpled %ymm1, %ymm0, %k0 {%k1}
+; CHECK-NEXT: kmovb %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+  %0 = bitcast <4 x i64> %__a to <8 x i32>
+  %1 = bitcast <4 x i64> %__b to <8 x i32>
+  %2 = icmp sle <8 x i32> %0, %1
+  %3 = bitcast i8 %__u to <8 x i1>
+  %4 = and <8 x i1> %2, %3
+  %5 = bitcast <8 x i1> %4 to i8
+  ret i8 %5
+}
+
+; Function Attrs: norecurse nounwind readnone
+define zeroext i8 @test_mm256_cmp_eq_epi64_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr #0 {
+; CHECK-LABEL: test_mm256_cmp_eq_epi64_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpeqq %ymm1, %ymm0, %k0
+; CHECK-NEXT: kmovb %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+  %0 = icmp eq <4 x i64> %__a, %__b
+  %1 = shufflevector <4 x i1> %0, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %2 = bitcast <8 x i1> %1 to i8
+  ret i8 %2
+}
+
+; Function Attrs: norecurse nounwind readnone
+define zeroext i8 @test_mm256_mask_cmp_eq_epi64_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr #0 {
+; CHECK-LABEL: test_mm256_mask_cmp_eq_epi64_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 {%k1}
+; CHECK-NEXT: kmovb %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+  %0 = icmp eq <4 x i64> %__a, %__b
+  %1 = bitcast i8 %__u to <8 x i1>
+  %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %2 = and <4 x i1> %0, %extract
+  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %4 = bitcast <8 x i1> %3 to i8
+  ret i8 %4
+}
+
+; Function Attrs: norecurse nounwind readnone
+define zeroext i8 @test_mm_cmp_epu32_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr #0 {
+; CHECK-LABEL: test_mm_cmp_epu32_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %k0
+; CHECK-NEXT: kmovb %k0, %eax
+; CHECK-NEXT: retq
+entry:
+  %0 = bitcast <2 x i64> %__a to <4 x i32>
+  %1 = bitcast <2 x i64> %__b to <4 x i32>
+  %2 = icmp eq <4 x i32> %0, %1
+  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %4 = bitcast <8 x i1> %3 to i8
+  ret i8 %4
+}
+
+; Function Attrs: norecurse nounwind readnone
+define zeroext i8 @test_mm_mask_cmp_epu32_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr #0 {
+; CHECK-LABEL: test_mm_mask_cmp_epu32_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 {%k1}
+; CHECK-NEXT: kmovb %k0, %eax
+; CHECK-NEXT: retq
+entry:
+  %0 = bitcast <2 x i64> %__a to <4 x i32>
+  %1 = bitcast <2 x i64> %__b to <4 x i32>
+  %2 = icmp eq <4 x i32> %0, %1
+  %3 = bitcast i8 %__u to <8 x i1>
+  %extract = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %4 = and <4 x i1> %2, %extract
+  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %6 = bitcast <8 x i1> %5 to i8
+  ret i8 %6
+}
+
+; Function Attrs: norecurse nounwind readnone
+define zeroext i8 @test_mm_cmp_epu64_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr #0 {
+; CHECK-LABEL: test_mm_cmp_epu64_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpeqq %xmm1, %xmm0, %k0
+; CHECK-NEXT: kmovb %k0, %eax
+; CHECK-NEXT: retq
+entry:
+  %0 = icmp eq <2 x i64> %__a, %__b
+  %1 = shufflevector <2 x i1> %0, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+  %2 = bitcast <8 x i1> %1 to i8
+  ret i8 %2
+}
+
+; Function Attrs: norecurse nounwind readnone
+define zeroext i8 @test_mm_mask_cmp_epu64_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr #0 {
+; CHECK-LABEL: test_mm_mask_cmp_epu64_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k1}
+; CHECK-NEXT: kmovb %k0, %eax
+; CHECK-NEXT: retq
+entry:
+  %0 = icmp eq <2 x i64> %__a, %__b
+  %1 = bitcast i8 %__u to <8 x i1>
+  %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
+  %2 = and <2 x i1> %0, %extract
+  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+  %4 = bitcast <8 x i1> %3 to i8
+  ret i8 %4
+}
+
+; Function Attrs: norecurse nounwind readnone
+define zeroext i8 @test_mm256_cmp_epu32_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr #0 {
+; CHECK-LABEL: test_mm256_cmp_epu32_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpeqd %ymm1, %ymm0, %k0
+; CHECK-NEXT: kmovb %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+  %0 = bitcast <4 x i64> %__a to <8 x i32>
+  %1 = bitcast <4 x i64> %__b to <8 x i32>
+  %2 = icmp eq <8 x i32> %0, %1
+  %3 = bitcast <8 x i1> %2 to i8
+  ret i8 %3
+}
+
+; Function Attrs: norecurse nounwind readnone
+define zeroext i8 @test_mm256_mask_cmp_epu32_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr #0 {
+; CHECK-LABEL: test_mm256_mask_cmp_epu32_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 {%k1}
+; CHECK-NEXT: kmovb %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+  %0 = bitcast <4 x i64> %__a to <8 x i32>
+  %1 = bitcast <4 x i64> %__b to <8 x i32>
+  %2 = icmp eq <8 x i32> %0, %1
+  %3 = bitcast i8 %__u to <8 x i1>
+  %4 = and <8 x i1> %2, %3
+  %5 = bitcast <8 x i1> %4 to i8
+  ret i8 %5
+}
+
+; Function Attrs: norecurse nounwind readnone
+define zeroext i8 @test_mm256_cmp_epu64_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr #0 {
+; CHECK-LABEL: test_mm256_cmp_epu64_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpeqq %ymm1, %ymm0, %k0
+; CHECK-NEXT: kmovb %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+  %0 = icmp eq <4 x i64> %__a, %__b
+  %1 = shufflevector <4 x i1> %0, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %2 = bitcast <8 x i1> %1 to i8
+  ret i8 %2
+}
+
+; Function Attrs: norecurse nounwind readnone
+define zeroext i8 @test_mm256_mask_cmp_epu64_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr #0 {
+; CHECK-LABEL: test_mm256_mask_cmp_epu64_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 {%k1}
+; CHECK-NEXT: kmovb %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+  %0 = icmp eq <4 x i64> %__a, %__b
+  %1 = bitcast i8 %__u to <8 x i1>
+  %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %2 = and <4 x i1> %0, %extract
+  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %4 = bitcast <8 x i1> %3 to i8
+  ret i8 %4
+}
+