Index: ../lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- ../lib/Target/X86/X86ISelLowering.cpp
+++ ../lib/Target/X86/X86ISelLowering.cpp
@@ -13956,16 +13956,21 @@
     return DAG.getNode(X86ISD::VZEXT, DL, VT, In);
 
   assert(InVT.getVectorElementType() == MVT::i1);
-  MVT ExtVT = NumElts == 8 ? MVT::v8i64 : MVT::v16i32;
+
+  // Extend VT if the target is a 256/128-bit vector and VLX is not supported.
+  MVT ExtVT = VT;
+  if (!VT.is512BitVector() && !Subtarget.hasVLX())
+    ExtVT = MVT::getVectorVT(MVT::getIntegerVT(512 / NumElts), NumElts);
+
   SDValue One =
    DAG.getConstant(APInt(ExtVT.getScalarSizeInBits(), 1), DL, ExtVT);
   SDValue Zero =
    DAG.getConstant(APInt::getNullValue(ExtVT.getScalarSizeInBits()), DL, ExtVT);
 
-  SDValue V = DAG.getNode(ISD::VSELECT, DL, ExtVT, In, One, Zero);
-  if (VT.is512BitVector())
-    return V;
-  return DAG.getNode(X86ISD::VTRUNC, DL, VT, V);
+  SDValue SelectedVal = DAG.getNode(ISD::VSELECT, DL, ExtVT, In, One, Zero);
+  if (VT == ExtVT)
+    return SelectedVal;
+  return DAG.getNode(X86ISD::VTRUNC, DL, VT, SelectedVal);
 }
 
 static SDValue LowerANY_EXTEND(SDValue Op, const X86Subtarget &Subtarget,
Index: ../test/CodeGen/X86/avx512-ext.ll
===================================================================
--- ../test/CodeGen/X86/avx512-ext.ll
+++ ../test/CodeGen/X86/avx512-ext.ll
@@ -1879,3 +1879,84 @@
   %2 = bitcast <8 x i32> %1 to <4 x i64>
   ret <4 x i64> %2
 }
+
+define <64 x i8> @zext_64xi1_to_64xi8(<64 x i8> %x, <64 x i8> %y) #0 {
+; KNL-LABEL: zext_64xi1_to_64xi8:
+; KNL:       ## BB#0:
+; KNL-NEXT:    vpcmpeqb %ymm2, %ymm0, %ymm0
+; KNL-NEXT:    vmovdqa {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
+; KNL-NEXT:    vpand %ymm2, %ymm0, %ymm0
+; KNL-NEXT:    vpcmpeqb %ymm3, %ymm1, %ymm1
+; KNL-NEXT:    vpand %ymm2, %ymm1, %ymm1
+; KNL-NEXT:    retq
+;
+; SKX-LABEL: zext_64xi1_to_64xi8:
+; SKX:       ## BB#0:
+; SKX-NEXT:    vpcmpeqb %zmm1, %zmm0, %k1
+; SKX-NEXT:    vmovdqu8 {{.*}}(%rip), %zmm0 {%k1} {z}
+; SKX-NEXT:    retq
+  %mask = icmp eq <64 x i8> %x, %y
+  %1 = zext <64 x i1> %mask to <64 x i8>
+  ret <64 x i8> %1
+}
+
+define <32 x i16> @zext_32xi1_to_32xi16(<32 x i16> %x, <32 x i16> %y) #0 {
+; KNL-LABEL: zext_32xi1_to_32xi16:
+; KNL:       ## BB#0:
+; KNL-NEXT:    vpcmpeqw %ymm2, %ymm0, %ymm0
+; KNL-NEXT:    vmovdqa {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
+; KNL-NEXT:    vpand %ymm2, %ymm0, %ymm0
+; KNL-NEXT:    vpcmpeqw %ymm3, %ymm1, %ymm1
+; KNL-NEXT:    vpand %ymm2, %ymm1, %ymm1
+; KNL-NEXT:    retq
+;
+; SKX-LABEL: zext_32xi1_to_32xi16:
+; SKX:       ## BB#0:
+; SKX-NEXT:    vpcmpeqw %zmm1, %zmm0, %k1
+; SKX-NEXT:    vmovdqu16 {{.*}}(%rip), %zmm0 {%k1} {z}
+; SKX-NEXT:    retq
+  %mask = icmp eq <32 x i16> %x, %y
+  %1 = zext <32 x i1> %mask to <32 x i16>
+  ret <32 x i16> %1
+}
+
+define <16 x i16> @zext_16xi1_to_16xi16(<16 x i16> %x, <16 x i16> %y) #0 {
+; KNL-LABEL: zext_16xi1_to_16xi16:
+; KNL:       ## BB#0:
+; KNL-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
+; KNL-NEXT:    vpand {{.*}}(%rip), %ymm0, %ymm0
+; KNL-NEXT:    retq
+;
+; SKX-LABEL: zext_16xi1_to_16xi16:
+; SKX:       ## BB#0:
+; SKX-NEXT:    vpcmpeqw %ymm1, %ymm0, %k1
+; SKX-NEXT:    vmovdqu16 {{.*}}(%rip), %ymm0 {%k1} {z}
+; SKX-NEXT:    retq
+  %mask = icmp eq <16 x i16> %x, %y
+  %1 = zext <16 x i1> %mask to <16 x i16>
+  ret <16 x i16> %1
+}
+
+
+define <32 x i8> @zext_32xi1_to_32xi8(<32 x i16> %x, <32 x i16> %y) #0 {
+; KNL-LABEL: zext_32xi1_to_32xi8:
+; KNL:       ## BB#0:
+; KNL-NEXT:    vpcmpeqw %ymm2, %ymm0, %ymm0
+; KNL-NEXT:    vpmovsxwd %ymm0, %zmm0
+; KNL-NEXT:    vpmovdb %zmm0, %xmm0
+; KNL-NEXT:    vpcmpeqw %ymm3, %ymm1, %ymm1
+; KNL-NEXT:    vpmovsxwd %ymm1, %zmm1
+; KNL-NEXT:    vpmovdb %zmm1, %xmm1
+; KNL-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; KNL-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
+; KNL-NEXT:    retq
+;
+; SKX-LABEL: zext_32xi1_to_32xi8:
+; SKX:       ## BB#0:
+; SKX-NEXT:    vpcmpeqw %zmm1, %zmm0, %k1
+; SKX-NEXT:    vmovdqu8 {{.*}}(%rip), %ymm0 {%k1} {z}
+; SKX-NEXT:    retq
+  %mask = icmp eq <32 x i16> %x, %y
+  %1 = zext <32 x i1> %mask to <32 x i8>
+  ret <32 x i8> %1
+}