Index: lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- lib/Target/X86/X86ISelLowering.cpp
+++ lib/Target/X86/X86ISelLowering.cpp
@@ -15041,6 +15041,42 @@
   }
 }
 
+// Determine if this shuffle can be implemented with a KSHIFT instruction.
+// On a match, sets Opcode to X86ISD::KSHIFTL or X86ISD::KSHIFTR and returns
+// the shift amount; otherwise returns -1 and leaves Opcode unmodified.
+// MaskOffset is added to the first expected mask index. This is a simplified
+// version of matchVectorShuffleAsShift.
+static int match1BitShuffleAsKSHIFT(unsigned &Opcode, ArrayRef<int> Mask,
+                                    int MaskOffset, const APInt &Zeroable) {
+  int Size = Mask.size();
+
+  // The Shift low (left) or high (right) elements must all be zeroable.
+  auto CheckZeros = [&](int Shift, bool Left) {
+    for (int j = 0; j < Shift; ++j)
+      if (!Zeroable[j + (Left ? 0 : (Size - Shift))])
+        return false;
+
+    return true;
+  };
+
+  // The other Size - Shift elements are checked for a sequential/undef run.
+  auto MatchShift = [&](int Shift, bool Left) {
+    unsigned Pos = Left ? Shift : 0;
+    unsigned Low = Left ? 0 : Shift;
+    unsigned Len = Size - Shift;
+    return isSequentialOrUndefInRange(Mask, Pos, Len, Low + MaskOffset);
+  };
+
+  for (int Shift = 1; Shift != Size; ++Shift)
+    for (bool Left : {true, false})
+      if (CheckZeros(Shift, Left) && MatchShift(Shift, Left)) {
+        Opcode = Left ? X86ISD::KSHIFTL : X86ISD::KSHIFTR;
+        return Shift;
+      }
+
+  return -1;
+}
+
 // Lower vXi1 vector shuffles.
 // There is no a dedicated instruction on AVX-512 that shuffles the masks.
 // The only way to shuffle bits is to sign-extend the mask vector to SIMD
@@ -15050,6 +15086,9 @@
                                       const APInt &Zeroable,
                                       const X86Subtarget &Subtarget,
                                       SelectionDAG &DAG) {
+  assert(Subtarget.hasAVX512() &&
+         "Cannot lower 512-bit vectors w/o basic ISA!");
+
   unsigned NumElts = Mask.size();
 
   // Try to recognize shuffles that are just padding a subvector with zeros.
@@ -15076,9 +15115,24 @@
                        Extract, DAG.getIntPtrConstant(0, DL));
   }
 
+  // Try to match KSHIFTs.
+  // TODO: Support narrower than legal shifts by widening and extracting.
+ if (VT == MVT::v16i1 || VT == MVT::v32i1 || VT == MVT::v64i1 || + (Subtarget.hasDQI() && VT == MVT::v8i1)) { + unsigned Opcode; + int ShiftAmt = match1BitShuffleAsKSHIFT(Opcode, Mask, 0, Zeroable); + if (ShiftAmt >= 0) + return DAG.getNode(Opcode, DL, VT, V1, + DAG.getConstant(ShiftAmt, DL, MVT::i8)); + + // V1 didn't match, try V2. + ShiftAmt = match1BitShuffleAsKSHIFT(Opcode, Mask, NumElts, Zeroable); + if (ShiftAmt >= 0) + return DAG.getNode(Opcode, DL, VT, V2, + DAG.getConstant(ShiftAmt, DL, MVT::i8)); + } + - assert(Subtarget.hasAVX512() && - "Cannot lower 512-bit vectors w/o basic ISA!"); MVT ExtVT; switch (VT.SimpleTy) { default: Index: test/CodeGen/X86/avx512-skx-insert-subvec.ll =================================================================== --- test/CodeGen/X86/avx512-skx-insert-subvec.ll +++ test/CodeGen/X86/avx512-skx-insert-subvec.ll @@ -30,11 +30,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vpsllq $63, %xmm0, %xmm0 ; CHECK-NEXT: vpmovq2m %xmm0, %k0 -; CHECK-NEXT: vpmovm2d %k0, %ymm0 -; CHECK-NEXT: vperm2i128 {{.*#+}} ymm0 = zero,zero,ymm0[0,1] -; CHECK-NEXT: vpmovd2m %ymm0, %k0 +; CHECK-NEXT: kshiftlb $4, %k0, %k0 ; CHECK-NEXT: vpmovm2w %k0, %xmm0 -; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq %res = shufflevector <2 x i1> %a, <2 x i1> zeroinitializer, <8 x i32> ret <8 x i1> %res