Index: lib/Target/X86/X86ISelLowering.cpp =================================================================== --- lib/Target/X86/X86ISelLowering.cpp +++ lib/Target/X86/X86ISelLowering.cpp @@ -4900,10 +4900,13 @@ /// single input multiple times, and in those cases it will /// adjust the mask to only have indices within that single input. static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero, + SmallVectorImpl &Ops, SmallVectorImpl &Mask, bool &IsUnary) { unsigned NumElems = VT.getVectorNumElements(); SDValue ImmN; + assert(Ops.empty() && "Clear Ops vector before calling getTargetShuffleMask"); + IsUnary = false; bool IsFakeUnary = false; switch(N->getOpcode()) { @@ -5008,6 +5011,8 @@ return false; case X86ISD::VPERMV: { IsUnary = true; + // VPERMV is special in that its operands are "inverted". + Ops.push_back(N->getOperand(1)); SDValue MaskNode = N->getOperand(0); SmallVector RawMask; unsigned MaskLoBits = Log2_64(VT.getVectorNumElements()); @@ -5057,6 +5062,14 @@ if (M >= (int)Mask.size()) M -= Mask.size(); + // If we didn't already add operands for the opcode, default to adding + // operands starting at 0. + if (Ops.empty()) { + Ops.push_back(N->getOperand(0)); + if (!IsUnary || IsFakeUnary) + Ops.push_back(N->getOperand(1)); + } + return true; } @@ -5065,16 +5078,17 @@ /// (not just zeroable) from their inputs. /// Returns true if the target shuffle mask was decoded. static bool setTargetShuffleZeroElements(SDValue N, - SmallVectorImpl &Mask) { + SmallVectorImpl &Mask, + SmallVectorImpl &Ops) { bool IsUnary; if (!isTargetShuffle(N.getOpcode())) return false; - if (!getTargetShuffleMask(N.getNode(), N.getSimpleValueType(), true, Mask, - IsUnary)) + if (!getTargetShuffleMask(N.getNode(), N.getSimpleValueType(), true, Ops, + Mask, IsUnary)) return false; - SDValue V1 = N.getOperand(0); - SDValue V2 = IsUnary ? V1 : N.getOperand(1); + SDValue V1 = Ops[0]; + SDValue V2 = IsUnary ? V1 : Ops[1]; while (V1.getOpcode() == ISD::BITCAST) V1 = V1->getOperand(0); @@ -5145,7 +5159,8 @@ static bool resolveTargetShuffleInputs(SDValue Op, bool &IsUnary, SDValue &Op0, SDValue &Op1, SmallVectorImpl &Mask) { - if (!setTargetShuffleZeroElements(Op, Mask)) + SmallVector Ops; + if (!setTargetShuffleZeroElements(Op, Mask, Ops)) return false; int NumElts = Mask.size(); @@ -5155,8 +5170,8 @@ bool Op1InUse = std::any_of(Mask.begin(), Mask.end(), [NumElts](int Idx) { return NumElts <= Idx; }); - Op0 = Op0InUse ? Op.getOperand(0) : SDValue(); - Op1 = Op1InUse ? Op.getOperand(1) : SDValue(); + Op0 = Op0InUse ? Ops[0] : SDValue(); + Op1 = Op1InUse ? Ops[1] : SDValue(); IsUnary = !(Op0InUse && Op1InUse); if (!IsUnary) @@ -5204,9 +5219,10 @@ MVT ShufSVT = ShufVT.getVectorElementType(); int NumElems = (int)ShufVT.getVectorNumElements(); SmallVector ShuffleMask; + SmallVector ShuffleOps; bool IsUnary; - if (!getTargetShuffleMask(N, ShufVT, true, ShuffleMask, IsUnary)) + if (!getTargetShuffleMask(N, ShufVT, true, ShuffleOps, ShuffleMask, IsUnary)) return SDValue(); int Elt = ShuffleMask[Index]; @@ -5217,7 +5233,7 @@ return DAG.getUNDEF(ShufSVT); assert(0 <= Elt && Elt < (2*NumElems) && "Shuffle index out of range"); - SDValue NewV = (Elt < NumElems) ? N->getOperand(0) : N->getOperand(1); + SDValue NewV = (Elt < NumElems) ? ShuffleOps[0] : ShuffleOps[1]; return getShuffleScalarElt(NewV.getNode(), Elt % NumElems, DAG, Depth+1); } @@ -23985,8 +24001,10 @@ static SmallVector getPSHUFShuffleMask(SDValue N) { MVT VT = N.getSimpleValueType(); SmallVector Mask; + SmallVector Ops; bool IsUnary; - bool HaveMask = getTargetShuffleMask(N.getNode(), VT, false, Mask, IsUnary); + bool HaveMask = + getTargetShuffleMask(N.getNode(), VT, false, Ops, Mask, IsUnary); (void)HaveMask; assert(HaveMask); @@ -24301,7 +24319,8 @@ // Determine which elements are known to be zero. SmallVector TargetMask; - if (!setTargetShuffleZeroElements(N, TargetMask)) + SmallVector BlendOps; + if (!setTargetShuffleZeroElements(N, TargetMask, BlendOps)) return SDValue(); // Helper function to take inner insertps node and attempt to @@ -24355,7 +24374,8 @@ // Attempt to merge insertps Op1 with an inner target shuffle node. SmallVector TargetMask1; - if (setTargetShuffleZeroElements(Op1, TargetMask1)) { + SmallVector Ops1; + if (setTargetShuffleZeroElements(Op1, TargetMask1, Ops1)) { int M = TargetMask1[SrcIdx]; if (isUndefOrZero(M)) { // Zero/UNDEF insertion - zero out element and remove dependency. @@ -24366,14 +24386,15 @@ // Update insertps mask srcidx and reference the source input directly. assert(0 <= M && M < 8 && "Shuffle index out of range"); InsertPSMask = (InsertPSMask & 0x3f) | ((M & 0x3) << 6); - Op1 = Op1.getOperand(M < 4 ? 0 : 1); + Op1 = Ops1[M < 4 ? 0 : 1]; return DAG.getNode(X86ISD::INSERTPS, DL, VT, Op0, Op1, DAG.getConstant(InsertPSMask, DL, MVT::i8)); } // Attempt to merge insertps Op0 with an inner target shuffle node. SmallVector TargetMask0; - if (!setTargetShuffleZeroElements(Op0, TargetMask0)) + SmallVector Ops0; + if (!setTargetShuffleZeroElements(Op0, TargetMask0, Ops0)) return SDValue(); bool Updated = false; @@ -24404,10 +24425,10 @@ // referenced input directly. if (UseInput00 && !UseInput01) { Updated = true; - Op0 = Op0.getOperand(0); + Op0 = Ops0[0]; } else if (!UseInput00 && UseInput01) { Updated = true; - Op0 = Op0.getOperand(1); + Op0 = Ops0[1]; } if (Updated) @@ -24709,9 +24730,10 @@ return SDValue(); SmallVector ShuffleMask; + SmallVector ShuffleOps; bool UnaryShuffle; if (!getTargetShuffleMask(InVec.getNode(), CurrentVT.getSimpleVT(), true, - ShuffleMask, UnaryShuffle)) + ShuffleOps, ShuffleMask, UnaryShuffle)) return SDValue(); // Select the input vector, guarding against out of range extract vector. @@ -24726,12 +24748,12 @@ return DAG.getUNDEF(EltVT); assert(0 <= Idx && Idx < (int)(2 * NumElems) && "Shuffle index out of range"); - SDValue LdNode = (Idx < (int)NumElems) ? InVec.getOperand(0) - : InVec.getOperand(1); + SDValue LdNode = (Idx < (int)NumElems) ? ShuffleOps[0] + : ShuffleOps[1]; // If inputs to shuffle are the same for both ops, then allow 2 uses - unsigned AllowedUses = InVec.getNumOperands() > 1 && - InVec.getOperand(0) == InVec.getOperand(1) ? 2 : 1; + unsigned AllowedUses = + (ShuffleOps.size() > 1 && ShuffleOps[0] == ShuffleOps[1]) ? 2 : 1; if (LdNode.getOpcode() == ISD::BITCAST) { // Don't duplicate a load with other uses. @@ -24765,10 +24787,9 @@ SDLoc dl(N); // Create shuffle node taking into account the case that its a unary shuffle - SDValue Shuffle = (UnaryShuffle) ? DAG.getUNDEF(CurrentVT) - : InVec.getOperand(1); + SDValue Shuffle = (UnaryShuffle) ? DAG.getUNDEF(CurrentVT) : ShuffleOps[1]; Shuffle = DAG.getVectorShuffle(CurrentVT, dl, - InVec.getOperand(0), Shuffle, + ShuffleOps[0], Shuffle, &ShuffleMask[0]); Shuffle = DAG.getBitcast(OriginalVT, Shuffle); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, N->getValueType(0), Shuffle, Index: test/CodeGen/X86/avx2-vperm-combining.ll =================================================================== --- /dev/null +++ test/CodeGen/X86/avx2-vperm-combining.ll @@ -0,0 +1,17 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mcpu=x86-64 -mattr=+avx2 | FileCheck %s + +target triple = "x86_64-unknown-unknown" + +define <32 x i8> @shuffle_pshufb_vpermd(<8 x i32> %a) { +; CHECK-LABEL: shuffle_pshufb_vpermd: +; CHECK: # BB#0: +; CHECK-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,16,17,18,18] +; CHECK-NEXT: retq + %tmp0 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a, <8 x i32> ) + %tmp1 = bitcast <8 x i32> %tmp0 to <32 x i8> + %tmp2 = shufflevector <32 x i8> %tmp1, <32 x i8> undef, <32 x i32> + ret <32 x i8> %tmp2 +} + +declare <8 x i32> @llvm.x86.avx2.permd(<8 x i32>, <8 x i32>)