Index: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
@@ -7832,55 +7832,66 @@
 // TODO: Handle undefs
 // TODO: Utilize pshufb and zero mask blending to support more efficient
 // construction of vectors with constant-0 elements.
-// TODO: Use smaller-element vectors of same width, and "interpolate" the indices,
-// when no native operation available.
+// TODO: Use smaller-element vectors of same width, and "interpolate" the
+// indices, when no native operation available.
 static SDValue LowerBUILD_VECTORAsVariablePermute(SDValue V, SelectionDAG &DAG,
                                                   const X86Subtarget &Subtarget) {
-  // Look for VPERMV and PSHUFB opportunities.
-  MVT VT = V.getSimpleValueType();
-  switch (VT.SimpleTy) {
-  default:
-    return SDValue();
-  case MVT::v16i8:
-    if (!Subtarget.hasSSE3())
-      return SDValue();
-    break;
-  case MVT::v8f32:
-  case MVT::v8i32:
-    if (!Subtarget.hasAVX2())
-      return SDValue();
-    break;
-  case MVT::v4i64:
-  case MVT::v4f64:
-    if (!Subtarget.hasVLX())
-      return SDValue();
-    break;
-  case MVT::v16f32:
-  case MVT::v8f64:
-  case MVT::v16i32:
-  case MVT::v8i64:
-    if (!Subtarget.hasAVX512())
-      return SDValue();
-    break;
-  case MVT::v32i16:
-    if (!Subtarget.hasBWI())
-      return SDValue();
-    break;
-  case MVT::v8i16:
-  case MVT::v16i16:
-    if (!Subtarget.hasVLX() || !Subtarget.hasBWI())
-      return SDValue();
-    break;
-  case MVT::v64i8:
-    if (!Subtarget.hasVBMI())
-      return SDValue();
-    break;
-  case MVT::v32i8:
-    if (!Subtarget.hasVLX() || !Subtarget.hasVBMI())
-      return SDValue();
-    break;
-  }
+  // Look for VPERMV/VPERMILPV/PSHUFB opportunities.
+  auto LegalPermuteOpcode = [&Subtarget](MVT DstVT, MVT &ShuffleVT) {
+    unsigned Opcode = 0;
+    switch (DstVT.SimpleTy) {
+    default:
+      break;
+    case MVT::v16i8:
+      if (Subtarget.hasSSE3())
+        Opcode = X86ISD::PSHUFB;
+      break;
+    case MVT::v2f64:
+    case MVT::v2i64:
+      if (Subtarget.hasAVX()) {
+        Opcode = X86ISD::VPERMILPV;
+        ShuffleVT = MVT::v2f64;
+      }
+      break;
+    case MVT::v8f32:
+    case MVT::v8i32:
+      if (Subtarget.hasAVX2())
+        Opcode = X86ISD::VPERMV;
+      break;
+    case MVT::v4i64:
+    case MVT::v4f64:
+      if (Subtarget.hasVLX())
+        Opcode = X86ISD::VPERMV;
+      break;
+    case MVT::v16f32:
+    case MVT::v8f64:
+    case MVT::v16i32:
+    case MVT::v8i64:
+      if (Subtarget.hasAVX512())
+        Opcode = X86ISD::VPERMV;
+      break;
+    case MVT::v32i16:
+      if (Subtarget.hasBWI())
+        Opcode = X86ISD::VPERMV;
+      break;
+    case MVT::v8i16:
+    case MVT::v16i16:
+      if (Subtarget.hasVLX() && Subtarget.hasBWI())
+        Opcode = X86ISD::VPERMV;
+      break;
+    case MVT::v64i8:
+      if (Subtarget.hasVBMI())
+        Opcode = X86ISD::VPERMV;
+      break;
+    case MVT::v32i8:
+      if (Subtarget.hasVLX() && Subtarget.hasVBMI())
+        Opcode = X86ISD::VPERMV;
+      break;
+    }
+    return Opcode;
+  };
+
   SDValue SrcVec, IndicesVec;
   // Check for a match of the permute source vector and permute index elements.
   // This is done by checking that the i-th build_vector operand is of the form:
@@ -7918,6 +7929,15 @@
       return SDValue();
   }
 
+  MVT VT = V.getSimpleValueType();
+  MVT ShuffleVT = VT;
+  unsigned Opcode = LegalPermuteOpcode(VT, ShuffleVT);
+  if (!Opcode)
+    return SDValue();
+  assert(VT.getScalarSizeInBits() == ShuffleVT.getScalarSizeInBits() &&
+         VT.getVectorNumElements() == ShuffleVT.getVectorNumElements() &&
+         "Illegal variable permute shuffle type");
+
   unsigned NumElts = VT.getVectorNumElements();
   if (IndicesVec.getValueType().getVectorNumElements() < NumElts)
     return SDValue();
@@ -7937,9 +7957,12 @@
                          SrcVec, DAG.getIntPtrConstant(0, SDLoc(SrcVec)));
   }
 
-  if (VT == MVT::v16i8)
-    return DAG.getNode(X86ISD::PSHUFB, SDLoc(V), VT, SrcVec, IndicesVec);
-  return DAG.getNode(X86ISD::VPERMV, SDLoc(V), VT, IndicesVec, SrcVec);
+  SrcVec = DAG.getBitcast(ShuffleVT, SrcVec);
+  SDValue Res =
+      Opcode == X86ISD::VPERMV
+          ? DAG.getNode(Opcode, SDLoc(V), ShuffleVT, IndicesVec, SrcVec)
+          : DAG.getNode(Opcode, SDLoc(V), ShuffleVT, SrcVec, IndicesVec);
+  return DAG.getBitcast(VT, Res);
 }
 
 SDValue
Index: llvm/trunk/test/CodeGen/X86/var-permute-128.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/var-permute-128.ll
+++ llvm/trunk/test/CodeGen/X86/var-permute-128.ll
@@ -23,14 +23,7 @@
 ;
 ; AVX-LABEL: var_shuffle_v2i64:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vmovq %xmm1, %rax
-; AVX-NEXT:    andl $1, %eax
-; AVX-NEXT:    vpextrq $1, %xmm1, %rcx
-; AVX-NEXT:    andl $1, %ecx
-; AVX-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
-; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
-; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
-; AVX-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX-NEXT:    vpermilpd %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    retq
   %index0 = extractelement <2 x i64> %indices, i32 0
   %index1 = extractelement <2 x i64> %indices, i32 1
@@ -280,13 +273,7 @@
 ;
 ; AVX-LABEL: var_shuffle_v2f64:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vmovq %xmm1, %rax
-; AVX-NEXT:    andl $1, %eax
-; AVX-NEXT:    vpextrq $1, %xmm1, %rcx
-; AVX-NEXT:    andl $1, %ecx
-; AVX-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
-; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
-; AVX-NEXT:    vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
+; AVX-NEXT:    vpermilpd %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    retq
   %index0 = extractelement <2 x i64> %indices, i32 0
   %index1 = extractelement <2 x i64> %indices, i32 1
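Note: the IR shape this patch targets is visible in the updated tests above. Below is a minimal, self-contained sketch of that pattern (an assumed reduction, not copied verbatim from var-permute-128.ll; the function name is illustrative): a two-element variable shuffle built from extractelement/insertelement, which the new v2f64/v2i64 case can now lower to a single vpermilpd on AVX targets instead of spilling the source vector to the stack. v2i64 is handled by bitcasting to v2f64, since VPERMILPD operates in the floating-point domain.

; Hypothetical reduced test case; the expected AVX output below is taken from
; the updated CHECK lines in this patch.
define <2 x double> @var_shuffle_v2f64_example(<2 x double> %v, <2 x i64> %indices) nounwind {
  %index0 = extractelement <2 x i64> %indices, i32 0
  %index1 = extractelement <2 x i64> %indices, i32 1
  %v0 = extractelement <2 x double> %v, i64 %index0
  %v1 = extractelement <2 x double> %v, i64 %index1
  %ret0 = insertelement <2 x double> undef, double %v0, i32 0
  %ret1 = insertelement <2 x double> %ret0, double %v1, i32 1
  ret <2 x double> %ret1
}
; Expected AVX codegen (per the CHECK lines above):
;   vpermilpd %xmm1, %xmm0, %xmm0
;   retq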