Index: lib/Target/X86/InstPrinter/X86InstComments.cpp
===================================================================
--- lib/Target/X86/InstPrinter/X86InstComments.cpp
+++ lib/Target/X86/InstPrinter/X86InstComments.cpp
@@ -21,6 +21,92 @@
 using namespace llvm;
 
+/// \brief Extracts the src/dst types for a given zero extension instruction.
+/// \note While the number of elements in the DstVT type is correct, the
+/// number in the SrcVT type is expanded to fill the src xmm register and the
+/// upper elements may not be included in the dst xmm/ymm register.
+static void getZeroExtensionTypes(const MCInst *MI, MVT &SrcVT, MVT &DstVT) {
+  switch (MI->getOpcode()) {
+  default:
+    llvm_unreachable("Unknown zero extension instruction");
+  // i8 zero extension
+  case X86::PMOVZXBWrm:
+  case X86::PMOVZXBWrr:
+  case X86::VPMOVZXBWrm:
+  case X86::VPMOVZXBWrr:
+    SrcVT = MVT::v16i8;
+    DstVT = MVT::v8i16;
+    break;
+  case X86::VPMOVZXBWYrm:
+  case X86::VPMOVZXBWYrr:
+    SrcVT = MVT::v16i8;
+    DstVT = MVT::v16i16;
+    break;
+  case X86::PMOVZXBDrm:
+  case X86::PMOVZXBDrr:
+  case X86::VPMOVZXBDrm:
+  case X86::VPMOVZXBDrr:
+    SrcVT = MVT::v16i8;
+    DstVT = MVT::v4i32;
+    break;
+  case X86::VPMOVZXBDYrm:
+  case X86::VPMOVZXBDYrr:
+    SrcVT = MVT::v16i8;
+    DstVT = MVT::v8i32;
+    break;
+  case X86::PMOVZXBQrm:
+  case X86::PMOVZXBQrr:
+  case X86::VPMOVZXBQrm:
+  case X86::VPMOVZXBQrr:
+    SrcVT = MVT::v16i8;
+    DstVT = MVT::v2i64;
+    break;
+  case X86::VPMOVZXBQYrm:
+  case X86::VPMOVZXBQYrr:
+    SrcVT = MVT::v16i8;
+    DstVT = MVT::v4i64;
+    break;
+  // i16 zero extension
+  case X86::PMOVZXWDrm:
+  case X86::PMOVZXWDrr:
+  case X86::VPMOVZXWDrm:
+  case X86::VPMOVZXWDrr:
+    SrcVT = MVT::v8i16;
+    DstVT = MVT::v4i32;
+    break;
+  case X86::VPMOVZXWDYrm:
+  case X86::VPMOVZXWDYrr:
+    SrcVT = MVT::v8i16;
+    DstVT = MVT::v8i32;
+    break;
+  case X86::PMOVZXWQrm:
+  case X86::PMOVZXWQrr:
+  case X86::VPMOVZXWQrm:
+  case X86::VPMOVZXWQrr:
+    SrcVT = MVT::v8i16;
+    DstVT = MVT::v2i64;
+    break;
+  case X86::VPMOVZXWQYrm:
+  case X86::VPMOVZXWQYrr:
+    SrcVT = MVT::v8i16;
+    DstVT = MVT::v4i64;
+    break;
+  // i32 zero extension
+  case X86::PMOVZXDQrm:
+  case X86::PMOVZXDQrr:
+  case X86::VPMOVZXDQrm:
+  case X86::VPMOVZXDQrr:
+    SrcVT = MVT::v4i32;
+    DstVT = MVT::v2i64;
+    break;
+  case X86::VPMOVZXDQYrm:
+  case X86::VPMOVZXDQYrr:
+    SrcVT = MVT::v4i32;
+    DstVT = MVT::v4i64;
+    break;
+  }
+}
+
 //===----------------------------------------------------------------------===//
 // Top Level Entrypoint
 //===----------------------------------------------------------------------===//
@@ -750,6 +836,92 @@
                          ShuffleMask);
     DestName = getRegName(MI->getOperand(0).getReg());
     break;
+
+  case X86::MOVSDrr:
+  case X86::VMOVSDrr:
+    Src2Name = getRegName(MI->getOperand(2).getReg());
+    // FALL THROUGH.
+  case X86::MOVSDrm:
+  case X86::VMOVSDrm:
+    DecodeScalarMoveMask(MVT::v2f64, nullptr == Src2Name, ShuffleMask);
+    Src1Name = getRegName(MI->getOperand(1).getReg());
+    DestName = getRegName(MI->getOperand(0).getReg());
+    break;
+  case X86::MOVSSrr:
+  case X86::VMOVSSrr:
+    Src2Name = getRegName(MI->getOperand(2).getReg());
+    // FALL THROUGH.
+  case X86::MOVSSrm:
+  case X86::VMOVSSrm:
+    DecodeScalarMoveMask(MVT::v4f32, nullptr == Src2Name, ShuffleMask);
+    Src1Name = getRegName(MI->getOperand(1).getReg());
+    DestName = getRegName(MI->getOperand(0).getReg());
+    break;
+
+  case X86::MOVPQI2QIrr:
+  case X86::MOVZPQILo2PQIrr:
+  case X86::VMOVPQI2QIrr:
+  case X86::VMOVZPQILo2PQIrr:
+    Src1Name = getRegName(MI->getOperand(1).getReg());
+    // FALL THROUGH.
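+  // All of these forms decode to a move of the low 64-bit element from the
+  // source with the remaining destination elements zeroed.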
+  case X86::MOVQI2PQIrm:
+  case X86::MOVZQI2PQIrm:
+  case X86::MOVZPQILo2PQIrm:
+  case X86::VMOVQI2PQIrm:
+  case X86::VMOVZQI2PQIrm:
+  case X86::VMOVZPQILo2PQIrm:
+    DecodeZeroMoveLowMask(MVT::v2i64, ShuffleMask);
+    DestName = getRegName(MI->getOperand(0).getReg());
+    break;
+  case X86::MOVDI2PDIrm:
+  case X86::VMOVDI2PDIrm:
+    DecodeZeroMoveLowMask(MVT::v4i32, ShuffleMask);
+    DestName = getRegName(MI->getOperand(0).getReg());
+    break;
+
+  case X86::PMOVZXBWrr:
+  case X86::PMOVZXBDrr:
+  case X86::PMOVZXBQrr:
+  case X86::PMOVZXWDrr:
+  case X86::PMOVZXWQrr:
+  case X86::PMOVZXDQrr:
+  case X86::VPMOVZXBWrr:
+  case X86::VPMOVZXBDrr:
+  case X86::VPMOVZXBQrr:
+  case X86::VPMOVZXWDrr:
+  case X86::VPMOVZXWQrr:
+  case X86::VPMOVZXDQrr:
+  case X86::VPMOVZXBWYrr:
+  case X86::VPMOVZXBDYrr:
+  case X86::VPMOVZXBQYrr:
+  case X86::VPMOVZXWDYrr:
+  case X86::VPMOVZXWQYrr:
+  case X86::VPMOVZXDQYrr:
+    Src1Name = getRegName(MI->getOperand(1).getReg());
+    // FALL THROUGH.
+  case X86::PMOVZXBWrm:
+  case X86::PMOVZXBDrm:
+  case X86::PMOVZXBQrm:
+  case X86::PMOVZXWDrm:
+  case X86::PMOVZXWQrm:
+  case X86::PMOVZXDQrm:
+  case X86::VPMOVZXBWrm:
+  case X86::VPMOVZXBDrm:
+  case X86::VPMOVZXBQrm:
+  case X86::VPMOVZXWDrm:
+  case X86::VPMOVZXWQrm:
+  case X86::VPMOVZXDQrm:
+  case X86::VPMOVZXBWYrm:
+  case X86::VPMOVZXBDYrm:
+  case X86::VPMOVZXBQYrm:
+  case X86::VPMOVZXWDYrm:
+  case X86::VPMOVZXWQYrm:
+  case X86::VPMOVZXDQYrm: {
+    MVT SrcVT, DstVT;
+    getZeroExtensionTypes(MI, SrcVT, DstVT);
+    DecodeZeroExtendMask(SrcVT, DstVT, ShuffleMask);
+    DestName = getRegName(MI->getOperand(0).getReg());
+  } break;
   }
 
   // The only comments we decode are shuffles, so give up if we were unable to
Index: lib/Target/X86/Utils/X86ShuffleDecode.h
===================================================================
--- lib/Target/X86/Utils/X86ShuffleDecode.h
+++ lib/Target/X86/Utils/X86ShuffleDecode.h
@@ -90,6 +90,16 @@
 /// \brief Decode a VPERMILP variable mask from an IR-level vector constant.
 void DecodeVPERMILPMask(const Constant *C, SmallVectorImpl<int> &ShuffleMask);
 
+/// \brief Decode a zero extension instruction as a shuffle mask.
+void DecodeZeroExtendMask(MVT SrcVT, MVT DstVT,
+                          SmallVectorImpl<int> &ShuffleMask);
+
+/// \brief Decode a move lower and zero upper instruction as a shuffle mask.
+void DecodeZeroMoveLowMask(MVT VT, SmallVectorImpl<int> &ShuffleMask);
+
+/// \brief Decode a scalar float move instruction as a shuffle mask.
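+/// \note The IsLoad form zeroes the upper elements; the register form copies
+/// them from the first source operand.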
+void DecodeScalarMoveMask(MVT VT, bool IsLoad,
+                          SmallVectorImpl<int> &ShuffleMask);
 } // llvm namespace
 
 #endif
Index: lib/Target/X86/Utils/X86ShuffleDecode.cpp
===================================================================
--- lib/Target/X86/Utils/X86ShuffleDecode.cpp
+++ lib/Target/X86/Utils/X86ShuffleDecode.cpp
@@ -399,4 +399,36 @@
   }
 }
 
+void DecodeZeroExtendMask(MVT SrcVT, MVT DstVT, SmallVectorImpl<int> &Mask) {
+  unsigned NumSrcElts = SrcVT.getVectorNumElements();
+  unsigned NumDstElts = DstVT.getVectorNumElements();
+  unsigned SrcScalarBits = SrcVT.getScalarSizeInBits();
+  unsigned DstScalarBits = DstVT.getScalarSizeInBits();
+  unsigned Scale = DstScalarBits / SrcScalarBits;
+  assert(SrcScalarBits < DstScalarBits &&
+         "Expected zero extension mask to increase scalar size");
+  assert(NumSrcElts >= NumDstElts && "Too many zero extension lanes");
+
+  for (unsigned i = 0; i != NumDstElts; i++) {
+    Mask.push_back(i);
+    for (unsigned j = 1; j != Scale; j++)
+      Mask.push_back(SM_SentinelZero);
+  }
+}
+
+void DecodeZeroMoveLowMask(MVT VT, SmallVectorImpl<int> &ShuffleMask) {
+  unsigned NumElts = VT.getVectorNumElements();
+  ShuffleMask.push_back(0);
+  for (unsigned i = 1; i < NumElts; i++)
+    ShuffleMask.push_back(SM_SentinelZero);
+}
+
+void DecodeScalarMoveMask(MVT VT, bool IsLoad, SmallVectorImpl<int> &Mask) {
+  // First element comes from the first element of the second source.
+  // Remaining elements: Load zero extends / Move copies from the first source.
+  unsigned NumElts = VT.getVectorNumElements();
+  Mask.push_back(NumElts);
+  for (unsigned i = 1; i < NumElts; i++)
+    Mask.push_back(IsLoad ? SM_SentinelZero : i);
+}
 } // llvm namespace
Index: lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- lib/Target/X86/X86ISelLowering.cpp
+++ lib/Target/X86/X86ISelLowering.cpp
@@ -5517,16 +5517,9 @@
     IsUnary = true;
     break;
   case X86ISD::MOVSS:
-  case X86ISD::MOVSD: {
-    // The index 0 always comes from the first element of the second source,
-    // this is why MOVSS and MOVSD are used in the first place. The other
-    // elements come from the other positions of the first source vector
-    Mask.push_back(NumElems);
-    for (unsigned i = 1; i != NumElems; ++i) {
-      Mask.push_back(i);
-    }
+  case X86ISD::MOVSD:
+    DecodeScalarMoveMask(VT, /* IsLoad */ false, Mask);
     break;
-  }
   case X86ISD::VPERM2X128:
     ImmN = N->getOperand(N->getNumOperands()-1);
     DecodeVPERM2X128Mask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
@@ -24759,7 +24752,7 @@
       NewMask = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewMaskVT, Ops);
     }
-    
+
     SDValue WideLd = DAG.getMaskedLoad(WideVecVT, dl, Mld->getChain(),
                                        Mld->getBasePtr(), NewMask, WideSrc0,
                                        Mld->getMemoryVT(), Mld->getMemOperand(),
@@ -24789,7 +24782,7 @@
          "Unexpected size for truncating masked store");
   // We are going to use the original vector elt for storing.
   // Accumulated smaller vector elements must be a multiple of the store size.
- assert (((NumElems * FromSz) % ToSz) == 0 && + assert (((NumElems * FromSz) % ToSz) == 0 && "Unexpected ratio for truncating masked store"); unsigned SizeRatio = FromSz / ToSz; Index: test/CodeGen/X86/vector-shuffle-128-v16.ll =================================================================== --- test/CodeGen/X86/vector-shuffle-128-v16.ll +++ test/CodeGen/X86/vector-shuffle-128-v16.ll @@ -350,12 +350,12 @@ ; SSE2-NEXT: movdqa %xmm0, %xmm4 ; SSE2-NEXT: punpckhbw {{.*#+}} xmm4 = xmm4[8],xmm2[8],xmm4[9],xmm2[9],xmm4[10],xmm2[10],xmm4[11],xmm2[11],xmm4[12],xmm2[12],xmm4[13],xmm2[13],xmm4[14],xmm2[14],xmm4[15],xmm2[15] ; SSE2-NEXT: pshuflw {{.*#+}} xmm4 = xmm4[3,2,1,0,4,5,6,7] -; SSE2-NEXT: movsd %xmm4, %xmm3 +; SSE2-NEXT: movsd {{.*#+}} xmm3 = xmm4[0],xmm3[1] ; SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15] ; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,6,5,4] ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7] -; SSE2-NEXT: movsd %xmm0, %xmm1 +; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1] ; SSE2-NEXT: packuswb %xmm3, %xmm1 ; SSE2-NEXT: movdqa %xmm1, %xmm0 ; SSE2-NEXT: retq @@ -800,12 +800,12 @@ ; ; SSE41-LABEL: shuffle_v16i8_00_uu_uu_uu_uu_uu_uu_uu_01_uu_uu_uu_uu_uu_uu_uu: ; SSE41: # BB#0: -; SSE41-NEXT: pmovzxbq %xmm0, %xmm0 +; SSE41-NEXT: pmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero ; SSE41-NEXT: retq ; ; AVX-LABEL: shuffle_v16i8_00_uu_uu_uu_uu_uu_uu_uu_01_uu_uu_uu_uu_uu_uu_uu: ; AVX: # BB#0: -; AVX-NEXT: vpmovzxbq %xmm0, %xmm0 +; AVX-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero ; AVX-NEXT: retq %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> ret <16 x i8> %shuffle @@ -827,12 +827,12 @@ ; ; SSE41-LABEL: shuffle_v16i8_00_zz_zz_zz_zz_zz_zz_zz_01_zz_zz_zz_zz_zz_zz_zz: ; SSE41: # BB#0: -; SSE41-NEXT: pmovzxbq %xmm0, %xmm0 +; SSE41-NEXT: pmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero ; SSE41-NEXT: retq ; ; AVX-LABEL: shuffle_v16i8_00_zz_zz_zz_zz_zz_zz_zz_01_zz_zz_zz_zz_zz_zz_zz: ; AVX: # BB#0: -; AVX-NEXT: vpmovzxbq %xmm0, %xmm0 +; AVX-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero ; AVX-NEXT: retq %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> ret <16 x i8> %shuffle @@ -853,12 +853,12 @@ ; ; SSE41-LABEL: shuffle_v16i8_00_uu_uu_uu_01_uu_uu_uu_02_uu_uu_uu_03_uu_uu_uu: ; SSE41: # BB#0: -; SSE41-NEXT: pmovzxbd %xmm0, %xmm0 +; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero ; SSE41-NEXT: retq ; ; AVX-LABEL: shuffle_v16i8_00_uu_uu_uu_01_uu_uu_uu_02_uu_uu_uu_03_uu_uu_uu: ; AVX: # BB#0: -; AVX-NEXT: vpmovzxbd %xmm0, %xmm0 +; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero ; AVX-NEXT: retq %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> ret <16 x i8> %shuffle @@ -881,12 +881,12 @@ ; ; SSE41-LABEL: shuffle_v16i8_00_zz_zz_zz_01_zz_zz_zz_02_zz_zz_zz_03_zz_zz_zz: ; SSE41: # BB#0: -; 
SSE41-NEXT: pmovzxbd %xmm0, %xmm0 +; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero ; SSE41-NEXT: retq ; ; AVX-LABEL: shuffle_v16i8_00_zz_zz_zz_01_zz_zz_zz_02_zz_zz_zz_03_zz_zz_zz: ; AVX: # BB#0: -; AVX-NEXT: vpmovzxbd %xmm0, %xmm0 +; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero ; AVX-NEXT: retq %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> ret <16 x i8> %shuffle @@ -905,12 +905,12 @@ ; ; SSE41-LABEL: shuffle_v16i8_00_uu_01_uu_02_uu_03_uu_04_uu_05_uu_06_uu_07_uu: ; SSE41: # BB#0: -; SSE41-NEXT: pmovzxbw %xmm0, %xmm0 +; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero ; SSE41-NEXT: retq ; ; AVX-LABEL: shuffle_v16i8_00_uu_01_uu_02_uu_03_uu_04_uu_05_uu_06_uu_07_uu: ; AVX: # BB#0: -; AVX-NEXT: vpmovzxbw %xmm0, %xmm0 +; AVX-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero ; AVX-NEXT: retq %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> ret <16 x i8> %shuffle @@ -931,12 +931,12 @@ ; ; SSE41-LABEL: shuffle_v16i8_00_zz_01_zz_02_zz_03_zz_04_zz_05_zz_06_zz_07_zz: ; SSE41: # BB#0: -; SSE41-NEXT: pmovzxbw %xmm0, %xmm0 +; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero ; SSE41-NEXT: retq ; ; AVX-LABEL: shuffle_v16i8_00_zz_01_zz_02_zz_03_zz_04_zz_05_zz_06_zz_07_zz: ; AVX: # BB#0: -; AVX-NEXT: vpmovzxbw %xmm0, %xmm0 +; AVX-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero ; AVX-NEXT: retq %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> ret <16 x i8> %shuffle Index: test/CodeGen/X86/vector-shuffle-128-v2.ll =================================================================== --- test/CodeGen/X86/vector-shuffle-128-v2.ll +++ test/CodeGen/X86/vector-shuffle-128-v2.ll @@ -211,19 +211,19 @@ define <2 x double> @shuffle_v2f64_03(<2 x double> %a, <2 x double> %b) { ; SSE2-LABEL: shuffle_v2f64_03: ; SSE2: # BB#0: -; SSE2-NEXT: movsd %xmm0, %xmm1 +; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1] ; SSE2-NEXT: movaps %xmm1, %xmm0 ; SSE2-NEXT: retq ; ; SSE3-LABEL: shuffle_v2f64_03: ; SSE3: # BB#0: -; SSE3-NEXT: movsd %xmm0, %xmm1 +; SSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1] ; SSE3-NEXT: movaps %xmm1, %xmm0 ; SSE3-NEXT: retq ; ; SSSE3-LABEL: shuffle_v2f64_03: ; SSSE3: # BB#0: -; SSSE3-NEXT: movsd %xmm0, %xmm1 +; SSSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1] ; SSSE3-NEXT: movaps %xmm1, %xmm0 ; SSSE3-NEXT: retq ; @@ -242,17 +242,17 @@ define <2 x double> @shuffle_v2f64_21(<2 x double> %a, <2 x double> %b) { ; SSE2-LABEL: shuffle_v2f64_21: ; SSE2: # BB#0: -; SSE2-NEXT: movsd %xmm1, %xmm0 +; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1] ; SSE2-NEXT: retq ; ; SSE3-LABEL: shuffle_v2f64_21: ; SSE3: # BB#0: -; SSE3-NEXT: movsd %xmm1, %xmm0 +; SSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1] ; SSE3-NEXT: retq ; ; SSSE3-LABEL: shuffle_v2f64_21: ; SSSE3: # BB#0: -; SSSE3-NEXT: movsd %xmm1, %xmm0 +; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1] ; SSSE3-NEXT: retq ; ; SSE41-LABEL: shuffle_v2f64_21: @@ -299,19 +299,19 @@ define <2 x i64> @shuffle_v2i64_03(<2 x i64> %a, <2 x i64> %b) { ; SSE2-LABEL: 
shuffle_v2i64_03: ; SSE2: # BB#0: -; SSE2-NEXT: movsd %xmm0, %xmm1 +; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1] ; SSE2-NEXT: movaps %xmm1, %xmm0 ; SSE2-NEXT: retq ; ; SSE3-LABEL: shuffle_v2i64_03: ; SSE3: # BB#0: -; SSE3-NEXT: movsd %xmm0, %xmm1 +; SSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1] ; SSE3-NEXT: movaps %xmm1, %xmm0 ; SSE3-NEXT: retq ; ; SSSE3-LABEL: shuffle_v2i64_03: ; SSSE3: # BB#0: -; SSSE3-NEXT: movsd %xmm0, %xmm1 +; SSSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1] ; SSSE3-NEXT: movaps %xmm1, %xmm0 ; SSSE3-NEXT: retq ; @@ -335,19 +335,19 @@ define <2 x i64> @shuffle_v2i64_03_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) { ; SSE2-LABEL: shuffle_v2i64_03_copy: ; SSE2: # BB#0: -; SSE2-NEXT: movsd %xmm1, %xmm2 +; SSE2-NEXT: movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1] ; SSE2-NEXT: movaps %xmm2, %xmm0 ; SSE2-NEXT: retq ; ; SSE3-LABEL: shuffle_v2i64_03_copy: ; SSE3: # BB#0: -; SSE3-NEXT: movsd %xmm1, %xmm2 +; SSE3-NEXT: movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1] ; SSE3-NEXT: movaps %xmm2, %xmm0 ; SSE3-NEXT: retq ; ; SSSE3-LABEL: shuffle_v2i64_03_copy: ; SSSE3: # BB#0: -; SSSE3-NEXT: movsd %xmm1, %xmm2 +; SSSE3-NEXT: movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1] ; SSSE3-NEXT: movaps %xmm2, %xmm0 ; SSSE3-NEXT: retq ; @@ -489,17 +489,17 @@ define <2 x i64> @shuffle_v2i64_21(<2 x i64> %a, <2 x i64> %b) { ; SSE2-LABEL: shuffle_v2i64_21: ; SSE2: # BB#0: -; SSE2-NEXT: movsd %xmm1, %xmm0 +; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1] ; SSE2-NEXT: retq ; ; SSE3-LABEL: shuffle_v2i64_21: ; SSE3: # BB#0: -; SSE3-NEXT: movsd %xmm1, %xmm0 +; SSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1] ; SSE3-NEXT: retq ; ; SSSE3-LABEL: shuffle_v2i64_21: ; SSSE3: # BB#0: -; SSSE3-NEXT: movsd %xmm1, %xmm0 +; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1] ; SSSE3-NEXT: retq ; ; SSE41-LABEL: shuffle_v2i64_21: @@ -522,19 +522,19 @@ define <2 x i64> @shuffle_v2i64_21_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) { ; SSE2-LABEL: shuffle_v2i64_21_copy: ; SSE2: # BB#0: -; SSE2-NEXT: movsd %xmm2, %xmm1 +; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1] ; SSE2-NEXT: movaps %xmm1, %xmm0 ; SSE2-NEXT: retq ; ; SSE3-LABEL: shuffle_v2i64_21_copy: ; SSE3: # BB#0: -; SSE3-NEXT: movsd %xmm2, %xmm1 +; SSE3-NEXT: movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1] ; SSE3-NEXT: movaps %xmm1, %xmm0 ; SSE3-NEXT: retq ; ; SSSE3-LABEL: shuffle_v2i64_21_copy: ; SSSE3: # BB#0: -; SSSE3-NEXT: movsd %xmm2, %xmm1 +; SSSE3-NEXT: movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1] ; SSSE3-NEXT: movaps %xmm1, %xmm0 ; SSSE3-NEXT: retq ; @@ -650,12 +650,12 @@ define <2 x i64> @shuffle_v2i64_0z(<2 x i64> %a) { ; SSE-LABEL: shuffle_v2i64_0z: ; SSE: # BB#0: -; SSE-NEXT: movq %xmm0, %xmm0 +; SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero ; SSE-NEXT: retq ; ; AVX-LABEL: shuffle_v2i64_0z: ; AVX: # BB#0: -; AVX-NEXT: vmovq %xmm0, %xmm0 +; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero ; AVX-NEXT: retq %shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> ret <2 x i64> %shuffle @@ -693,19 +693,19 @@ ; SSE2-LABEL: shuffle_v2i64_z1: ; SSE2: # BB#0: ; SSE2-NEXT: xorps %xmm1, %xmm1 -; SSE2-NEXT: movsd %xmm1, %xmm0 +; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1] ; SSE2-NEXT: retq ; ; SSE3-LABEL: shuffle_v2i64_z1: ; SSE3: # BB#0: ; SSE3-NEXT: xorps %xmm1, %xmm1 -; SSE3-NEXT: movsd %xmm1, %xmm0 +; SSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1] ; SSE3-NEXT: retq ; ; SSSE3-LABEL: shuffle_v2i64_z1: ; SSSE3: # BB#0: ; SSSE3-NEXT: xorps %xmm1, %xmm1 -; SSSE3-NEXT: movsd %xmm1, %xmm0 +; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1] ; SSSE3-NEXT: retq ; ; 
SSE41-LABEL: shuffle_v2i64_z1: @@ -732,12 +732,12 @@ define <2 x double> @shuffle_v2f64_0z(<2 x double> %a) { ; SSE-LABEL: shuffle_v2f64_0z: ; SSE: # BB#0: -; SSE-NEXT: movq %xmm0, %xmm0 +; SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero ; SSE-NEXT: retq ; ; AVX-LABEL: shuffle_v2f64_0z: ; AVX: # BB#0: -; AVX-NEXT: vmovq %xmm0, %xmm0 +; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero ; AVX-NEXT: retq %shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> ret <2 x double> %shuffle @@ -780,19 +780,19 @@ ; SSE2-LABEL: shuffle_v2f64_z1: ; SSE2: # BB#0: ; SSE2-NEXT: xorps %xmm1, %xmm1 -; SSE2-NEXT: movsd %xmm1, %xmm0 +; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1] ; SSE2-NEXT: retq ; ; SSE3-LABEL: shuffle_v2f64_z1: ; SSE3: # BB#0: ; SSE3-NEXT: xorps %xmm1, %xmm1 -; SSE3-NEXT: movsd %xmm1, %xmm0 +; SSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1] ; SSE3-NEXT: retq ; ; SSSE3-LABEL: shuffle_v2f64_z1: ; SSSE3: # BB#0: ; SSSE3-NEXT: xorps %xmm1, %xmm1 -; SSSE3-NEXT: movsd %xmm1, %xmm0 +; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1] ; SSSE3-NEXT: retq ; ; SSE41-LABEL: shuffle_v2f64_z1: @@ -828,12 +828,12 @@ define <2 x i64> @insert_mem_and_zero_v2i64(i64* %ptr) { ; SSE-LABEL: insert_mem_and_zero_v2i64: ; SSE: # BB#0: -; SSE-NEXT: movq (%rdi), %xmm0 +; SSE-NEXT: movq {{.*#+}} xmm0 = mem[0],zero ; SSE-NEXT: retq ; ; AVX-LABEL: insert_mem_and_zero_v2i64: ; AVX: # BB#0: -; AVX-NEXT: vmovq (%rdi), %xmm0 +; AVX-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero ; AVX-NEXT: retq %a = load i64* %ptr %v = insertelement <2 x i64> undef, i64 %a, i32 0 @@ -844,12 +844,12 @@ define <2 x double> @insert_reg_and_zero_v2f64(double %a) { ; SSE-LABEL: insert_reg_and_zero_v2f64: ; SSE: # BB#0: -; SSE-NEXT: movq %xmm0, %xmm0 +; SSE-NEXT: movq %xmm0, %xmm0 {{.*#+}} xmm0 = xmm0[0],zero ; SSE-NEXT: retq ; ; AVX-LABEL: insert_reg_and_zero_v2f64: ; AVX: # BB#0: -; AVX-NEXT: vmovq %xmm0, %xmm0 +; AVX-NEXT: vmovq %xmm0, %xmm0 {{.*#+}} xmm0 = xmm0[0],zero ; AVX-NEXT: retq %v = insertelement <2 x double> undef, double %a, i32 0 %shuffle = shufflevector <2 x double> %v, <2 x double> zeroinitializer, <2 x i32> @@ -859,12 +859,12 @@ define <2 x double> @insert_mem_and_zero_v2f64(double* %ptr) { ; SSE-LABEL: insert_mem_and_zero_v2f64: ; SSE: # BB#0: -; SSE-NEXT: movsd (%rdi), %xmm0 +; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; SSE-NEXT: retq ; ; AVX-LABEL: insert_mem_and_zero_v2f64: ; AVX: # BB#0: -; AVX-NEXT: vmovsd (%rdi), %xmm0 +; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero ; AVX-NEXT: retq %a = load double* %ptr %v = insertelement <2 x double> undef, double %a, i32 0 @@ -876,19 +876,19 @@ ; SSE2-LABEL: insert_reg_lo_v2i64: ; SSE2: # BB#0: ; SSE2-NEXT: movd %rdi, %xmm1 -; SSE2-NEXT: movsd %xmm1, %xmm0 +; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1] ; SSE2-NEXT: retq ; ; SSE3-LABEL: insert_reg_lo_v2i64: ; SSE3: # BB#0: ; SSE3-NEXT: movd %rdi, %xmm1 -; SSE3-NEXT: movsd %xmm1, %xmm0 +; SSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1] ; SSE3-NEXT: retq ; ; SSSE3-LABEL: insert_reg_lo_v2i64: ; SSSE3: # BB#0: ; SSSE3-NEXT: movd %rdi, %xmm1 -; SSSE3-NEXT: movsd %xmm1, %xmm0 +; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1] ; SSSE3-NEXT: retq ; ; SSE41-LABEL: insert_reg_lo_v2i64: @@ -931,19 +931,19 @@ ; ; SSE41-LABEL: insert_mem_lo_v2i64: ; SSE41: # BB#0: -; SSE41-NEXT: movq (%rdi), %xmm1 +; SSE41-NEXT: movq {{.*#+}} xmm1 = mem[0],zero ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7] ; SSE41-NEXT: retq ; ; AVX1-LABEL: insert_mem_lo_v2i64: ; AVX1: # BB#0: -; AVX1-NEXT: vmovq (%rdi), %xmm1 +; 
AVX1-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7] ; AVX1-NEXT: retq ; ; AVX2-LABEL: insert_mem_lo_v2i64: ; AVX2: # BB#0: -; AVX2-NEXT: vmovq (%rdi), %xmm1 +; AVX2-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3] ; AVX2-NEXT: retq %a = load i64* %ptr @@ -972,13 +972,13 @@ define <2 x i64> @insert_mem_hi_v2i64(i64* %ptr, <2 x i64> %b) { ; SSE-LABEL: insert_mem_hi_v2i64: ; SSE: # BB#0: -; SSE-NEXT: movq (%rdi), %xmm1 +; SSE-NEXT: movq {{.*#+}} xmm1 = mem[0],zero ; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; SSE-NEXT: retq ; ; AVX-LABEL: insert_mem_hi_v2i64: ; AVX: # BB#0: -; AVX-NEXT: vmovq (%rdi), %xmm1 +; AVX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; AVX-NEXT: retq %a = load i64* %ptr @@ -990,13 +990,13 @@ define <2 x double> @insert_reg_lo_v2f64(double %a, <2 x double> %b) { ; SSE-LABEL: insert_reg_lo_v2f64: ; SSE: # BB#0: -; SSE-NEXT: movsd %xmm0, %xmm1 +; SSE-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1] ; SSE-NEXT: movaps %xmm1, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: insert_reg_lo_v2f64: ; AVX: # BB#0: -; AVX-NEXT: vmovsd %xmm0, %xmm1, %xmm0 +; AVX-NEXT: vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1] ; AVX-NEXT: retq %v = insertelement <2 x double> undef, double %a, i32 0 %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> @@ -1085,7 +1085,7 @@ define <2 x double> @insert_dup_mem_v2f64(double* %ptr) { ; SSE2-LABEL: insert_dup_mem_v2f64: ; SSE2: # BB#0: -; SSE2-NEXT: movsd (%rdi), %xmm0 +; SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0] ; SSE2-NEXT: retq ; Index: test/CodeGen/X86/vector-shuffle-128-v4.ll =================================================================== --- test/CodeGen/X86/vector-shuffle-128-v4.ll +++ test/CodeGen/X86/vector-shuffle-128-v4.ll @@ -441,21 +441,21 @@ ; SSE2-LABEL: shuffle_v4f32_4zzz: ; SSE2: # BB#0: ; SSE2-NEXT: xorps %xmm1, %xmm1 -; SSE2-NEXT: movss %xmm0, %xmm1 +; SSE2-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3] ; SSE2-NEXT: movaps %xmm1, %xmm0 ; SSE2-NEXT: retq ; ; SSE3-LABEL: shuffle_v4f32_4zzz: ; SSE3: # BB#0: ; SSE3-NEXT: xorps %xmm1, %xmm1 -; SSE3-NEXT: movss %xmm0, %xmm1 +; SSE3-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3] ; SSE3-NEXT: movaps %xmm1, %xmm0 ; SSE3-NEXT: retq ; ; SSSE3-LABEL: shuffle_v4f32_4zzz: ; SSSE3: # BB#0: ; SSSE3-NEXT: xorps %xmm1, %xmm1 -; SSSE3-NEXT: movss %xmm0, %xmm1 +; SSSE3-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3] ; SSSE3-NEXT: movaps %xmm1, %xmm0 ; SSSE3-NEXT: retq ; @@ -661,21 +661,21 @@ ; SSE2-LABEL: shuffle_v4i32_4zzz: ; SSE2: # BB#0: ; SSE2-NEXT: xorps %xmm1, %xmm1 -; SSE2-NEXT: movss %xmm0, %xmm1 +; SSE2-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3] ; SSE2-NEXT: movaps %xmm1, %xmm0 ; SSE2-NEXT: retq ; ; SSE3-LABEL: shuffle_v4i32_4zzz: ; SSE3: # BB#0: ; SSE3-NEXT: xorps %xmm1, %xmm1 -; SSE3-NEXT: movss %xmm0, %xmm1 +; SSE3-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3] ; SSE3-NEXT: movaps %xmm1, %xmm0 ; SSE3-NEXT: retq ; ; SSSE3-LABEL: shuffle_v4i32_4zzz: ; SSSE3: # BB#0: ; SSSE3-NEXT: xorps %xmm1, %xmm1 -; SSSE3-NEXT: movss %xmm0, %xmm1 +; SSSE3-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3] ; SSSE3-NEXT: movaps %xmm1, %xmm0 ; SSSE3-NEXT: retq ; @@ -698,21 +698,21 @@ ; SSE2-LABEL: shuffle_v4i32_z4zz: ; SSE2: # BB#0: ; SSE2-NEXT: xorps %xmm1, %xmm1 -; SSE2-NEXT: movss %xmm0, %xmm1 +; SSE2-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3] ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = 
xmm1[1,0,1,1] ; SSE2-NEXT: retq ; ; SSE3-LABEL: shuffle_v4i32_z4zz: ; SSE3: # BB#0: ; SSE3-NEXT: xorps %xmm1, %xmm1 -; SSE3-NEXT: movss %xmm0, %xmm1 +; SSE3-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3] ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,0,1,1] ; SSE3-NEXT: retq ; ; SSSE3-LABEL: shuffle_v4i32_z4zz: ; SSSE3: # BB#0: ; SSSE3-NEXT: xorps %xmm1, %xmm1 -; SSSE3-NEXT: movss %xmm0, %xmm1 +; SSSE3-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3] ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,0,1,1] ; SSSE3-NEXT: retq ; @@ -737,21 +737,21 @@ ; SSE2-LABEL: shuffle_v4i32_zz4z: ; SSE2: # BB#0: ; SSE2-NEXT: xorps %xmm1, %xmm1 -; SSE2-NEXT: movss %xmm0, %xmm1 +; SSE2-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3] ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,0,1] ; SSE2-NEXT: retq ; ; SSE3-LABEL: shuffle_v4i32_zz4z: ; SSE3: # BB#0: ; SSE3-NEXT: xorps %xmm1, %xmm1 -; SSE3-NEXT: movss %xmm0, %xmm1 +; SSE3-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3] ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,0,1] ; SSE3-NEXT: retq ; ; SSSE3-LABEL: shuffle_v4i32_zz4z: ; SSSE3: # BB#0: ; SSSE3-NEXT: xorps %xmm1, %xmm1 -; SSSE3-NEXT: movss %xmm0, %xmm1 +; SSSE3-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3] ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,0,1] ; SSSE3-NEXT: retq ; @@ -1033,12 +1033,12 @@ ; ; SSE41-LABEL: shuffle_v4i32_0u1u: ; SSE41: # BB#0: -; SSE41-NEXT: pmovzxdq %xmm0, %xmm0 +; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; SSE41-NEXT: retq ; ; AVX-LABEL: shuffle_v4i32_0u1u: ; AVX: # BB#0: -; AVX-NEXT: vpmovzxdq %xmm0, %xmm0 +; AVX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX-NEXT: retq %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> ret <4 x i32> %shuffle @@ -1065,12 +1065,12 @@ ; ; SSE41-LABEL: shuffle_v4i32_0z1z: ; SSE41: # BB#0: -; SSE41-NEXT: pmovzxdq %xmm0, %xmm0 +; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; SSE41-NEXT: retq ; ; AVX-LABEL: shuffle_v4i32_0z1z: ; AVX: # BB#0: -; AVX-NEXT: vpmovzxdq %xmm0, %xmm0 +; AVX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX-NEXT: retq %shuffle = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> ret <4 x i32> %shuffle @@ -1094,12 +1094,12 @@ define <4 x i32> @insert_mem_and_zero_v4i32(i32* %ptr) { ; SSE-LABEL: insert_mem_and_zero_v4i32: ; SSE: # BB#0: -; SSE-NEXT: movd (%rdi), %xmm0 +; SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; SSE-NEXT: retq ; ; AVX-LABEL: insert_mem_and_zero_v4i32: ; AVX: # BB#0: -; AVX-NEXT: vmovd (%rdi), %xmm0 +; AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; AVX-NEXT: retq %a = load i32* %ptr %v = insertelement <4 x i32> undef, i32 %a, i32 0 @@ -1111,21 +1111,21 @@ ; SSE2-LABEL: insert_reg_and_zero_v4f32: ; SSE2: # BB#0: ; SSE2-NEXT: xorps %xmm1, %xmm1 -; SSE2-NEXT: movss %xmm0, %xmm1 +; SSE2-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3] ; SSE2-NEXT: movaps %xmm1, %xmm0 ; SSE2-NEXT: retq ; ; SSE3-LABEL: insert_reg_and_zero_v4f32: ; SSE3: # BB#0: ; SSE3-NEXT: xorps %xmm1, %xmm1 -; SSE3-NEXT: movss %xmm0, %xmm1 +; SSE3-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3] ; SSE3-NEXT: movaps %xmm1, %xmm0 ; SSE3-NEXT: retq ; ; SSSE3-LABEL: insert_reg_and_zero_v4f32: ; SSSE3: # BB#0: ; SSSE3-NEXT: xorps %xmm1, %xmm1 -; SSSE3-NEXT: movss %xmm0, %xmm1 +; SSSE3-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3] ; SSSE3-NEXT: movaps %xmm1, %xmm0 ; SSSE3-NEXT: retq ; @@ -1138,7 +1138,7 @@ ; AVX-LABEL: insert_reg_and_zero_v4f32: ; AVX: # BB#0: ; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1 -; AVX-NEXT: vmovss %xmm0, %xmm1, %xmm0 +; 
AVX-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3] ; AVX-NEXT: retq %v = insertelement <4 x float> undef, float %a, i32 0 %shuffle = shufflevector <4 x float> %v, <4 x float> zeroinitializer, <4 x i32> @@ -1148,12 +1148,12 @@ define <4 x float> @insert_mem_and_zero_v4f32(float* %ptr) { ; SSE-LABEL: insert_mem_and_zero_v4f32: ; SSE: # BB#0: -; SSE-NEXT: movss (%rdi), %xmm0 +; SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; SSE-NEXT: retq ; ; AVX-LABEL: insert_mem_and_zero_v4f32: ; AVX: # BB#0: -; AVX-NEXT: vmovss (%rdi), %xmm0 +; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; AVX-NEXT: retq %a = load float* %ptr %v = insertelement <4 x float> undef, float %a, i32 0 @@ -1165,19 +1165,19 @@ ; SSE2-LABEL: insert_reg_lo_v4i32: ; SSE2: # BB#0: ; SSE2-NEXT: movd %rdi, %xmm1 -; SSE2-NEXT: movsd %xmm1, %xmm0 +; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1] ; SSE2-NEXT: retq ; ; SSE3-LABEL: insert_reg_lo_v4i32: ; SSE3: # BB#0: ; SSE3-NEXT: movd %rdi, %xmm1 -; SSE3-NEXT: movsd %xmm1, %xmm0 +; SSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1] ; SSE3-NEXT: retq ; ; SSSE3-LABEL: insert_reg_lo_v4i32: ; SSSE3: # BB#0: ; SSSE3-NEXT: movd %rdi, %xmm1 -; SSSE3-NEXT: movsd %xmm1, %xmm0 +; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1] ; SSSE3-NEXT: retq ; ; SSE41-LABEL: insert_reg_lo_v4i32: @@ -1221,19 +1221,19 @@ ; ; SSE41-LABEL: insert_mem_lo_v4i32: ; SSE41: # BB#0: -; SSE41-NEXT: movq (%rdi), %xmm1 +; SSE41-NEXT: movq {{.*#+}} xmm1 = mem[0],zero ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7] ; SSE41-NEXT: retq ; ; AVX1-LABEL: insert_mem_lo_v4i32: ; AVX1: # BB#0: -; AVX1-NEXT: vmovq (%rdi), %xmm1 +; AVX1-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7] ; AVX1-NEXT: retq ; ; AVX2-LABEL: insert_mem_lo_v4i32: ; AVX2: # BB#0: -; AVX2-NEXT: vmovq (%rdi), %xmm1 +; AVX2-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3] ; AVX2-NEXT: retq %a = load <2 x i32>* %ptr @@ -1263,13 +1263,13 @@ define <4 x i32> @insert_mem_hi_v4i32(<2 x i32>* %ptr, <4 x i32> %b) { ; SSE-LABEL: insert_mem_hi_v4i32: ; SSE: # BB#0: -; SSE-NEXT: movq (%rdi), %xmm1 +; SSE-NEXT: movq {{.*#+}} xmm1 = mem[0],zero ; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; SSE-NEXT: retq ; ; AVX-LABEL: insert_mem_hi_v4i32: ; AVX: # BB#0: -; AVX-NEXT: vmovq (%rdi), %xmm1 +; AVX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; AVX-NEXT: retq %a = load <2 x i32>* %ptr @@ -1281,13 +1281,13 @@ define <4 x float> @insert_reg_lo_v4f32(double %a, <4 x float> %b) { ; SSE-LABEL: insert_reg_lo_v4f32: ; SSE: # BB#0: -; SSE-NEXT: movsd %xmm0, %xmm1 +; SSE-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1] ; SSE-NEXT: movaps %xmm1, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: insert_reg_lo_v4f32: ; AVX: # BB#0: -; AVX-NEXT: vmovsd %xmm0, %xmm1, %xmm0 +; AVX-NEXT: vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1] ; AVX-NEXT: retq %a.cast = bitcast double %a to <2 x float> %v = shufflevector <2 x float> %a.cast, <2 x float> undef, <4 x i32> Index: test/CodeGen/X86/vector-shuffle-128-v8.ll =================================================================== --- test/CodeGen/X86/vector-shuffle-128-v8.ll +++ test/CodeGen/X86/vector-shuffle-128-v8.ll @@ -1829,12 +1829,12 @@ ; ; SSE41-LABEL: shuffle_v8i16_0uuu1uuu: ; SSE41: # BB#0: -; SSE41-NEXT: pmovzxwq %xmm0, %xmm0 +; SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero ; SSE41-NEXT: retq ; ; AVX-LABEL: 
shuffle_v8i16_0uuu1uuu: ; AVX: # BB#0: -; AVX-NEXT: vpmovzxwq %xmm0, %xmm0 +; AVX-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero ; AVX-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> ret <8 x i16> %shuffle @@ -1857,12 +1857,12 @@ ; ; SSE41-LABEL: shuffle_v8i16_0zzz1zzz: ; SSE41: # BB#0: -; SSE41-NEXT: pmovzxwq %xmm0, %xmm0 +; SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero ; SSE41-NEXT: retq ; ; AVX-LABEL: shuffle_v8i16_0zzz1zzz: ; AVX: # BB#0: -; AVX-NEXT: vpmovzxwq %xmm0, %xmm0 +; AVX-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero ; AVX-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> ret <8 x i16> %shuffle @@ -1881,12 +1881,12 @@ ; ; SSE41-LABEL: shuffle_v8i16_0u1u2u3u: ; SSE41: # BB#0: -; SSE41-NEXT: pmovzxwd %xmm0, %xmm0 +; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero ; SSE41-NEXT: retq ; ; AVX-LABEL: shuffle_v8i16_0u1u2u3u: ; AVX: # BB#0: -; AVX-NEXT: vpmovzxwd %xmm0, %xmm0 +; AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero ; AVX-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> ret <8 x i16> %shuffle @@ -1907,12 +1907,12 @@ ; ; SSE41-LABEL: shuffle_v8i16_0z1z2z3z: ; SSE41: # BB#0: -; SSE41-NEXT: pmovzxwd %xmm0, %xmm0 +; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero ; SSE41-NEXT: retq ; ; AVX-LABEL: shuffle_v8i16_0z1z2z3z: ; AVX: # BB#0: -; AVX-NEXT: vpmovzxwd %xmm0, %xmm0 +; AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero ; AVX-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> ret <8 x i16> %shuffle Index: test/CodeGen/X86/vector-shuffle-256-v4.ll =================================================================== --- test/CodeGen/X86/vector-shuffle-256-v4.ll +++ test/CodeGen/X86/vector-shuffle-256-v4.ll @@ -794,14 +794,14 @@ define <4 x i64> @insert_mem_and_zero_v4i64(i64* %ptr) { ; AVX1-LABEL: insert_mem_and_zero_v4i64: ; AVX1: # BB#0: -; AVX1-NEXT: vmovq (%rdi), %xmm0 +; AVX1-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero ; AVX1-NEXT: vxorpd %ymm1, %ymm1, %ymm1 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3] ; AVX1-NEXT: retq ; ; AVX2-LABEL: insert_mem_and_zero_v4i64: ; AVX2: # BB#0: -; AVX2-NEXT: vmovq (%rdi), %xmm0 +; AVX2-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero ; AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7] ; AVX2-NEXT: retq @@ -815,7 +815,7 @@ ; ALL-LABEL: insert_reg_and_zero_v4f64: ; ALL: # BB#0: ; ALL-NEXT: vxorps %xmm1, %xmm1, %xmm1 -; ALL-NEXT: vmovsd %xmm0, %xmm1, %xmm0 +; ALL-NEXT: vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1] ; ALL-NEXT: retq %v = insertelement <4 x double> undef, double %a, i32 0 %shuffle = shufflevector <4 x double> %v, <4 x double> zeroinitializer, <4 x i32> @@ -825,7 +825,7 @@ define <4 x double> @insert_mem_and_zero_v4f64(double* %ptr) { ; ALL-LABEL: insert_mem_and_zero_v4f64: ; ALL: # BB#0: -; ALL-NEXT: vmovsd (%rdi), %xmm0 +; ALL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero ; ALL-NEXT: retq %a = load double* %ptr %v = insertelement <4 x double> undef, double %a, i32 0 Index: test/CodeGen/X86/vector-shuffle-256-v8.ll =================================================================== --- test/CodeGen/X86/vector-shuffle-256-v8.ll +++ test/CodeGen/X86/vector-shuffle-256-v8.ll @@ -1853,7 
+1853,7 @@
 define <8 x float> @concat_v2f32_1(<2 x float>* %tmp64, <2 x float>* %tmp65) {
 ; ALL-LABEL: concat_v2f32_1:
 ; ALL: # BB#0: # %entry
-; ALL-NEXT: vmovq (%rdi), %xmm0
+; ALL-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
 ; ALL-NEXT: vmovhpd (%rsi), %xmm0, %xmm0
 ; ALL-NEXT: retq
 entry:
@@ -1868,7 +1868,7 @@
 define <8 x float> @concat_v2f32_2(<2 x float>* %tmp64, <2 x float>* %tmp65) {
 ; ALL-LABEL: concat_v2f32_2:
 ; ALL: # BB#0: # %entry
-; ALL-NEXT: vmovq (%rdi), %xmm0
+; ALL-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
 ; ALL-NEXT: vmovhpd (%rsi), %xmm0, %xmm0
 ; ALL-NEXT: retq
 entry:
@@ -1881,7 +1881,7 @@
 define <8 x float> @concat_v2f32_3(<2 x float>* %tmp64, <2 x float>* %tmp65) {
 ; ALL-LABEL: concat_v2f32_3:
 ; ALL: # BB#0: # %entry
-; ALL-NEXT: vmovq (%rdi), %xmm0
+; ALL-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
 ; ALL-NEXT: vmovhpd (%rsi), %xmm0, %xmm0
 ; ALL-NEXT: retq
 entry: