Index: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp =================================================================== --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp @@ -5942,12 +5942,21 @@ for (unsigned i = 0; i < 16; ++i) { bool IsNonZero = (NonZeros & (1 << i)) != 0; if (IsNonZero) { + // If the build vector contains zeros or our first insertion is not the + // first index then insert into zero vector to break any register + // dependency else use SCALAR_TO_VECTOR/VZEXT_MOVL. if (First) { - if (NumZero) - V = getZeroVector(MVT::v16i8, Subtarget, DAG, dl); - else - V = DAG.getUNDEF(MVT::v16i8); First = false; + if (NumZero || 0 != i) + V = getZeroVector(MVT::v16i8, Subtarget, DAG, dl); + else { + assert(0 == i && "Expected insertion into zero-index"); + V = DAG.getAnyExtOrTrunc(Op.getOperand(i), dl, MVT::i32); + V = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, V); + V = DAG.getNode(X86ISD::VZEXT_MOVL, dl, MVT::v4i32, V); + V = DAG.getBitcast(MVT::v16i8, V); + continue; + } } V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v16i8, V, Op.getOperand(i), DAG.getIntPtrConstant(i, dl)); @@ -5969,6 +5978,8 @@ } if ((i & 1) != 0) { + // FIXME: Investigate extending to i32 instead of just i16. + // FIXME: Investigate combining the first 4 bytes as a i32 instead. SDValue ThisElt, LastElt; bool LastIsNonZero = (NonZeros & (1 << (i - 1))) != 0; if (LastIsNonZero) { @@ -5984,9 +5995,18 @@ } else ThisElt = LastElt; - if (ThisElt) - V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v8i16, V, ThisElt, - DAG.getIntPtrConstant(i / 2, dl)); + if (ThisElt) { + if (1 == i) { + V = NumZero ? DAG.getZExtOrTrunc(ThisElt, dl, MVT::i32) + : DAG.getAnyExtOrTrunc(ThisElt, dl, MVT::i32); + V = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, V); + V = DAG.getNode(X86ISD::VZEXT_MOVL, dl, MVT::v4i32, V); + V = DAG.getBitcast(MVT::v8i16, V); + } else { + V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v8i16, V, ThisElt, + DAG.getIntPtrConstant(i / 2, dl)); + } + } } } @@ -6007,12 +6027,21 @@ for (unsigned i = 0; i < 8; ++i) { bool IsNonZero = (NonZeros & (1 << i)) != 0; if (IsNonZero) { + // If the build vector contains zeros or our first insertion is not the + // first index then insert into zero vector to break any register + // dependency else use SCALAR_TO_VECTOR/VZEXT_MOVL. if (First) { - if (NumZero) - V = getZeroVector(MVT::v8i16, Subtarget, DAG, dl); - else - V = DAG.getUNDEF(MVT::v8i16); First = false; + if (NumZero || 0 != i) + V = getZeroVector(MVT::v8i16, Subtarget, DAG, dl); + else { + assert(0 == i && "Expected insertion into zero-index"); + V = DAG.getAnyExtOrTrunc(Op.getOperand(i), dl, MVT::i32); + V = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, V); + V = DAG.getNode(X86ISD::VZEXT_MOVL, dl, MVT::v4i32, V); + V = DAG.getBitcast(MVT::v8i16, V); + continue; + } } V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v8i16, V, Op.getOperand(i), DAG.getIntPtrConstant(i, dl)); Index: llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll +++ llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll @@ -1062,7 +1062,7 @@ ; CHECK-NEXT: vpcmpordq %zmm1, %zmm0, %k0 ; CHECK-NEXT: kmovw %k4, %eax ; CHECK-NEXT: kmovw %k3, %ecx -; CHECK-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; CHECK-NEXT: vmovd %ecx, %xmm0 ; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ; CHECK-NEXT: kmovw %k5, %eax ; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 @@ -1110,7 +1110,7 @@ ; CHECK-NEXT: vpcmpordq %zmm1, %zmm0, %k3 {%k3} ; CHECK-NEXT: kmovw %k5, %eax ; CHECK-NEXT: kmovw %k4, %ecx -; CHECK-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; CHECK-NEXT: vmovd %ecx, %xmm0 ; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ; CHECK-NEXT: kmovw %k6, %eax ; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 @@ -1159,7 +1159,7 @@ ; CHECK-NEXT: vpcmporduq %zmm1, %zmm0, %k0 ; CHECK-NEXT: kmovw %k4, %eax ; CHECK-NEXT: kmovw %k3, %ecx -; CHECK-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; CHECK-NEXT: vmovd %ecx, %xmm0 ; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ; CHECK-NEXT: kmovw %k5, %eax ; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 @@ -1207,7 +1207,7 @@ ; CHECK-NEXT: vpcmporduq %zmm1, %zmm0, %k3 {%k3} ; CHECK-NEXT: kmovw %k5, %eax ; CHECK-NEXT: kmovw %k4, %ecx -; CHECK-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; CHECK-NEXT: vmovd %ecx, %xmm0 ; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ; CHECK-NEXT: kmovw %k6, %eax ; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 Index: llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics.ll +++ llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics.ll @@ -696,7 +696,7 @@ ; CHECK-NEXT: vpcmpordw %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x08,0x3f,0xc1,0x07] ; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4] ; CHECK-NEXT: kmovw %k3, %ecx ## encoding: [0xc5,0xf8,0x93,0xcb] -; CHECK-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x00] +; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1] ; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] ; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5] ; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] @@ -744,7 +744,7 @@ ; CHECK-NEXT: vpcmpordw %xmm1, %xmm0, %k3 {%k3} ## encoding: [0x62,0xf3,0xfd,0x0b,0x3f,0xd9,0x07] ; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5] ; CHECK-NEXT: kmovw %k4, %ecx ## encoding: [0xc5,0xf8,0x93,0xcc] -; CHECK-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x00] +; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1] ; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] ; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6] ; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] @@ -793,7 +793,7 @@ ; CHECK-NEXT: vpcmporduw %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x08,0x3e,0xc1,0x07] ; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4] ; CHECK-NEXT: kmovw %k3, %ecx ## encoding: [0xc5,0xf8,0x93,0xcb] -; CHECK-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x00] +; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1] ; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] ; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5] ; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] @@ -841,7 +841,7 @@ ; CHECK-NEXT: vpcmporduw %xmm1, %xmm0, %k3 {%k3} ## encoding: [0x62,0xf3,0xfd,0x0b,0x3e,0xd9,0x07] ; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5] ; CHECK-NEXT: kmovw %k4, %ecx ## encoding: [0xc5,0xf8,0x93,0xcc] -; CHECK-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x00] +; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1] ; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] ; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6] ; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] Index: llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll +++ llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll @@ -16,7 +16,7 @@ ; CHECK-NEXT: vpcmpordd %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x28,0x1f,0xc1,0x07] ; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4] ; CHECK-NEXT: kmovw %k3, %ecx ## encoding: [0xc5,0xf8,0x93,0xcb] -; CHECK-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x00] +; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1] ; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] ; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5] ; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] @@ -64,7 +64,7 @@ ; CHECK-NEXT: vpcmpordd %ymm1, %ymm0, %k3 {%k3} ## encoding: [0x62,0xf3,0x7d,0x2b,0x1f,0xd9,0x07] ; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5] ; CHECK-NEXT: kmovw %k4, %ecx ## encoding: [0xc5,0xf8,0x93,0xcc] -; CHECK-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x00] +; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1] ; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] ; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6] ; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] @@ -113,7 +113,7 @@ ; CHECK-NEXT: vpcmpordud %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x28,0x1e,0xc1,0x07] ; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4] ; CHECK-NEXT: kmovw %k3, %ecx ## encoding: [0xc5,0xf8,0x93,0xcb] -; CHECK-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x00] +; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1] ; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] ; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5] ; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] @@ -161,7 +161,7 @@ ; CHECK-NEXT: vpcmpordud %ymm1, %ymm0, %k3 {%k3} ## encoding: [0x62,0xf3,0x7d,0x2b,0x1e,0xd9,0x07] ; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5] ; CHECK-NEXT: kmovw %k4, %ecx ## encoding: [0xc5,0xf8,0x93,0xcc] -; CHECK-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x00] +; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1] ; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] ; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6] ; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] @@ -210,7 +210,7 @@ ; CHECK-NEXT: vpcmpordq %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xc1,0x07] ; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7] ; CHECK-NEXT: kmovw %k5, %ecx ## encoding: [0xc5,0xf8,0x93,0xcd] -; CHECK-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x00] +; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1] ; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] ; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6] ; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] @@ -258,7 +258,7 @@ ; CHECK-NEXT: vpcmpordq %ymm1, %ymm0, %k7 {%k7} ## encoding: [0x62,0xf3,0xfd,0x2f,0x1f,0xf9,0x07] ; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] ; CHECK-NEXT: kmovw %k5, %ecx ## encoding: [0xc5,0xf8,0x93,0xcd] -; CHECK-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x00] +; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1] ; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] ; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6] ; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] @@ -307,7 +307,7 @@ ; CHECK-NEXT: vpcmporduq %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x28,0x1e,0xc1,0x07] ; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7] ; CHECK-NEXT: kmovw %k5, %ecx ## encoding: [0xc5,0xf8,0x93,0xcd] -; CHECK-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x00] +; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1] ; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] ; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6] ; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] @@ -355,7 +355,7 @@ ; CHECK-NEXT: vpcmporduq %ymm1, %ymm0, %k7 {%k7} ## encoding: [0x62,0xf3,0xfd,0x2f,0x1e,0xf9,0x07] ; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] ; CHECK-NEXT: kmovw %k5, %ecx ## encoding: [0xc5,0xf8,0x93,0xcd] -; CHECK-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x00] +; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1] ; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] ; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6] ; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] @@ -406,7 +406,7 @@ ; CHECK-NEXT: vpcmpordd %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x08,0x1f,0xc1,0x07] ; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7] ; CHECK-NEXT: kmovw %k5, %ecx ## encoding: [0xc5,0xf8,0x93,0xcd] -; CHECK-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x00] +; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1] ; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] ; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6] ; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] @@ -454,7 +454,7 @@ ; CHECK-NEXT: vpcmpordd %xmm1, %xmm0, %k7 {%k7} ## encoding: [0x62,0xf3,0x7d,0x0f,0x1f,0xf9,0x07] ; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] ; CHECK-NEXT: kmovw %k5, %ecx ## encoding: [0xc5,0xf8,0x93,0xcd] -; CHECK-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x00] +; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1] ; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] ; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6] ; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] @@ -503,7 +503,7 @@ ; CHECK-NEXT: vpcmpordud %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x08,0x1e,0xc1,0x07] ; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7] ; CHECK-NEXT: kmovw %k5, %ecx ## encoding: [0xc5,0xf8,0x93,0xcd] -; CHECK-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x00] +; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1] ; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] ; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6] ; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] @@ -551,7 +551,7 @@ ; CHECK-NEXT: vpcmpordud %xmm1, %xmm0, %k7 {%k7} ## encoding: [0x62,0xf3,0x7d,0x0f,0x1e,0xf9,0x07] ; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] ; CHECK-NEXT: kmovw %k5, %ecx ## encoding: [0xc5,0xf8,0x93,0xcd] -; CHECK-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x00] +; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1] ; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] ; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6] ; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] @@ -600,7 +600,7 @@ ; CHECK-NEXT: vpcmpordq %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x08,0x1f,0xc1,0x07] ; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7] ; CHECK-NEXT: kmovw %k5, %ecx ## encoding: [0xc5,0xf8,0x93,0xcd] -; CHECK-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x00] +; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1] ; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] ; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6] ; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] @@ -648,7 +648,7 @@ ; CHECK-NEXT: vpcmpordq %xmm1, %xmm0, %k7 {%k7} ## encoding: [0x62,0xf3,0xfd,0x0f,0x1f,0xf9,0x07] ; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] ; CHECK-NEXT: kmovw %k5, %ecx ## encoding: [0xc5,0xf8,0x93,0xcd] -; CHECK-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x00] +; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1] ; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] ; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6] ; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] @@ -697,7 +697,7 @@ ; CHECK-NEXT: vpcmporduq %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x08,0x1e,0xc1,0x07] ; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7] ; CHECK-NEXT: kmovw %k5, %ecx ## encoding: [0xc5,0xf8,0x93,0xcd] -; CHECK-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x00] +; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1] ; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] ; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6] ; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] @@ -745,7 +745,7 @@ ; CHECK-NEXT: vpcmporduq %xmm1, %xmm0, %k7 {%k7} ## encoding: [0x62,0xf3,0xfd,0x0f,0x1e,0xf9,0x07] ; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] ; CHECK-NEXT: kmovw %k5, %ecx ## encoding: [0xc5,0xf8,0x93,0xcd] -; CHECK-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x00] +; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1] ; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] ; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6] ; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] Index: llvm/trunk/test/CodeGen/X86/buildvec-insertvec.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/buildvec-insertvec.ll +++ llvm/trunk/test/CodeGen/X86/buildvec-insertvec.ll @@ -270,6 +270,7 @@ define <8 x i16> @test_buildvector_v8i16_partial(i16 %a1, i16 %a3, i16 %a4, i16 %a5) { ; CHECK-LABEL: test_buildvector_v8i16_partial: ; CHECK: # BB#0: +; CHECK-NEXT: pxor %xmm0, %xmm0 ; CHECK-NEXT: pinsrw $1, %edi, %xmm0 ; CHECK-NEXT: pinsrw $3, %esi, %xmm0 ; CHECK-NEXT: pinsrw $4, %edx, %xmm0 @@ -419,6 +420,7 @@ ; ; SSE41-LABEL: test_buildvector_v16i8_partial: ; SSE41: # BB#0: +; SSE41-NEXT: pxor %xmm0, %xmm0 ; SSE41-NEXT: pinsrb $2, %edi, %xmm0 ; SSE41-NEXT: pinsrb $6, %esi, %xmm0 ; SSE41-NEXT: pinsrb $8, %edx, %xmm0 @@ -448,10 +450,9 @@ define <16 x i8> @test_buildvector_v16i8_register_zero(i8 %a0, i8 %a4, i8 %a6, i8 %a8, i8 %a11, i8 %a12, i8 %a15) { ; SSE2-LABEL: test_buildvector_v16i8_register_zero: ; SSE2: # BB#0: -; SSE2-NEXT: movzbl %dil, %eax -; SSE2-NEXT: pxor %xmm0, %xmm0 -; SSE2-NEXT: pinsrw $0, %eax, %xmm0 ; SSE2-NEXT: movzbl %sil, %eax +; SSE2-NEXT: movzbl %dil, %esi +; SSE2-NEXT: movd %esi, %xmm0 ; SSE2-NEXT: pinsrw $2, %eax, %xmm0 ; SSE2-NEXT: movzbl %dl, %eax ; SSE2-NEXT: pinsrw $3, %eax, %xmm0 Index: llvm/trunk/test/CodeGen/X86/promote-vec3.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/promote-vec3.ll +++ llvm/trunk/test/CodeGen/X86/promote-vec3.ll @@ -9,17 +9,16 @@ ; SSE3-LABEL: zext_i8: ; SSE3: # BB#0: ; SSE3-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; SSE3-NEXT: pxor %xmm0, %xmm0 -; SSE3-NEXT: pxor %xmm1, %xmm1 -; SSE3-NEXT: pinsrw $0, %eax, %xmm1 +; SSE3-NEXT: movd %eax, %xmm0 ; SSE3-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; SSE3-NEXT: pinsrw $1, %eax, %xmm1 +; SSE3-NEXT: pinsrw $1, %eax, %xmm0 ; SSE3-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; SSE3-NEXT: pinsrw $2, %eax, %xmm1 -; SSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] -; SSE3-NEXT: movd %xmm1, %eax -; SSE3-NEXT: pextrw $2, %xmm1, %edx -; SSE3-NEXT: pextrw $4, %xmm1, %ecx +; SSE3-NEXT: pinsrw $2, %eax, %xmm0 +; SSE3-NEXT: pxor %xmm1, %xmm1 +; SSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; SSE3-NEXT: movd %xmm0, %eax +; SSE3-NEXT: pextrw $2, %xmm0, %edx +; SSE3-NEXT: pextrw $4, %xmm0, %ecx ; SSE3-NEXT: # kill: %AX %AX %EAX ; SSE3-NEXT: # kill: %DX %DX %EDX ; SSE3-NEXT: # kill: %CX %CX %ECX @@ -74,7 +73,7 @@ ; SSE3-LABEL: sext_i8: ; SSE3: # BB#0: ; SSE3-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; SSE3-NEXT: pinsrw $0, %eax, %xmm0 +; SSE3-NEXT: movd %eax, %xmm0 ; SSE3-NEXT: movzbl {{[0-9]+}}(%esp), %eax ; SSE3-NEXT: pinsrw $1, %eax, %xmm0 ; SSE3-NEXT: movzbl {{[0-9]+}}(%esp), %eax @@ -93,7 +92,7 @@ ; ; SSE41-LABEL: sext_i8: ; SSE41: # BB#0: -; SSE41-NEXT: pinsrb $0, {{[0-9]+}}(%esp), %xmm0 +; SSE41-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; SSE41-NEXT: pinsrb $4, {{[0-9]+}}(%esp), %xmm0 ; SSE41-NEXT: pinsrb $8, {{[0-9]+}}(%esp), %xmm0 ; SSE41-NEXT: pslld $24, %xmm0 @@ -108,7 +107,7 @@ ; ; AVX-32-LABEL: sext_i8: ; AVX-32: # BB#0: -; AVX-32-NEXT: vpinsrb $0, {{[0-9]+}}(%esp), %xmm0, %xmm0 +; AVX-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; AVX-32-NEXT: vpinsrb $4, {{[0-9]+}}(%esp), %xmm0, %xmm0 ; AVX-32-NEXT: vpinsrb $8, {{[0-9]+}}(%esp), %xmm0, %xmm0 ; AVX-32-NEXT: vpslld $24, %xmm0, %xmm0 Index: llvm/trunk/test/CodeGen/X86/shuffle-vs-trunc-512.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/shuffle-vs-trunc-512.ll +++ llvm/trunk/test/CodeGen/X86/shuffle-vs-trunc-512.ll @@ -482,7 +482,7 @@ ; AVX512BW-NEXT: vpextrb $0, %xmm1, %ecx ; AVX512BW-NEXT: vpextrb $8, %xmm0, %edx ; AVX512BW-NEXT: vpextrb $0, %xmm0, %edi -; AVX512BW-NEXT: vpinsrb $0, %edi, %xmm0, %xmm0 +; AVX512BW-NEXT: vmovd %edi, %xmm0 ; AVX512BW-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 ; AVX512BW-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 ; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 @@ -496,9 +496,9 @@ ; AVX512BWVL-LABEL: shuffle_v64i8_to_v8i8: ; AVX512BWVL: # BB#0: ; AVX512BWVL-NEXT: vmovdqu8 (%rdi), %zmm0 -; AVX512BWVL-NEXT: vpextrb $0, %xmm0, %eax -; AVX512BWVL-NEXT: vpinsrb $0, %eax, %xmm0, %xmm1 ; AVX512BWVL-NEXT: vpextrb $8, %xmm0, %eax +; AVX512BWVL-NEXT: vpextrb $0, %xmm0, %ecx +; AVX512BWVL-NEXT: vmovd %ecx, %xmm1 ; AVX512BWVL-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1 ; AVX512BWVL-NEXT: vextracti32x4 $1, %zmm0, %xmm2 ; AVX512BWVL-NEXT: vpextrb $0, %xmm2, %eax Index: llvm/trunk/test/CodeGen/X86/vec_cast2.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/vec_cast2.ll +++ llvm/trunk/test/CodeGen/X86/vec_cast2.ll @@ -97,10 +97,10 @@ ; ; CHECK-WIDE-LABEL: foo3_8: ; CHECK-WIDE: ## BB#0: -; CHECK-WIDE-NEXT: vcvttss2si %xmm0, %eax -; CHECK-WIDE-NEXT: vpinsrb $0, %eax, %xmm0, %xmm1 -; CHECK-WIDE-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] -; CHECK-WIDE-NEXT: vcvttss2si %xmm2, %eax +; CHECK-WIDE-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] +; CHECK-WIDE-NEXT: vcvttss2si %xmm1, %eax +; CHECK-WIDE-NEXT: vcvttss2si %xmm0, %ecx +; CHECK-WIDE-NEXT: vmovd %ecx, %xmm1 ; CHECK-WIDE-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1 ; CHECK-WIDE-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] ; CHECK-WIDE-NEXT: vcvttss2si %xmm2, %eax @@ -134,10 +134,10 @@ ; ; CHECK-WIDE-LABEL: foo3_4: ; CHECK-WIDE: ## BB#0: -; CHECK-WIDE-NEXT: vcvttss2si %xmm0, %eax -; CHECK-WIDE-NEXT: vpinsrb $0, %eax, %xmm0, %xmm1 -; CHECK-WIDE-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] -; CHECK-WIDE-NEXT: vcvttss2si %xmm2, %eax +; CHECK-WIDE-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] +; CHECK-WIDE-NEXT: vcvttss2si %xmm1, %eax +; CHECK-WIDE-NEXT: vcvttss2si %xmm0, %ecx +; CHECK-WIDE-NEXT: vmovd %ecx, %xmm1 ; CHECK-WIDE-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1 ; CHECK-WIDE-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] ; CHECK-WIDE-NEXT: vcvttss2si %xmm2, %eax Index: llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v16.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v16.ll +++ llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v16.ll @@ -1718,17 +1718,17 @@ ; SSE2-NEXT: movzbl (%rsi), %ecx ; SSE2-NEXT: shll $8, %ecx ; SSE2-NEXT: orl %eax, %ecx -; SSE2-NEXT: pxor %xmm0, %xmm0 +; SSE2-NEXT: movzwl %cx, %eax +; SSE2-NEXT: movd %eax, %xmm0 ; SSE2-NEXT: pxor %xmm1, %xmm1 -; SSE2-NEXT: pinsrw $0, %ecx, %xmm1 -; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,1,0,3] -; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7] -; SSE2-NEXT: pshufhw {{.*#+}} xmm2 = xmm0[0,1,2,3,5,4,4,4] -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,1,0,1] +; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] +; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,1,0,3] +; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[1,1,1,1,4,5,6,7] +; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,5,4,4,4] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1] ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7] ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,5,7] -; SSE2-NEXT: packuswb %xmm2, %xmm0 +; SSE2-NEXT: packuswb %xmm1, %xmm0 ; SSE2-NEXT: retq ; ; SSSE3-LABEL: PR31364: @@ -1737,8 +1737,8 @@ ; SSSE3-NEXT: movzbl (%rsi), %ecx ; SSSE3-NEXT: shll $8, %ecx ; SSSE3-NEXT: orl %eax, %ecx -; SSSE3-NEXT: pxor %xmm0, %xmm0 -; SSSE3-NEXT: pinsrw $0, %ecx, %xmm0 +; SSSE3-NEXT: movzwl %cx, %eax +; SSSE3-NEXT: movd %eax, %xmm0 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[1,1,1,1,1,1,1],zero,xmm0[1,1,1,1,1,0,0,0] ; SSSE3-NEXT: retq ; Index: llvm/trunk/test/CodeGen/X86/widen_bitops-0.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/widen_bitops-0.ll +++ llvm/trunk/test/CodeGen/X86/widen_bitops-0.ll @@ -131,10 +131,10 @@ define <3 x i8> @and_v3i8_as_i24(<3 x i8> %a, <3 x i8> %b) nounwind { ; X32-SSE-LABEL: and_v3i8_as_i24: ; X32-SSE: # BB#0: -; X32-SSE-NEXT: pinsrb $0, {{[0-9]+}}(%esp), %xmm0 +; X32-SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; X32-SSE-NEXT: pinsrb $4, {{[0-9]+}}(%esp), %xmm0 ; X32-SSE-NEXT: pinsrb $8, {{[0-9]+}}(%esp), %xmm0 -; X32-SSE-NEXT: pinsrb $0, {{[0-9]+}}(%esp), %xmm1 +; X32-SSE-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero ; X32-SSE-NEXT: pinsrb $4, {{[0-9]+}}(%esp), %xmm1 ; X32-SSE-NEXT: pinsrb $8, {{[0-9]+}}(%esp), %xmm1 ; X32-SSE-NEXT: pand %xmm0, %xmm1 @@ -172,10 +172,10 @@ define <3 x i8> @xor_v3i8_as_i24(<3 x i8> %a, <3 x i8> %b) nounwind { ; X32-SSE-LABEL: xor_v3i8_as_i24: ; X32-SSE: # BB#0: -; X32-SSE-NEXT: pinsrb $0, {{[0-9]+}}(%esp), %xmm0 +; X32-SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; X32-SSE-NEXT: pinsrb $4, {{[0-9]+}}(%esp), %xmm0 ; X32-SSE-NEXT: pinsrb $8, {{[0-9]+}}(%esp), %xmm0 -; X32-SSE-NEXT: pinsrb $0, {{[0-9]+}}(%esp), %xmm1 +; X32-SSE-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero ; X32-SSE-NEXT: pinsrb $4, {{[0-9]+}}(%esp), %xmm1 ; X32-SSE-NEXT: pinsrb $8, {{[0-9]+}}(%esp), %xmm1 ; X32-SSE-NEXT: pxor %xmm0, %xmm1 @@ -213,10 +213,10 @@ define <3 x i8> @or_v3i8_as_i24(<3 x i8> %a, <3 x i8> %b) nounwind { ; X32-SSE-LABEL: or_v3i8_as_i24: ; X32-SSE: # BB#0: -; X32-SSE-NEXT: pinsrb $0, {{[0-9]+}}(%esp), %xmm0 +; X32-SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; X32-SSE-NEXT: pinsrb $4, {{[0-9]+}}(%esp), %xmm0 ; X32-SSE-NEXT: pinsrb $8, {{[0-9]+}}(%esp), %xmm0 -; X32-SSE-NEXT: pinsrb $0, {{[0-9]+}}(%esp), %xmm1 +; X32-SSE-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero ; X32-SSE-NEXT: pinsrb $4, {{[0-9]+}}(%esp), %xmm1 ; X32-SSE-NEXT: pinsrb $8, {{[0-9]+}}(%esp), %xmm1 ; X32-SSE-NEXT: por %xmm0, %xmm1 Index: llvm/trunk/test/CodeGen/X86/widen_conv-3.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/widen_conv-3.ll +++ llvm/trunk/test/CodeGen/X86/widen_conv-3.ll @@ -65,7 +65,7 @@ ; X86-SSE2-NEXT: shll $8, %edx ; X86-SSE2-NEXT: movzbl (%esp), %esi ; X86-SSE2-NEXT: orl %edx, %esi -; X86-SSE2-NEXT: pinsrw $0, %esi, %xmm0 +; X86-SSE2-NEXT: movd %esi, %xmm0 ; X86-SSE2-NEXT: movzbl 2(%ecx), %ecx ; X86-SSE2-NEXT: pinsrw $1, %ecx, %xmm0 ; X86-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] @@ -115,7 +115,7 @@ ; X64-SSE2-NEXT: shll $8, %eax ; X64-SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx ; X64-SSE2-NEXT: orl %eax, %ecx -; X64-SSE2-NEXT: pinsrw $0, %ecx, %xmm0 +; X64-SSE2-NEXT: movd %ecx, %xmm0 ; X64-SSE2-NEXT: movzbl 2(%rsi), %eax ; X64-SSE2-NEXT: pinsrw $1, %eax, %xmm0 ; X64-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] Index: llvm/trunk/test/CodeGen/X86/widen_conv-4.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/widen_conv-4.ll +++ llvm/trunk/test/CodeGen/X86/widen_conv-4.ll @@ -91,7 +91,7 @@ ; X86-SSE2-NEXT: shll $8, %edx ; X86-SSE2-NEXT: movzbl (%esp), %esi ; X86-SSE2-NEXT: orl %edx, %esi -; X86-SSE2-NEXT: pinsrw $0, %esi, %xmm0 +; X86-SSE2-NEXT: movd %esi, %xmm0 ; X86-SSE2-NEXT: movzbl 2(%ecx), %ecx ; X86-SSE2-NEXT: pinsrw $1, %ecx, %xmm0 ; X86-SSE2-NEXT: pxor %xmm1, %xmm1 @@ -140,7 +140,7 @@ ; X64-SSE2-NEXT: shll $8, %eax ; X64-SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx ; X64-SSE2-NEXT: orl %eax, %ecx -; X64-SSE2-NEXT: pinsrw $0, %ecx, %xmm0 +; X64-SSE2-NEXT: movd %ecx, %xmm0 ; X64-SSE2-NEXT: movzbl 2(%rsi), %eax ; X64-SSE2-NEXT: pinsrw $1, %eax, %xmm0 ; X64-SSE2-NEXT: pxor %xmm1, %xmm1