diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -8002,10 +8002,11 @@ Elt = NextElt; } - // If our first insertion is not the first index then insert into zero - // vector to break any register dependency else use SCALAR_TO_VECTOR. + // If our first insertion is not the first index or zeros are needed, then + // insert into zero vector. Otherwise, use SCALAR_TO_VECTOR (leaves high + // elements undefined). if (!V) { - if (i != 0) + if (i != 0 || NumZero) V = getZeroVector(MVT::v8i16, Subtarget, DAG, dl); else { V = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, Elt); diff --git a/llvm/test/CodeGen/X86/buildvec-insertvec.ll b/llvm/test/CodeGen/X86/buildvec-insertvec.ll --- a/llvm/test/CodeGen/X86/buildvec-insertvec.ll +++ b/llvm/test/CodeGen/X86/buildvec-insertvec.ll @@ -784,12 +784,13 @@ ret <4 x i32> %5 } -; FIXME: If we do not define all bytes that are extracted, this is a miscompile. +; If we do not define all bytes that are extracted, this is a miscompile. define i32 @PR46586(i8* %p, <4 x i32> %v) { ; SSE2-LABEL: PR46586: ; SSE2: # %bb.0: ; SSE2-NEXT: movzbl 3(%rdi), %eax +; SSE2-NEXT: pxor %xmm1, %xmm1 ; SSE2-NEXT: pinsrw $6, %eax, %xmm1 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[3,1,2,3] ; SSE2-NEXT: movd %xmm1, %eax