Index: lib/CodeGen/SelectionDAG/TargetLowering.cpp =================================================================== --- lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -1411,6 +1411,40 @@ KnownZero.insertBits(SubZero, SubIdx); break; } + case ISD::INSERT_VECTOR_ELT: { + SDValue Vec = Op.getOperand(0); + SDValue Scl = Op.getOperand(1); + auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2)); + + // For a legal, constant insertion index, if we don't need this insertion + // then strip it, else remove it from the demanded elts. + if (CIdx && CIdx->getAPIntValue().ult(NumElts)) { + unsigned Idx = CIdx->getZExtValue(); + if (!DemandedElts[Idx]) + return TLO.CombineTo(Op, Vec); + DemandedElts.clearBit(Idx); + + if (SimplifyDemandedVectorElts(Vec, DemandedElts, KnownUndef, + KnownZero, TLO, Depth + 1)) + return true; + + KnownUndef.clearBit(Idx); + if (Scl.isUndef()) + KnownUndef.setBit(Idx); + + KnownZero.clearBit(Idx); + if (isNullConstant(Scl) || isNullFPConstant(Scl)) + KnownZero.setBit(Idx); + break; + } + + APInt VecUndef, VecZero; + if (SimplifyDemandedVectorElts(Vec, DemandedElts, VecUndef, VecZero, TLO, + Depth + 1)) + return true; + // Without knowing the insertion index we can't set KnownUndef/KnownZero. 
+ break; + } case ISD::VSELECT: { APInt DemandedLHS(DemandedElts); APInt DemandedRHS(DemandedElts); Index: test/CodeGen/X86/known-bits-vector.ll =================================================================== --- test/CodeGen/X86/known-bits-vector.ll +++ test/CodeGen/X86/known-bits-vector.ll @@ -51,7 +51,7 @@ ; X32: # %bb.0: ; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movzwl {{[0-9]+}}(%esp), %ecx -; X32-NEXT: vpinsrd $0, %eax, %xmm0, %xmm0 +; X32-NEXT: vmovd %eax, %xmm0 ; X32-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0 ; X32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,2,2] ; X32-NEXT: vcvtdq2ps %xmm0, %xmm0 @@ -61,7 +61,7 @@ ; X64: # %bb.0: ; X64-NEXT: movzwl %di, %eax ; X64-NEXT: movzwl %si, %ecx -; X64-NEXT: vpinsrd $0, %eax, %xmm0, %xmm0 +; X64-NEXT: vmovd %eax, %xmm0 ; X64-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0 ; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,2,2] ; X64-NEXT: vcvtdq2ps %xmm0, %xmm0 Index: test/CodeGen/X86/masked_gather_scatter.ll =================================================================== --- test/CodeGen/X86/masked_gather_scatter.ll +++ test/CodeGen/X86/masked_gather_scatter.ll @@ -757,7 +757,6 @@ define <16 x float> @test14(float* %base, i32 %ind, <16 x float*> %vec) { ; KNL_64-LABEL: test14: ; KNL_64: # %bb.0: -; KNL_64-NEXT: vpinsrq $1, %rdi, %xmm0, %xmm0 ; KNL_64-NEXT: vpbroadcastq %xmm0, %zmm0 ; KNL_64-NEXT: vmovd %esi, %xmm1 ; KNL_64-NEXT: vpbroadcastd %xmm1, %ymm1 @@ -771,7 +770,6 @@ ; ; KNL_32-LABEL: test14: ; KNL_32: # %bb.0: -; KNL_32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 ; KNL_32-NEXT: vpbroadcastd %xmm0, %zmm0 ; KNL_32-NEXT: vpslld $2, {{[0-9]+}}(%esp){1to16}, %zmm1 ; KNL_32-NEXT: vpaddd %zmm1, %zmm0, %zmm1 @@ -781,7 +779,6 @@ ; ; SKX-LABEL: test14: ; SKX: # %bb.0: -; SKX-NEXT: vpinsrq $1, %rdi, %xmm0, %xmm0 ; SKX-NEXT: vpbroadcastq %xmm0, %zmm0 ; SKX-NEXT: vpbroadcastd %esi, %ymm1 ; SKX-NEXT: vpmovsxdq %ymm1, %zmm1 @@ -794,7 +791,6 @@ ; ; SKX_32-LABEL: test14: ; SKX_32: # %bb.0: -; SKX_32-NEXT: vpinsrd $1, 
{{[0-9]+}}(%esp), %xmm0, %xmm0 ; SKX_32-NEXT: vpbroadcastd %xmm0, %zmm0 ; SKX_32-NEXT: vpslld $2, {{[0-9]+}}(%esp){1to16}, %zmm1 ; SKX_32-NEXT: vpaddd %zmm1, %zmm0, %zmm1 Index: test/CodeGen/X86/vec_insert-7.ll =================================================================== --- test/CodeGen/X86/vec_insert-7.ll +++ test/CodeGen/X86/vec_insert-7.ll @@ -9,13 +9,9 @@ ; X32-LABEL: mmx_movzl: ; X32: ## %bb.0: ; X32-NEXT: subl $20, %esp -; X32-NEXT: movq %mm0, {{[0-9]+}}(%esp) -; X32-NEXT: pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero ; X32-NEXT: movl $32, %eax -; X32-NEXT: pinsrd $0, %eax, %xmm0 -; X32-NEXT: pxor %xmm1, %xmm1 -; X32-NEXT: pblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3,4,5,6,7] -; X32-NEXT: movq %xmm1, (%esp) +; X32-NEXT: movd %eax, %xmm0 +; X32-NEXT: movq %xmm0, (%esp) ; X32-NEXT: movq (%esp), %mm0 ; X32-NEXT: addl $20, %esp ; X32-NEXT: retl