Index: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -304,6 +304,7 @@
     SDValue visitCONCAT_VECTORS(SDNode *N);
     SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
     SDValue visitVECTOR_SHUFFLE(SDNode *N);
+    SDValue visitSCALAR_TO_VECTOR(SDNode *N);
     SDValue visitINSERT_SUBVECTOR(SDNode *N);
     SDValue visitMLOAD(SDNode *N);
     SDValue visitMSTORE(SDNode *N);
@@ -1371,6 +1372,7 @@
   case ISD::CONCAT_VECTORS:    return visitCONCAT_VECTORS(N);
   case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N);
   case ISD::VECTOR_SHUFFLE:    return visitVECTOR_SHUFFLE(N);
+  case ISD::SCALAR_TO_VECTOR:  return visitSCALAR_TO_VECTOR(N);
   case ISD::INSERT_SUBVECTOR:  return visitINSERT_SUBVECTOR(N);
   case ISD::MLOAD:             return visitMLOAD(N);
   case ISD::MSTORE:            return visitMSTORE(N);
@@ -12188,6 +12190,34 @@
   return SDValue();
 }
 
+SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) {
+  SDValue InVal = N->getOperand(0);
+  EVT VT = N->getValueType(0);
+
+  // Replace a SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V,C0)) pattern
+  // with a VECTOR_SHUFFLE.
+  if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
+    SDValue InVec = InVal->getOperand(0);
+    SDValue EltNo = InVal->getOperand(1);
+
+    // FIXME: We could support implicit truncation if the shuffle can be
+    // scaled to a smaller vector scalar type.
+    ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(EltNo);
+    if (C0 && VT == InVec.getValueType() &&
+        VT.getScalarType() == InVal.getValueType()) {
+      SmallVector<int, 8> NewMask(VT.getVectorNumElements(), -1);
+      int Elt = C0->getZExtValue();
+      NewMask[0] = Elt;
+
+      if (TLI.isShuffleMaskLegal(NewMask, VT))
+        return DAG.getVectorShuffle(VT, SDLoc(N), InVec, DAG.getUNDEF(VT),
+                                    NewMask);
+    }
+  }
+
+  return SDValue();
+}
+
 SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
   SDValue N0 = N->getOperand(0);
   SDValue N2 = N->getOperand(2);
Index: llvm/trunk/test/CodeGen/X86/mmx-arg-passing-x86-64.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/mmx-arg-passing-x86-64.ll
+++ llvm/trunk/test/CodeGen/X86/mmx-arg-passing-x86-64.ll
@@ -25,11 +25,9 @@
 ; X86-64-NEXT:    movq %mm0, -{{[0-9]+}}(%rsp)
 ; X86-64-NEXT:    movdq2q %xmm0, %mm0
 ; X86-64-NEXT:    movq %mm0, -{{[0-9]+}}(%rsp)
-; X86-64-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
 ; X86-64-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
-; X86-64-NEXT:    paddb %xmm0, %xmm1
-; X86-64-NEXT:    movd %xmm1, %rax
-; X86-64-NEXT:    movd %rax, %xmm0
+; X86-64-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
+; X86-64-NEXT:    paddb %xmm1, %xmm0
 ; X86-64-NEXT:    movb $1, %al
 ; X86-64-NEXT:    jmp _pass_v8qi ## TAILCALL
   %v1a = bitcast x86_mmx %v1 to <8 x i8>
Index: llvm/trunk/test/CodeGen/X86/pr14161.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/pr14161.ll
+++ llvm/trunk/test/CodeGen/X86/pr14161.ll
@@ -26,9 +26,8 @@
 ; CHECK:       # BB#0: # %entry
 ; CHECK-NEXT:    movdqa (%rdi), %xmm0
 ; CHECK-NEXT:    pminud {{.*}}(%rip), %xmm0
-; CHECK-NEXT:    pextrd $1, %xmm0, %eax
-; CHECK-NEXT:    movd %eax, %xmm0
-; CHECK-NEXT:    pmovzxwq %xmm0, %xmm0
+; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
+; CHECK-NEXT:    pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
 ; CHECK-NEXT:    retq
 entry:
   %2 = load <4 x i32>, <4 x i32>* %0, align 16
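
Not part of the patch, but as an illustrative aside: for SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V, C0)) the new combine only builds a shuffle mask in which lane 0 takes the constant extract index C0 and every other lane is undef (-1). A minimal standalone C++ sketch of that mask construction follows; the helper name and the main() driver are hypothetical and are not LLVM APIs.

#include <cstdio>
#include <vector>

// Mirrors NewMask in visitSCALAR_TO_VECTOR: lane 0 receives the extracted
// element's index, all remaining lanes are left undefined (-1).
static std::vector<int> scalarToVectorShuffleMask(unsigned NumElts, int ExtractIdx) {
  std::vector<int> Mask(NumElts, -1);
  Mask[0] = ExtractIdx;
  return Mask;
}

int main() {
  // e.g. SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(v4i32 V, 1)) -> mask <1,-1,-1,-1>
  for (int M : scalarToVectorShuffleMask(4, 1))
    std::printf("%d ", M);
  std::printf("\n");
  return 0;
}

The pshufd line in the updated pr14161.ll checks is this mask in action: element 1 of the pminud result is moved into lane 0 and the remaining lanes are don't-care, so the earlier pextrd/movd round trip through a GPR disappears.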