Index: llvm/trunk/include/llvm/CodeGen/TargetLowering.h
===================================================================
--- llvm/trunk/include/llvm/CodeGen/TargetLowering.h
+++ llvm/trunk/include/llvm/CodeGen/TargetLowering.h
@@ -1904,7 +1904,8 @@
   /// This may be true if the target does not directly support the
   /// multiplication operation for the specified type or the sequence of simpler
   /// ops is faster than the multiply.
-  virtual bool decomposeMulByConstant(EVT VT, SDValue C) const {
+  virtual bool decomposeMulByConstant(LLVMContext &Context,
+                                      EVT VT, SDValue C) const {
     return false;
   }

Index: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -3556,7 +3556,7 @@
   // x * 15 --> (x << 4) - x
   // x * -33 --> -((x << 5) + x)
   // x * -15 --> -((x << 4) - x) ; this reduces --> x - (x << 4)
-  if (N1IsConst && TLI.decomposeMulByConstant(VT, N1)) {
+  if (N1IsConst && TLI.decomposeMulByConstant(*DAG.getContext(), VT, N1)) {
     // TODO: We could handle more general decomposition of any constant by
     //       having the target set a limit on number of ops and making a
     //       callback to determine that sequence (similar to sqrt expansion).
Index: llvm/trunk/lib/Target/X86/X86ISelLowering.h
===================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.h
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.h
@@ -1105,7 +1105,8 @@

     bool convertSelectOfConstantsToMath(EVT VT) const override;

-    bool decomposeMulByConstant(EVT VT, SDValue C) const override;
+    bool decomposeMulByConstant(LLVMContext &Context, EVT VT,
+                                SDValue C) const override;

     bool shouldUseStrictFP_TO_INT(EVT FpVT, EVT IntVT,
                                   bool IsSigned) const override;
Index: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
@@ -4869,15 +4869,25 @@
   return true;
 }

-bool X86TargetLowering::decomposeMulByConstant(EVT VT, SDValue C) const {
+bool X86TargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
+                                               SDValue C) const {
   // TODO: We handle scalars using custom code, but generic combining could make
   // that unnecessary.
   APInt MulC;
   if (!ISD::isConstantSplatVector(C.getNode(), MulC))
     return false;

+  // Find the type this will be legalized to. Otherwise we might prematurely
+  // convert this to shl+add/sub and then still have to type legalize those ops.
+  // Another choice would be to defer the decision for illegal types until
+  // after type legalization. But constant splat vectors of i64 can't make it
+  // through type legalization on 32-bit targets so we would need to special
+  // case vXi64.
+  while (getTypeAction(Context, VT) != TypeLegal)
+    VT = getTypeToTransformTo(Context, VT);
+
   // If vector multiply is legal, assume that's faster than shl + add/sub.
-  // TODO: Multiply is a complex op with higher latency and lower througput in
+  // TODO: Multiply is a complex op with higher latency and lower throughput in
   // most implementations, so this check could be loosened based on type
   // and/or a CPU attribute.
   if (isOperationLegal(ISD::MUL, VT))
Index: llvm/trunk/test/CodeGen/X86/vector-mul.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/vector-mul.ll
+++ llvm/trunk/test/CodeGen/X86/vector-mul.ll
@@ -435,26 +435,16 @@
 define <8 x i32> @mul_v8i32_17(<8 x i32> %a0) nounwind {
 ; X86-LABEL: mul_v8i32_17:
 ; X86:       # %bb.0:
-; X86-NEXT:    movdqa %xmm0, %xmm2
-; X86-NEXT:    pslld $4, %xmm2
-; X86-NEXT:    paddd %xmm0, %xmm2
-; X86-NEXT:    movdqa %xmm1, %xmm3
-; X86-NEXT:    pslld $4, %xmm3
-; X86-NEXT:    paddd %xmm1, %xmm3
-; X86-NEXT:    movdqa %xmm2, %xmm0
-; X86-NEXT:    movdqa %xmm3, %xmm1
+; X86-NEXT:    movdqa {{.*#+}} xmm2 = [17,17,17,17]
+; X86-NEXT:    pmulld %xmm2, %xmm0
+; X86-NEXT:    pmulld %xmm2, %xmm1
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: mul_v8i32_17:
 ; X64:       # %bb.0:
-; X64-NEXT:    movdqa %xmm0, %xmm2
-; X64-NEXT:    pslld $4, %xmm2
-; X64-NEXT:    paddd %xmm0, %xmm2
-; X64-NEXT:    movdqa %xmm1, %xmm3
-; X64-NEXT:    pslld $4, %xmm3
-; X64-NEXT:    paddd %xmm1, %xmm3
-; X64-NEXT:    movdqa %xmm2, %xmm0
-; X64-NEXT:    movdqa %xmm3, %xmm1
+; X64-NEXT:    movdqa {{.*#+}} xmm2 = [17,17,17,17]
+; X64-NEXT:    pmulld %xmm2, %xmm0
+; X64-NEXT:    pmulld %xmm2, %xmm1
 ; X64-NEXT:    retq
 ;
 ; X64-XOP-LABEL: mul_v8i32_17:
@@ -484,26 +474,16 @@
 define <16 x i16> @mul_v16i16_17(<16 x i16> %a0) nounwind {
 ; X86-LABEL: mul_v16i16_17:
 ; X86:       # %bb.0:
-; X86-NEXT:    movdqa %xmm0, %xmm2
-; X86-NEXT:    psllw $4, %xmm2
-; X86-NEXT:    paddw %xmm0, %xmm2
-; X86-NEXT:    movdqa %xmm1, %xmm3
-; X86-NEXT:    psllw $4, %xmm3
-; X86-NEXT:    paddw %xmm1, %xmm3
-; X86-NEXT:    movdqa %xmm2, %xmm0
-; X86-NEXT:    movdqa %xmm3, %xmm1
+; X86-NEXT:    movdqa {{.*#+}} xmm2 = [17,17,17,17,17,17,17,17]
+; X86-NEXT:    pmullw %xmm2, %xmm0
+; X86-NEXT:    pmullw %xmm2, %xmm1
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: mul_v16i16_17:
 ; X64:       # %bb.0:
-; X64-NEXT:    movdqa %xmm0, %xmm2
-; X64-NEXT:    psllw $4, %xmm2
-; X64-NEXT:    paddw %xmm0, %xmm2
-; X64-NEXT:    movdqa %xmm1, %xmm3
-; X64-NEXT:    psllw $4, %xmm3
-; X64-NEXT:    paddw %xmm1, %xmm3
-; X64-NEXT:    movdqa %xmm2, %xmm0
-; X64-NEXT:    movdqa %xmm3, %xmm1
+; X64-NEXT:    movdqa {{.*#+}} xmm2 = [17,17,17,17,17,17,17,17]
+; X64-NEXT:    pmullw %xmm2, %xmm0
+; X64-NEXT:    pmullw %xmm2, %xmm1
 ; X64-NEXT:    retq
 ;
 ; X64-XOP-LABEL: mul_v16i16_17:
@@ -797,32 +777,16 @@
 define <8 x i32> @mul_v8i32_neg33(<8 x i32> %a0) nounwind {
 ; X86-LABEL: mul_v8i32_neg33:
 ; X86:       # %bb.0:
-; X86-NEXT:    movdqa %xmm0, %xmm3
-; X86-NEXT:    pslld $5, %xmm3
-; X86-NEXT:    paddd %xmm0, %xmm3
-; X86-NEXT:    pxor %xmm2, %xmm2
-; X86-NEXT:    pxor %xmm0, %xmm0
-; X86-NEXT:    psubd %xmm3, %xmm0
-; X86-NEXT:    movdqa %xmm1, %xmm3
-; X86-NEXT:    pslld $5, %xmm3
-; X86-NEXT:    paddd %xmm1, %xmm3
-; X86-NEXT:    psubd %xmm3, %xmm2
-; X86-NEXT:    movdqa %xmm2, %xmm1
+; X86-NEXT:    movdqa {{.*#+}} xmm2 = [4294967263,4294967263,4294967263,4294967263]
+; X86-NEXT:    pmulld %xmm2, %xmm0
+; X86-NEXT:    pmulld %xmm2, %xmm1
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: mul_v8i32_neg33:
 ; X64:       # %bb.0:
-; X64-NEXT:    movdqa %xmm0, %xmm3
-; X64-NEXT:    pslld $5, %xmm3
-; X64-NEXT:    paddd %xmm0, %xmm3
-; X64-NEXT:    pxor %xmm2, %xmm2
-; X64-NEXT:    pxor %xmm0, %xmm0
-; X64-NEXT:    psubd %xmm3, %xmm0
-; X64-NEXT:    movdqa %xmm1, %xmm3
-; X64-NEXT:    pslld $5, %xmm3
-; X64-NEXT:    paddd %xmm1, %xmm3
-; X64-NEXT:    psubd %xmm3, %xmm2
-; X64-NEXT:    movdqa %xmm2, %xmm1
+; X64-NEXT:    movdqa {{.*#+}} xmm2 = [4294967263,4294967263,4294967263,4294967263]
+; X64-NEXT:    pmulld %xmm2, %xmm0
+; X64-NEXT:    pmulld %xmm2, %xmm1
 ; X64-NEXT:    retq
 ;
 ; X64-XOP-LABEL: mul_v8i32_neg33:
@@ -855,32 +819,16 @@
 define <16 x i16> @mul_v16i16_neg9(<16 x i16> %a0) nounwind {
 ; X86-LABEL: mul_v16i16_neg9:
 ; X86:       # %bb.0:
-; X86-NEXT:    movdqa %xmm0, %xmm3
-; X86-NEXT:    psllw $3, %xmm3
-; X86-NEXT:    paddw %xmm0, %xmm3
-; X86-NEXT:    pxor %xmm2, %xmm2
-; X86-NEXT:    pxor %xmm0, %xmm0
-; X86-NEXT:    psubw %xmm3, %xmm0
-; X86-NEXT:    movdqa %xmm1, %xmm3
-; X86-NEXT:    psllw $3, %xmm3
-; X86-NEXT:    paddw %xmm1, %xmm3
-; X86-NEXT:    psubw %xmm3, %xmm2
-; X86-NEXT:    movdqa %xmm2, %xmm1
+; X86-NEXT:    movdqa {{.*#+}} xmm2 = [65527,65527,65527,65527,65527,65527,65527,65527]
+; X86-NEXT:    pmullw %xmm2, %xmm0
+; X86-NEXT:    pmullw %xmm2, %xmm1
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: mul_v16i16_neg9:
 ; X64:       # %bb.0:
-; X64-NEXT:    movdqa %xmm0, %xmm3
-; X64-NEXT:    psllw $3, %xmm3
-; X64-NEXT:    paddw %xmm0, %xmm3
-; X64-NEXT:    pxor %xmm2, %xmm2
-; X64-NEXT:    pxor %xmm0, %xmm0
-; X64-NEXT:    psubw %xmm3, %xmm0
-; X64-NEXT:    movdqa %xmm1, %xmm3
-; X64-NEXT:    psllw $3, %xmm3
-; X64-NEXT:    paddw %xmm1, %xmm3
-; X64-NEXT:    psubw %xmm3, %xmm2
-; X64-NEXT:    movdqa %xmm2, %xmm1
+; X64-NEXT:    movdqa {{.*#+}} xmm2 = [65527,65527,65527,65527,65527,65527,65527,65527]
+; X64-NEXT:    pmullw %xmm2, %xmm0
+; X64-NEXT:    pmullw %xmm2, %xmm1
 ; X64-NEXT:    retq
 ;
 ; X64-XOP-LABEL: mul_v16i16_neg9:
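
Note: out-of-tree targets that override this hook need the same mechanical signature update. Below is a minimal sketch of such an override; MyTargetLowering and its choice to reuse the X86-style legalization walk are assumptions for illustration, not part of this patch. All calls used (ISD::isConstantSplatVector, getTypeAction, getTypeToTransformTo, isOperationLegal) appear in the X86 implementation above.

// Hypothetical out-of-tree override, not part of this commit. It threads the
// new LLVMContext parameter into the type-legalization queries, mirroring the
// X86 change above.
bool MyTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
                                              SDValue C) const {
  // Only consider constant splat vectors, as the X86 implementation does.
  APInt MulC;
  if (!ISD::isConstantSplatVector(C.getNode(), MulC))
    return false;

  // Decide based on the type the legalizer will eventually produce, so we do
  // not emit shl+add/sub that would itself still need to be type legalized.
  while (getTypeAction(Context, VT) != TypeLegal)
    VT = getTypeToTransformTo(Context, VT);

  // If a plain multiply is legal on that type, keep it; otherwise let the
  // DAGCombiner try the shl+add/sub decomposition for suitable constants.
  return !isOperationLegal(ISD::MUL, VT);
}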