Index: lib/Transforms/InstCombine/InstCombineVectorOps.cpp
===================================================================
--- lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -1164,8 +1164,28 @@
   else
     return nullptr;
 
-  // TODO: There are potential folds where the opcodes do not match (mul+shl).
-  if (B0->getOpcode() != B1->getOpcode())
+  // We need matching binops to fold the lanes together.
+  BinaryOperator::BinaryOps Opcode0 = B0->getOpcode();
+  BinaryOperator::BinaryOps Opcode1 = B1->getOpcode();
+  bool DropNSW = false;
+  if (ConstantsAreOp1 && Opcode0 != Opcode1) {
+    // If we have multiply and shift-left-by-constant, convert the shift:
+    // shl X, C --> mul X, 1 << C
+    // TODO: We drop "nsw" if shift is converted into multiply because it may
+    // not be correct when the shift amount is BitWidth - 1. We could examine
+    // each vector element to determine if it is safe to keep that flag.
+    if (Opcode0 == Instruction::Mul && Opcode1 == Instruction::Shl) {
+      C1 = ConstantExpr::getShl(ConstantInt::get(C1->getType(), 1), C1);
+      Opcode1 = Instruction::Mul;
+      DropNSW = true;
+    } else if (Opcode0 == Instruction::Shl && Opcode1 == Instruction::Mul) {
+      C0 = ConstantExpr::getShl(ConstantInt::get(C0->getType(), 1), C0);
+      Opcode0 = Instruction::Mul;
+      DropNSW = true;
+    }
+  }
+
+  if (Opcode0 != Opcode1)
     return nullptr;
 
   // Remove a binop and the shuffle by rearranging the constant:
@@ -1186,6 +1206,8 @@
   // Flags are intersected from the 2 source binops.
   NewBO->copyIRFlags(B0);
   NewBO->andIRFlags(B1);
+  if (DropNSW)
+    NewBO->setHasNoSignedWrap(false);
   return NewBO;
 }
 
Index: test/Transforms/InstCombine/shuffle_select.ll
===================================================================
--- test/Transforms/InstCombine/shuffle_select.ll
+++ test/Transforms/InstCombine/shuffle_select.ll
@@ -239,14 +239,11 @@
   ret <4 x double> %t3
 }
 
-; FIXME:
 ; Shift-left with constant shift amount can be converted to mul to enable the fold.
 
 define <4 x i32> @mul_shl(<4 x i32> %v0) {
 ; CHECK-LABEL: @mul_shl(
-; CHECK-NEXT:    [[T1:%.*]] = mul nuw <4 x i32> [[V0:%.*]], <i32 undef, i32 undef, i32 3, i32 4>
-; CHECK-NEXT:    [[T2:%.*]] = shl nuw <4 x i32> [[V0]], <i32 5, i32 6, i32 7, i32 8>
-; CHECK-NEXT:    [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32> <i32 4, i32 5, i32 2, i32 3>
+; CHECK-NEXT:    [[T3:%.*]] = mul nuw <4 x i32> [[V0:%.*]], <i32 32, i32 64, i32 3, i32 4>
 ; CHECK-NEXT:    ret <4 x i32> [[T3]]
 ;
   %t1 = mul nuw <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
@@ -255,11 +252,11 @@
   ret <4 x i32> %t3
 }
 
+; Try with shift as operand 0 of the shuffle; 'nsw' is dropped for safety, but that could be improved.
+
 define <4 x i32> @shl_mul(<4 x i32> %v0) {
 ; CHECK-LABEL: @shl_mul(
-; CHECK-NEXT:    [[T1:%.*]] = shl nsw <4 x i32> [[V0:%.*]], <i32 1, i32 2, i32 3, i32 4>
-; CHECK-NEXT:    [[T2:%.*]] = mul nsw <4 x i32> [[V0]], <i32 5, i32 undef, i32 undef, i32 undef>
-; CHECK-NEXT:    [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32> <i32 4, i32 undef, i32 2, i32 3>
+; CHECK-NEXT:    [[T3:%.*]] = shl <4 x i32> [[V0:%.*]], <i32 5, i32 undef, i32 8, i32 16>
 ; CHECK-NEXT:    ret <4 x i32> [[T3]]
 ;
   %t1 = shl nsw <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
@@ -273,8 +270,7 @@
 
 define <4 x i32> @mul_is_nop_shl(<4 x i32> %v0) {
 ; CHECK-LABEL: @mul_is_nop_shl(
-; CHECK-NEXT:    [[T2:%.*]] = shl <4 x i32> [[V0:%.*]], <i32 5, i32 6, i32 7, i32 8>
-; CHECK-NEXT:    [[T3:%.*]] = shufflevector <4 x i32> [[V0]], <4 x i32> [[T2]], <4 x i32> <i32 0, i32 5, i32 6, i32 7>
+; CHECK-NEXT:    [[T3:%.*]] = shl <4 x i32> [[V0:%.*]], <i32 0, i32 6, i32 7, i32 8>
 ; CHECK-NEXT:    ret <4 x i32> [[T3]]
 ;
   %t1 = mul <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
@@ -283,6 +279,8 @@
   ret <4 x i32> %t3
 }
 
+; Negative test: shift amount (operand 1) must be constant.
+
 define <4 x i32> @shl_mul_not_constant_shift_amount(<4 x i32> %v0) {
 ; CHECK-LABEL: @shl_mul_not_constant_shift_amount(
 ; CHECK-NEXT:    [[T1:%.*]] = shl <4 x i32> <i32 1, i32 2, i32 3, i32 4>, [[V0:%.*]]