Index: llvm/lib/Transforms/Vectorize/VectorCombine.cpp
===================================================================
--- llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -34,6 +34,7 @@
 #define DEBUG_TYPE "vector-combine"
 STATISTIC(NumVecCmp, "Number of vector compares formed");
 STATISTIC(NumVecBO, "Number of vector binops formed");
+STATISTIC(NumScalarBO, "Number of scalar binops formed");
 
 static cl::opt<bool> DisableVectorCombine(
     "disable-vector-combine", cl::init(false), cl::Hidden,
@@ -308,6 +309,56 @@
   return true;
 }
 
+/// Match a vector binop whose operands are both one-use insertelements of a
+/// scalar into undef at the same constant index, and convert it to a scalar
+/// binop followed by a single insertelement. Returns true if IR was changed.
+static bool scalarizeBinop(Instruction &I, const TargetTransformInfo &TTI) {
+  Instruction *Ins0, *Ins1;
+  if (!match(&I, m_BinOp(m_Instruction(Ins0), m_Instruction(Ins1))))
+    return false;
+
+  Value *V0, *V1;
+  uint64_t Index;
+  if (!match(Ins0, m_OneUse(m_InsertElement(m_Undef(), m_Value(V0),
+                                            m_ConstantInt(Index)))) ||
+      !match(Ins1, m_OneUse(m_InsertElement(m_Undef(), m_Value(V1),
+                                            m_SpecificInt(Index)))))
+    return false;
+
+  Type *ScalarTy = V0->getType();
+  Type *VecTy = I.getType();
+  assert(VecTy->isVectorTy() && ScalarTy == V1->getType() &&
+         (ScalarTy->isIntegerTy() || ScalarTy->isFloatingPointTy()) &&
+         "Unexpected types for insert into binop");
+  Instruction::BinaryOps Opcode = cast<BinaryOperator>(&I)->getOpcode();
+  int ScalarOpCost = TTI.getArithmeticInstrCost(Opcode, ScalarTy);
+  int VectorOpCost = TTI.getArithmeticInstrCost(Opcode, VecTy);
+
+  // The insert is paid twice by the old sequence and once by the new one.
+  // Scalarize unless the vector form is strictly cheaper.
+  int InsertCost =
+      TTI.getVectorInstrCost(Instruction::InsertElement, VecTy, Index);
+  int OldCost = InsertCost + InsertCost + VectorOpCost;
+  int NewCost = ScalarOpCost + InsertCost;
+  if (OldCost < NewCost)
+    return false;
+
+  // bo (ins undef, V0, Index), (ins undef, V1, Index) -->
+  // ins undef, (bo V0, V1), Index
+  ++NumScalarBO;
+  IRBuilder<> Builder(&I);
+  Value *Scalar = Builder.CreateBinOp(Opcode, V0, V1);
+  // IR flags are safe to copy: the scalar op cannot create extra poison.
+  if (auto *ScalarInst = dyn_cast<Instruction>(Scalar))
+    ScalarInst->copyIRFlags(&I);
+  // Build the result and hand it the old name so the IR stays readable.
+  Value *Insert =
+      Builder.CreateInsertElement(UndefValue::get(VecTy), Scalar, Index);
+  I.replaceAllUsesWith(Insert);
+  Insert->takeName(&I);
+  return true;
+}
+
 /// This is the entry point for all transforms. Pass manager differences are
 /// handled in the callers of this function.
 static bool runImpl(Function &F, const TargetTransformInfo &TTI,
@@ -330,6 +381,7 @@
         continue;
       MadeChange |= foldExtractExtract(I, TTI);
       MadeChange |= foldBitcastShuf(I, TTI);
+      MadeChange |= scalarizeBinop(I, TTI);
     }
   }
 
Index: llvm/test/Transforms/VectorCombine/X86/insert-binop.ll
===================================================================
--- llvm/test/Transforms/VectorCombine/X86/insert-binop.ll
+++ llvm/test/Transforms/VectorCombine/X86/insert-binop.ll
@@ -8,10 +8,9 @@
 
 define <16 x i8> @ins0_ins0_add(i8 %x, i8 %y) {
 ; CHECK-LABEL: @ins0_ins0_add(
-; CHECK-NEXT:    [[I0:%.*]] = insertelement <16 x i8> undef, i8 [[X:%.*]], i32 0
-; CHECK-NEXT:    [[I1:%.*]] = insertelement <16 x i8> undef, i8 [[Y:%.*]], i32 0
-; CHECK-NEXT:    [[R:%.*]] = add <16 x i8> [[I0]], [[I1]]
-; CHECK-NEXT:    ret <16 x i8> [[R]]
+; CHECK-NEXT:    [[TMP1:%.*]] = add i8 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <16 x i8> undef, i8 [[TMP1]], i64 0
+; CHECK-NEXT:    ret <16 x i8> [[TMP2]]
 ;
   %i0 = insertelement <16 x i8> undef, i8 %x, i32 0
   %i1 = insertelement <16 x i8> undef, i8 %y, i32 0
@@ -23,10 +22,9 @@
 
 define <8 x i16> @ins0_ins0_sub_flags(i16 %x, i16 %y) {
 ; CHECK-LABEL: @ins0_ins0_sub_flags(
-; CHECK-NEXT:    [[I0:%.*]] = insertelement <8 x i16> undef, i16 [[X:%.*]], i8 5
-; CHECK-NEXT:    [[I1:%.*]] = insertelement <8 x i16> undef, i16 [[Y:%.*]], i32 5
-; CHECK-NEXT:    [[R:%.*]] = sub nuw nsw <8 x i16> [[I0]], [[I1]]
-; CHECK-NEXT:    ret <8 x i16> [[R]]
+; CHECK-NEXT:    [[TMP1:%.*]] = sub nuw nsw i16 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <8 x i16> undef, i16 [[TMP1]], i64 5
+; CHECK-NEXT:    ret <8 x i16> [[TMP2]]
 ;
   %i0 = insertelement <8 x i16> undef, i16 %x, i8 5
   %i1 = insertelement <8 x i16> undef, i16 %y, i32 5
@@ -38,10 +36,9 @@
 
 define <2 x double> @ins0_ins0_fadd(double %x, double %y) {
 ; CHECK-LABEL: @ins0_ins0_fadd(
-; CHECK-NEXT:    [[I0:%.*]] = insertelement <2 x double> undef, double [[X:%.*]], i32 0
-; CHECK-NEXT:    [[I1:%.*]] = insertelement <2 x double> undef, double [[Y:%.*]], i32 0
-; CHECK-NEXT:    [[R:%.*]] = fadd reassoc nsz <2 x double> [[I0]], [[I1]]
-; CHECK-NEXT:    ret <2 x double> [[R]]
+; CHECK-NEXT:    [[TMP1:%.*]] = fadd reassoc nsz double [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> undef, double [[TMP1]], i64 0
+; CHECK-NEXT:    ret <2 x double> [[TMP2]]
 ;
   %i0 = insertelement <2 x double> undef, double %x, i32 0
   %i1 = insertelement <2 x double> undef, double %y, i32 0