Index: lib/Transforms/InstCombine/InstCombineVectorOps.cpp
===================================================================
--- lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -566,6 +566,46 @@
   return nullptr;
 }
 
+/// Fold the chain of insertelements ending at \p IE that insert constant data
+/// at in-range constant indexes into one shufflevector of the chain's base
+/// vector and a constant vector; a lane written twice keeps the last insert.
+static ShuffleVectorInst *
+foldInsertEltsWithConstantDataToShuffle(InsertElementInst *IE,
+                                        InstCombiner &IC) {
+  unsigned NumElts = IE->getType()->getNumElements();
+  Type *Int32Ty = Type::getInt32Ty(IE->getContext());
+  SmallVector<Constant *, 4> Values(NumElts, nullptr);
+  SmallVector<Constant *, 4> Mask(NumElts, nullptr);
+  Value *V = IE;
+  while (auto *VecOp = dyn_cast<InsertElementInst>(V)) {
+    uint64_t InsertIdx;
+    Constant *Val;
+    // Stop at non-constant data/index, an out-of-range index (it would overrun
+    // Values/Mask), or an inner insert with other users that still need it.
+    if (!match(VecOp->getOperand(2), m_ConstantInt(InsertIdx)) ||
+        !match(VecOp->getOperand(1), m_Constant(Val)) ||
+        InsertIdx >= NumElts || (VecOp != IE && !VecOp->hasOneUse()))
+      break;
+    if (!Values[InsertIdx]) {
+      Mask[InsertIdx] = ConstantInt::get(Int32Ty, NumElts + InsertIdx);
+      Values[InsertIdx] = Val;
+    }
+    V = VecOp->getOperand(0);
+    if (VecOp != IE) {
+      IC.replaceInstUsesWith(*VecOp, V);
+      IC.eraseInstFromFunction(*VecOp);
+    }
+  }
+  // Lanes no insert wrote are taken from the base vector V (mask index I).
+  for (unsigned I = 0; I < NumElts; ++I)
+    if (!Values[I]) {
+      Values[I] = UndefValue::get(IE->getType()->getElementType());
+      Mask[I] = ConstantInt::get(Int32Ty, I);
+    }
+  return new ShuffleVectorInst(V, ConstantVector::get(Values),
+                               ConstantVector::get(Mask));
+}
+
 Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) {
   Value *VecOp    = IE.getOperand(0);
   Value *ScalarOp = IE.getOperand(1);
@@ -615,6 +655,29 @@
       }
     }
   }
+  if (auto *II = dyn_cast<InsertElementInst>(VecOp)) {
+    // If the inserted element is inserted to the same vector, and if the
+    // indexes and data are constant, try to turn this into a shufflevector
+    // operation.
+    uint64_t InsertIdx;
+    if (match(II->getOperand(2), m_ConstantInt()) &&
+        match(IdxOp, m_ConstantInt(InsertIdx)) &&
+        match(II->getOperand(1), m_Constant()) &&
+        match(ScalarOp, m_Constant())) {
+      unsigned NumInsertVectorElts = IE.getType()->getNumElements();
+
+      if (InsertIdx >= NumInsertVectorElts) // Out of range insert.
+        return replaceInstUsesWith(IE, UndefValue::get(IE.getType()));
+
+      // If this insertelement isn't used by some other insertelement, turn
+      // it (and any insertelements it points to), into one big shuffle.
+      auto *IEBack = dyn_cast_or_null<InsertElementInst>(IE.user_back());
+      if (!IE.hasOneUse() || !IEBack ||
+          !match(IEBack->getOperand(2), m_ConstantInt()) ||
+          !match(IEBack->getOperand(1), m_Constant()))
+        return foldInsertEltsWithConstantDataToShuffle(&IE, *this);
+    }
+  }
 
   unsigned VWidth = cast<VectorType>(VecOp->getType())->getNumElements();
   APInt UndefElts(VWidth, 0);
Index: test/Transforms/InstCombine/insert-const-shuf.ll
===================================================================
--- test/Transforms/InstCombine/insert-const-shuf.ll
+++ test/Transforms/InstCombine/insert-const-shuf.ll
@@ -14,13 +14,12 @@
   ret <4 x float> %ins
 }
 
-; TODO: A chain of inserts should collapse.
+; A chain of inserts should collapse.
 
 define <4 x float> @twoInserts(<4 x float> %x) {
 ; CHECK-LABEL: @twoInserts(
 ; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x float> %x, <4 x float> <float undef, float 0.000000e+00, float undef, float undef>, <4 x i32> <i32 0, i32 5, i32 undef, i32 undef>
-; CHECK-NEXT:    [[INS1:%.*]] = insertelement <4 x float> [[SHUF]], float 4.200000e+01, i32 2
-; CHECK-NEXT:    [[INS2:%.*]] = insertelement <4 x float> [[INS1]], float 1.100000e+01, i32 3
+; CHECK-NEXT:    [[INS2:%.*]] = shufflevector <4 x float> [[SHUF]], <4 x float> <float undef, float undef, float 4.200000e+01, float 1.100000e+01>, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
 ; CHECK-NEXT:    ret <4 x float> [[INS2]]
 ;
   %shuf = shufflevector <4 x float> %x, <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 3>
Index: test/Transforms/InstCombine/vec_demanded_elts.ll
===================================================================
--- test/Transforms/InstCombine/vec_demanded_elts.ll
+++ test/Transforms/InstCombine/vec_demanded_elts.ll
@@ -1,32 +1,39 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt < %s -instcombine -S | FileCheck %s
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 
 define i16 @test1(float %f) {
-entry:
 ; CHECK-LABEL: @test1(
-; CHECK: fmul float
-; CHECK-NOT: insertelement {{.*}} 0.00
-; CHECK-NOT: call {{.*}} @llvm.x86.sse.mul
-; CHECK-NOT: call {{.*}} @llvm.x86.sse.sub
-; CHECK: ret
-	%tmp = insertelement <4 x float> undef, float %f, i32 0		; <<4 x float>> [#uses=1]
-	%tmp10 = insertelement <4 x float> %tmp, float 0.000000e+00, i32 1		; <<4 x float>> [#uses=1]
-	%tmp11 = insertelement <4 x float> %tmp10, float 0.000000e+00, i32 2		; <<4 x float>> [#uses=1]
-	%tmp12 = insertelement <4 x float> %tmp11, float 0.000000e+00, i32 3		; <<4 x float>> [#uses=1]
-	%tmp28 = tail call <4 x float> @llvm.x86.sse.sub.ss( <4 x float> %tmp12, <4 x float> < float 1.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00 > )		; <<4 x float>> [#uses=1]
-	%tmp37 = tail call <4 x float> @llvm.x86.sse.mul.ss( <4 x float> %tmp28, <4 x float> < float 5.000000e-01, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00 > )		; <<4 x float>> [#uses=1]
-	%tmp48 = tail call <4 x float> @llvm.x86.sse.min.ss( <4 x float> %tmp37, <4 x float> < float 6.553500e+04, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00 > )		; <<4 x float>> [#uses=1]
-	%tmp59 = tail call <4 x float> @llvm.x86.sse.max.ss( <4 x float> %tmp48, <4 x float> zeroinitializer )		; <<4 x float>> [#uses=1]
-	%tmp.upgrd.1 = tail call i32 @llvm.x86.sse.cvttss2si( <4 x float> %tmp59 )		; <i32> [#uses=1]
-	%tmp69 = trunc i32 %tmp.upgrd.1 to i16		; <i16> [#uses=1]
-	ret i16 %tmp69
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP281:%.*]] = fadd float %f, -1.000000e+00
+; CHECK-NEXT:    [[TMP373:%.*]] = fmul float [[TMP281]], 5.000000e-01
+; CHECK-NEXT:    [[TMP374:%.*]] = insertelement <4 x float> undef, float [[TMP373]], i32 0
+; CHECK-NEXT:    [[TMP48:%.*]] = tail call <4 x float> @llvm.x86.sse.min.ss(<4 x float> [[TMP374]], <4 x float> <float 6.553500e+04, float undef, float undef, float undef>)
+; CHECK-NEXT:    [[TMP59:%.*]] = tail call <4 x float> @llvm.x86.sse.max.ss(<4 x float> [[TMP48]], <4 x float> <float 0.000000e+00, float undef, float undef, float undef>)
+; CHECK-NEXT:    [[TMP_UPGRD_1:%.*]] = tail call i32 @llvm.x86.sse.cvttss2si(<4 x float> [[TMP59]])
+; CHECK-NEXT:    [[TMP69:%.*]] = trunc i32 [[TMP_UPGRD_1]] to i16
+; CHECK-NEXT:    ret i16 [[TMP69]]
+;
+entry:
+  %tmp = insertelement <4 x float> undef, float %f, i32 0		; <<4 x float>> [#uses=1]
+  %tmp10 = insertelement <4 x float> %tmp, float 0.000000e+00, i32 1		; <<4 x float>> [#uses=1]
+  %tmp11 = insertelement <4 x float> %tmp10, float 0.000000e+00, i32 2		; <<4 x float>> [#uses=1]
+  %tmp12 = insertelement <4 x float> %tmp11, float 0.000000e+00, i32 3		; <<4 x float>> [#uses=1]
+  %tmp28 = tail call <4 x float> @llvm.x86.sse.sub.ss( <4 x float> %tmp12, <4 x float> < float 1.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00 > )		; <<4 x float>> [#uses=1]
+  %tmp37 = tail call <4 x float> @llvm.x86.sse.mul.ss( <4 x float> %tmp28, <4 x float> < float 5.000000e-01, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00 > )		; <<4 x float>> [#uses=1]
+  %tmp48 = tail call <4 x float> @llvm.x86.sse.min.ss( <4 x float> %tmp37, <4 x float> < float 6.553500e+04, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00 > )		; <<4 x float>> [#uses=1]
+  %tmp59 = tail call <4 x float> @llvm.x86.sse.max.ss( <4 x float> %tmp48, <4 x float> zeroinitializer )		; <<4 x float>> [#uses=1]
+  %tmp.upgrd.1 = tail call i32 @llvm.x86.sse.cvttss2si( <4 x float> %tmp59 )		; <i32> [#uses=1]
+  %tmp69 = trunc i32 %tmp.upgrd.1 to i16		; <i16> [#uses=1]
+  ret i16 %tmp69
 }
 
 define i32 @test2(float %f) {
 ; CHECK-LABEL: @test2(
-; CHECK-NOT: insertelement
-; CHECK-NOT: extractelement
-; CHECK: ret
+; CHECK-NEXT:    [[TMP5:%.*]] = fmul float %f, %f
+; CHECK-NEXT:    [[TMP21:%.*]] = bitcast float [[TMP5]] to i32
+; CHECK-NEXT:    ret i32 [[TMP21]]
+;
   %tmp5 = fmul float %f, %f
   %tmp9 = insertelement <4 x float> undef, float %tmp5, i32 0
   %tmp10 = insertelement <4 x float> %tmp9, float 0.000000e+00, i32 1
@@ -39,8 +46,33 @@
 
 define i64 @test3(float %f, double %d) {
 ; CHECK-LABEL: @test3(
-; CHECK-NOT: insertelement {{.*}} 0.00
-; CHECK: ret
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[V00:%.*]] = insertelement <4 x float> undef, float %f, i32 0
+; CHECK-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.x86.sse.cvtss2si(<4 x float> [[V00]])
+; CHECK-NEXT:    [[V10:%.*]] = insertelement <4 x float> undef, float %f, i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.x86.sse.cvtss2si64(<4 x float> [[V10]])
+; CHECK-NEXT:    [[V20:%.*]] = insertelement <4 x float> undef, float %f, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call i32 @llvm.x86.sse.cvttss2si(<4 x float> [[V20]])
+; CHECK-NEXT:    [[V30:%.*]] = insertelement <4 x float> undef, float %f, i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call i64 @llvm.x86.sse.cvttss2si64(<4 x float> [[V30]])
+; CHECK-NEXT:    [[V40:%.*]] = insertelement <2 x double> undef, double %d, i32 0
+; CHECK-NEXT:    [[TMP4:%.*]] = tail call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> [[V40]])
+; CHECK-NEXT:    [[V50:%.*]] = insertelement <2 x double> undef, double %d, i32 0
+; CHECK-NEXT:    [[TMP5:%.*]] = tail call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> [[V50]])
+; CHECK-NEXT:    [[V60:%.*]] = insertelement <2 x double> undef, double %d, i32 0
+; CHECK-NEXT:    [[TMP6:%.*]] = tail call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> [[V60]])
+; CHECK-NEXT:    [[V70:%.*]] = insertelement <2 x double> undef, double %d, i32 0
+; CHECK-NEXT:    [[TMP7:%.*]] = tail call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> [[V70]])
+; CHECK-NEXT:    [[TMP8:%.*]] = add i32 [[TMP0]], [[TMP2]]
+; CHECK-NEXT:    [[TMP9:%.*]] = add i32 [[TMP4]], [[TMP6]]
+; CHECK-NEXT:    [[TMP10:%.*]] = add i32 [[TMP8]], [[TMP9]]
+; CHECK-NEXT:    [[TMP11:%.*]] = sext i32 [[TMP10]] to i64
+; CHECK-NEXT:    [[TMP12:%.*]] = add i64 [[TMP1]], [[TMP3]]
+; CHECK-NEXT:    [[TMP13:%.*]] = add i64 [[TMP5]], [[TMP7]]
+; CHECK-NEXT:    [[TMP14:%.*]] = add i64 [[TMP12]], [[TMP13]]
+; CHECK-NEXT:    [[TMP15:%.*]] = add i64 [[TMP11]], [[TMP14]]
+; CHECK-NEXT:    ret i64 [[TMP15]]
+;
 entry:
   %v00 = insertelement <4 x float> undef, float %f, i32 0
   %v01 = insertelement <4 x float> %v00, float 0.000000e+00, i32 1
@@ -87,8 +119,14 @@
 
 define void @get_image() nounwind {
 ; CHECK-LABEL: @get_image(
-; CHECK-NOT: extractelement
-; CHECK: unreachable
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @fgetc(i8* null) #0
+; CHECK-NEXT:    br i1 false, label %bb2, label %bb3
+; CHECK:       bb2:
+; CHECK-NEXT:    br label %bb3
+; CHECK:       bb3:
+; CHECK-NEXT:    unreachable
+;
 entry:
   %0 = call i32 @fgetc(i8* null) nounwind               ; <i32> [#uses=1]
   %1 = trunc i32 %0 to i8         ; <i8> [#uses=1]
@@ -107,16 +145,18 @@
 ; PR4340
 define void @vac(<4 x float>* nocapture %a) nounwind {
 ; CHECK-LABEL: @vac(
-; CHECK-NOT: load
-; CHECK: ret
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store <4 x float> zeroinitializer, <4 x float>* %a, align 16
+; CHECK-NEXT:    ret void
+;
 entry:
-	%tmp1 = load <4 x float>, <4 x float>* %a		; <<4 x float>> [#uses=1]
-	%vecins = insertelement <4 x float> %tmp1, float 0.000000e+00, i32 0	; <<4 x float>> [#uses=1]
-	%vecins4 = insertelement <4 x float> %vecins, float 0.000000e+00, i32 1; <<4 x float>> [#uses=1]
-	%vecins6 = insertelement <4 x float> %vecins4, float 0.000000e+00, i32 2; <<4 x float>> [#uses=1]
-	%vecins8 = insertelement <4 x float> %vecins6, float 0.000000e+00, i32 3; <<4 x float>> [#uses=1]
-	store <4 x float> %vecins8, <4 x float>* %a
-	ret void
+  %tmp1 = load <4 x float>, <4 x float>* %a		; <<4 x float>> [#uses=1]
+  %vecins = insertelement <4 x float> %tmp1, float 0.000000e+00, i32 0	; <<4 x float>> [#uses=1]
+  %vecins4 = insertelement <4 x float> %vecins, float 0.000000e+00, i32 1; <<4 x float>> [#uses=1]
+  %vecins6 = insertelement <4 x float> %vecins4, float 0.000000e+00, i32 2; <<4 x float>> [#uses=1]
+  %vecins8 = insertelement <4 x float> %vecins6, float 0.000000e+00, i32 3; <<4 x float>> [#uses=1]
+  store <4 x float> %vecins8, <4 x float>* %a
+  ret void
 }
 
 declare i32 @fgetc(i8*)
@@ -139,9 +179,13 @@
 declare i64 @llvm.x86.sse2.cvttsd2si64(<2 x double>)
 
 define <4 x float> @dead_shuffle_elt(<4 x float> %x, <2 x float> %y) nounwind {
+; CHECK-LABEL: @dead_shuffle_elt(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[SHUFFLE_I:%.*]] = shufflevector <2 x float> %y, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+; CHECK-NEXT:    [[SHUFFLE9_I:%.*]] = shufflevector <4 x float> %x, <4 x float> [[SHUFFLE_I]], <4 x i32> <i32 4, i32 5, i32 2, i32 3>
+; CHECK-NEXT:    ret <4 x float> [[SHUFFLE9_I]]
+;
 entry:
-; CHECK-LABEL: define <4 x float> @dead_shuffle_elt(
-; CHECK: shufflevector <2 x float> %y, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
   %shuffle.i = shufflevector <2 x float> %y, <2 x float> %y, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
   %shuffle9.i = shufflevector <4 x float> %x, <4 x float> %shuffle.i, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
   ret <4 x float> %shuffle9.i
@@ -149,9 +193,12 @@
 
 define <2 x float> @test_fptrunc(double %f) {
 ; CHECK-LABEL: @test_fptrunc(
-; CHECK: insertelement
-; CHECK: insertelement
-; CHECK-NOT: insertelement
+; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <4 x double> undef, double %f, i32 0
+; CHECK-NEXT:    [[TMP12:%.*]] = shufflevector <4 x double> [[TMP9]], <4 x double> <double undef, double 0.000000e+00, double undef, double undef>, <4 x i32> <i32 0, i32 5, i32 undef, i32 undef>
+; CHECK-NEXT:    [[TMP5:%.*]] = fptrunc <4 x double> [[TMP12]] to <4 x float>
+; CHECK-NEXT:    [[RET:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> undef, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT:    ret <2 x float> [[RET]]
+;
   %tmp9 = insertelement <4 x double> undef, double %f, i32 0
   %tmp10 = insertelement <4 x double> %tmp9, double 0.000000e+00, i32 1
   %tmp11 = insertelement <4 x double> %tmp10, double 0.000000e+00, i32 2
@@ -163,9 +210,12 @@
 
 define <2 x double> @test_fpext(float %f) {
 ; CHECK-LABEL: @test_fpext(
-; CHECK: insertelement
-; CHECK: insertelement
-; CHECK-NOT: insertelement
+; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <4 x float> undef, float %f, i32 0
+; CHECK-NEXT:    [[TMP12:%.*]] = shufflevector <4 x float> [[TMP9]], <4 x float> <float undef, float 0.000000e+00, float undef, float undef>, <4 x i32> <i32 0, i32 5, i32 undef, i32 undef>
+; CHECK-NEXT:    [[TMP5:%.*]] = fpext <4 x float> [[TMP12]] to <4 x double>
+; CHECK-NEXT:    [[RET:%.*]] = shufflevector <4 x double> [[TMP5]], <4 x double> undef, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT:    ret <2 x double> [[RET]]
+;
   %tmp9 = insertelement <4 x float> undef, float %f, i32 0
   %tmp10 = insertelement <4 x float> %tmp9, float 0.000000e+00, i32 1
   %tmp11 = insertelement <4 x float> %tmp10, float 0.000000e+00, i32 2
@@ -177,11 +227,11 @@
 
 define <4 x float> @test_select(float %f, float %g) {
 ; CHECK-LABEL: @test_select(
-; CHECK: %a0 = insertelement <4 x float> undef, float %f, i32 0
-; CHECK-NOT: insertelement
-; CHECK: %a3 = insertelement <4 x float> %a0, float 3.000000e+00, i32 3
-; CHECK-NOT: insertelement
-; CHECK: %ret = select <4 x i1> <i1 true, i1 false, i1 false, i1 true>, <4 x float> %a3, <4 x float> <float undef, float 4.000000e+00, float 5.000000e+00, float undef>
+; CHECK-NEXT:    [[A0:%.*]] = insertelement <4 x float> undef, float %f, i32 0
+; CHECK-NEXT:    [[A3:%.*]] = shufflevector <4 x float> [[A0]], <4 x float> <float undef, float undef, float undef, float 3.000000e+00>, <4 x i32> <i32 0, i32 undef, i32 undef, i32 7>
+; CHECK-NEXT:    [[RET:%.*]] = select <4 x i1> <i1 true, i1 false, i1 false, i1 true>, <4 x float> [[A3]], <4 x float> <float undef, float 4.000000e+00, float 5.000000e+00, float undef>
+; CHECK-NEXT:    ret <4 x float> [[RET]]
+;
   %a0 = insertelement <4 x float> undef, float %f, i32 0
   %a1 = insertelement <4 x float> %a0, float 1.000000e+00, i32 1
   %a2 = insertelement <4 x float> %a1, float 2.000000e+00, i32 2
@@ -195,12 +245,13 @@
 }
 
 define <2 x i64> @PR24922(<2 x i64> %v) {
-; CHECK-LABEL: @PR24922
-; CHECK: select <2 x i1> 
+; CHECK-LABEL: @PR24922(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[RESULT:%.*]] = select <2 x i1> <i1 false, i1 true>, <2 x i64> %v, <2 x i64> <i64 0, i64 undef>
+; CHECK-NEXT:    ret <2 x i64> [[RESULT]]
 ;
 ; Check that instcombine doesn't wrongly fold the select statement into a
 ; ret <2 x i64> %v
-;
 ; FIXME: We should be able to simplify the ConstantExpr in the select mask.
 entry:
   %result = select <2 x i1> <i1 icmp eq (i64 extractelement (<2 x i64> bitcast (<4 x i32> <i32 15, i32 15, i32 15, i32 15> to <2 x i64>), i64 0), i64 0), i1 true>, <2 x i64> %v, <2 x i64> zeroinitializer
Index: test/Transforms/InstCombine/vector_insertelt_shuffle.ll
===================================================================
--- test/Transforms/InstCombine/vector_insertelt_shuffle.ll
+++ test/Transforms/InstCombine/vector_insertelt_shuffle.ll
@@ -7,10 +7,9 @@
   ret<4 x float> %ins2
 }
 
-; FIXME: insertelements should fold to shuffle
+; insertelements should fold to shuffle
 ; CHECK-LABEL: @foo
-; CHECK-NEXT: insertelement <4 x float> %{{.+}}, float 1.000000e+00, i32 1
-; CHECK-NEXT: insertelement <4 x float> %{{.+}}, float 2.000000e+00, i32 2
+; CHECK-NEXT: shufflevector <4 x float> %{{.+}}, <4 x float> <float undef, float 1.000000e+00, float 2.000000e+00, float undef>, <4 x i32> <i32 0, i32 5, i32 6, i32 3>
 ; CHECK-NEXT: ret <4 x float> %
 
 define<4 x float> @bar(<4 x float> %x, float %a) {
@@ -45,12 +44,11 @@
   ret<4 x float> %ins6
 }
 
-; FIXME: insertelements should fold to shuffle
+; insertelements should fold to shuffle
 ; CHECK-LABEL: @bazz
 ; CHECK-NEXT: insertelement <4 x float> %{{.+}}, float 1.000000e+00, i32 3
 ; CHECK-NEXT: insertelement <4 x float> %{{.+}}, float 5.000000e+00, i32 %
-; CHECK-NEXT: insertelement <4 x float> %{{.+}}, float 1.000000e+00, i32 1
-; CHECK-NEXT: insertelement <4 x float> %{{.+}}, float 2.000000e+00, i32 2
+; CHECK-NEXT: shufflevector <4 x float> %{{.+}}, <4 x float> <float undef, float 1.000000e+00, float 2.000000e+00, float undef>, <4 x i32> <i32 0, i32 5, i32 6, i32 3>
 ; CHECK-NEXT: insertelement <4 x float> %{{.+}}, float 7.000000e+00, i32 %
 ; CHECK-NEXT: ret <4 x float> %