Index: lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
===================================================================
--- lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -1048,6 +1048,8 @@
     if (TmpV) { I->setOperand(1, TmpV); MadeChange = true; }
 
     bool NewUndefElts = false;
+    unsigned LHSIdx = -1u;
+    unsigned RHSIdx = -1u;
     for (unsigned i = 0; i < VWidth; i++) {
       unsigned MaskVal = Shuffle->getMaskValue(i);
       if (MaskVal == -1u) {
@@ -1059,15 +1061,50 @@
         if (UndefElts4[MaskVal]) {
           NewUndefElts = true;
           UndefElts.setBit(i);
-        }
+        } else
+          LHSIdx = LHSIdx == -1u ? MaskVal : LHSVWidth;
       } else {
         if (UndefElts3[MaskVal - LHSVWidth]) {
           NewUndefElts = true;
           UndefElts.setBit(i);
-        }
+        } else
+          RHSIdx = RHSIdx == -1u ? MaskVal - LHSVWidth : LHSVWidth;
       }
     }
 
+    // Try to transform a shuffle that uses exactly one element of a constant
+    // vector operand into a single insertelement instruction:
+    // shufflevector V, C, <v1, v2, .., ci, .., vm> ->
+    // insertelement V, C[ci], ci-n
+    if (LHSVWidth == Shuffle->getType()->getNumElements()) {
+      Value *Op = nullptr;
+      Constant *Value = nullptr;
+      unsigned Idx = -1u;
+
+      // Find constant vector with the single element in shuffle (LHS or RHS).
+      if (LHSIdx < LHSVWidth) {
+        if (auto *CV = dyn_cast<ConstantVector>(Shuffle->getOperand(0))) {
+          Op = Shuffle->getOperand(1);
+          Value = CV->getOperand(LHSIdx);
+          Idx = LHSIdx;
+        }
+      }
+      if (RHSIdx < LHSVWidth) {
+        if (auto *CV = dyn_cast<ConstantVector>(Shuffle->getOperand(1))) {
+          Op = Shuffle->getOperand(0);
+          Value = CV->getOperand(RHSIdx);
+          Idx = RHSIdx;
+        }
+      }
+      // Found constant vector with single element - convert to insertelement.
+      if (Op && Value) {
+        Instruction *New = InsertElementInst::Create(
+            Op, Value, ConstantInt::get(Type::getInt32Ty(I->getContext()), Idx),
+            Shuffle->getName());
+        InsertNewInstWith(New, *Shuffle);
+        return New;
+      }
+    }
     if (NewUndefElts) {
       // Add additional discovered undefs.
       SmallVector<Constant*, 16> Elts;
Index: lib/Transforms/InstCombine/InstCombineVectorOps.cpp
===================================================================
--- lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -585,58 +585,104 @@
   return true;
 }
 
-/// insertelt (shufflevector X, CVec, Mask), C, CIndex -->
-/// shufflevector X, CVec', Mask'
+/// insertelt (shufflevector X, CVec, Mask|insertelt X, C1, CIndex1), C, CIndex
+/// --> shufflevector X, CVec', Mask'
 static Instruction *foldConstantInsEltIntoShuffle(InsertElementInst &InsElt) {
-  // Bail out if the shuffle has more than one use. In that case, we'd be
+  auto *Inst = dyn_cast<Instruction>(InsElt.getOperand(0));
+  // Bail out if the operand has more than one use. In that case, we'd be
   // replacing the insertelt with a shuffle, and that's not a clear win.
-  auto *Shuf = dyn_cast<ShuffleVectorInst>(InsElt.getOperand(0));
-  if (!Shuf || !Shuf->hasOneUse())
+  if (!Inst || !Inst->hasOneUse())
     return nullptr;
+  Value *X = nullptr;
+  Constant *NewShufVec = nullptr;
+  Constant *NewMask = nullptr;
+  if (auto *Shuf = dyn_cast<ShuffleVectorInst>(InsElt.getOperand(0))) {
+    // The shuffle must have a constant vector operand. The insertelt must have
+    // a constant scalar being inserted at a constant position in the vector.
+    Constant *ShufConstVec, *InsEltScalar;
+    uint64_t InsEltIndex;
+    if (!match(Shuf->getOperand(1), m_Constant(ShufConstVec)) ||
+        !match(InsElt.getOperand(1), m_Constant(InsEltScalar)) ||
+        !match(InsElt.getOperand(2), m_ConstantInt(InsEltIndex)))
+      return nullptr;
 
-  // The shuffle must have a constant vector operand. The insertelt must have a
-  // constant scalar being inserted at a constant position in the vector.
-  Constant *ShufConstVec, *InsEltScalar;
-  uint64_t InsEltIndex;
-  if (!match(Shuf->getOperand(1), m_Constant(ShufConstVec)) ||
-      !match(InsElt.getOperand(1), m_Constant(InsEltScalar)) ||
-      !match(InsElt.getOperand(2), m_ConstantInt(InsEltIndex)))
-    return nullptr;
+    // Adding an element to an arbitrary shuffle could be expensive, but a
+    // shuffle that selects elements from vectors without crossing lanes is
+    // assumed cheap.
+    // If we're just adding a constant into that shuffle, it will still be
+    // cheap.
+    if (!isShuffleEquivalentToSelect(*Shuf))
+      return nullptr;
 
-  // Adding an element to an arbitrary shuffle could be expensive, but a shuffle
-  // that selects elements from vectors without crossing lanes is assumed cheap.
-  // If we're just adding a constant into that shuffle, it will still be cheap.
-  if (!isShuffleEquivalentToSelect(*Shuf))
-    return nullptr;
+    // From the above 'select' check, we know that the mask has the same number
+    // of elements as the vector input operands. We also know that each constant
+    // input element is used in its lane and can not be used more than once by
+    // the shuffle. Therefore, replace the constant in the shuffle's constant
+    // vector with the insertelt constant. Replace the constant in the shuffle's
+    // mask vector with the insertelt index plus the length of the vector
+    // (because the constant vector operand of a shuffle is always the 2nd
+    // operand).
+    Constant *Mask = Shuf->getMask();
+    unsigned NumElts = Mask->getType()->getVectorNumElements();
+    SmallVector<Constant *, 16> NewShufElts(NumElts);
+    SmallVector<Constant *, 16> NewMaskElts(NumElts);
+    for (unsigned I = 0; I != NumElts; ++I) {
+      if (I == InsEltIndex) {
+        NewShufElts[I] = InsEltScalar;
+        Type *Int32Ty = Type::getInt32Ty(Shuf->getContext());
+        NewMaskElts[I] = ConstantInt::get(Int32Ty, InsEltIndex + NumElts);
+      } else {
+        // Copy over the existing values.
+        NewShufElts[I] = ShufConstVec->getAggregateElement(I);
+        NewMaskElts[I] = Mask->getAggregateElement(I);
+      }
+    }
 
-  // From the above 'select' check, we know that the mask has the same number of
-  // elements as the vector input operands. We also know that each constant
-  // input element is used in its lane and can not be used more than once by the
-  // shuffle. Therefore, replace the constant in the shuffle's constant vector
-  // with the insertelt constant. Replace the constant in the shuffle's mask
-  // vector with the insertelt index plus the length of the vector (because the
-  // constant vector operand of a shuffle is always the 2nd operand).
-  Constant *Mask = Shuf->getMask();
-  unsigned NumElts = Mask->getType()->getVectorNumElements();
-  SmallVector<Constant*, 16> NewShufElts(NumElts);
-  SmallVector<Constant*, 16> NewMaskElts(NumElts);
-  for (unsigned i = 0; i != NumElts; ++i) {
-    if (i == InsEltIndex) {
-      NewShufElts[i] = InsEltScalar;
-      Type *Int32Ty = Type::getInt32Ty(Shuf->getContext());
-      NewMaskElts[i] = ConstantInt::get(Int32Ty, InsEltIndex + NumElts);
-    } else {
-      // Copy over the existing values.
-      NewShufElts[i] = ShufConstVec->getAggregateElement(i);
-      NewMaskElts[i] = Mask->getAggregateElement(i);
+    // Create new operands for a shuffle that includes the constant of the
+    // original insertelt. The old shuffle will be dead now.
+    X = Shuf->getOperand(0);
+    NewShufVec = ConstantVector::get(NewShufElts);
+    NewMask = ConstantVector::get(NewMaskElts);
+  } else if (auto *IEI = dyn_cast<InsertElementInst>(Inst)) {
+    // Transform a sequence of insertelement ops with constant data/indices
+    // into a single shuffle op.
+    unsigned NumElts = InsElt.getType()->getNumElements();
+
+    uint64_t InsertIdx[2];
+    Constant *Val[2];
+    if (!match(InsElt.getOperand(2), m_ConstantInt(InsertIdx[0])) ||
+        !match(InsElt.getOperand(1), m_Constant(Val[0])) ||
+        !match(IEI->getOperand(2), m_ConstantInt(InsertIdx[1])) ||
+        !match(IEI->getOperand(1), m_Constant(Val[1])))
+      return nullptr;
+    SmallVector<Constant *, 16> Values(NumElts);
+    SmallVector<Constant *, 16> Mask(NumElts);
+    auto ValI = std::begin(Val);
+    for (uint64_t I : InsertIdx) {
+      if (!Values[I]) {
+        assert(!Mask[I]);
+        Values[I] = *ValI;
+        Mask[I] = ConstantInt::get(Type::getInt32Ty(InsElt.getContext()),
+                                   NumElts + I);
+      }
+      ++ValI;
     }
+    for (unsigned I = 0; I < NumElts; ++I) {
+      if (!Values[I]) {
+        assert(!Mask[I]);
+        Values[I] = UndefValue::get(InsElt.getType()->getElementType());
+        Mask[I] = ConstantInt::get(Type::getInt32Ty(InsElt.getContext()), I);
+      }
+    }
+    // Create new operands for a shuffle that includes the constant of the
+    // original insertelt.
+    X = IEI->getOperand(0);
+    NewShufVec = ConstantVector::get(Values);
+    NewMask = ConstantVector::get(Mask);
   }
-
-  // Create new operands for a shuffle that includes the constant of the
-  // original insertelt. The old shuffle will be dead now.
-  Constant *NewShufVec = ConstantVector::get(NewShufElts);
-  Constant *NewMask = ConstantVector::get(NewMaskElts);
-  return new ShuffleVectorInst(Shuf->getOperand(0), NewShufVec, NewMask);
+  if (X && NewShufVec && NewMask)
+    return new ShuffleVectorInst(X, NewShufVec, NewMask);
+  return nullptr;
 }
 
 Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) {
Index: test/Transforms/InstCombine/vec_demanded_elts.ll
===================================================================
--- test/Transforms/InstCombine/vec_demanded_elts.ll
+++ test/Transforms/InstCombine/vec_demanded_elts.ll
@@ -215,8 +215,8 @@
 define <4 x float> @test_select(float %f, float %g) {
 ; CHECK-LABEL: @test_select(
 ; CHECK-NEXT:    [[A0:%.*]] = insertelement <4 x float> undef, float %f, i32 0
-; CHECK-NEXT:    [[A3:%.*]] = insertelement <4 x float> [[A0]], float 3.000000e+00, i32 3
-; CHECK-NEXT:    [[RET:%.*]] = select <4 x i1> <i1 true, i1 false, i1 false, i1 true>, <4 x float> [[A3]], <4 x float> <float undef, float 4.000000e+00, float 5.000000e+00, float undef>
+; CHECK-NEXT:    [[A31:%.*]] = insertelement <4 x float> [[A0]], float 3.000000e+00, i32 3
+; CHECK-NEXT:    [[RET:%.*]] = select <4 x i1> <i1 true, i1 false, i1 false, i1 true>, <4 x float> [[A31]], <4 x float> <float undef, float 4.000000e+00, float 5.000000e+00, float undef>
 ; CHECK-NEXT:    ret <4 x float> [[RET]]
 ;
   %a0 = insertelement <4 x float> undef, float %f, i32 0
Index: test/Transforms/InstCombine/vector_insertelt_shuffle.ll
===================================================================
--- test/Transforms/InstCombine/vector_insertelt_shuffle.ll
+++ test/Transforms/InstCombine/vector_insertelt_shuffle.ll
@@ -7,10 +7,9 @@
   ret<4 x float> %ins2
 }
 
-; FIXME: insertelements should fold to shuffle
+; insertelements should fold to shuffle
 ; CHECK-LABEL: @foo
-; CHECK-NEXT: insertelement <4 x float> %{{.+}}, float 1.000000e+00, i32 1
-; CHECK-NEXT: insertelement <4 x float> %{{.+}}, float 2.000000e+00, i32 2
+; CHECK-NEXT: shufflevector <4 x float> %{{.+}}, <4 x float> <float undef, float 1.000000e+00, float 2.000000e+00, float undef>, <4 x i32> <i32 0, i32 5, i32 6, i32 3>
 ; CHECK-NEXT: ret <4 x float> %
 
 define<4 x float> @bar(<4 x float> %x, float %a) {
@@ -45,12 +44,11 @@
   ret<4 x float> %ins6
 }
 
-; FIXME: insertelements should fold to shuffle
+; insertelements should fold to shuffle
 ; CHECK-LABEL: @bazz
 ; CHECK-NEXT: insertelement <4 x float> %{{.+}}, float 1.000000e+00, i32 3
 ; CHECK-NEXT: insertelement <4 x float> %{{.+}}, float 5.000000e+00, i32 %
-; CHECK-NEXT: insertelement <4 x float> %{{.+}}, float 1.000000e+00, i32 1
-; CHECK-NEXT: insertelement <4 x float> %{{.+}}, float 2.000000e+00, i32 2
+; CHECK-NEXT: shufflevector <4 x float> %{{.+}}, <4 x float> <float undef, float 1.000000e+00, float 2.000000e+00, float undef>, <4 x i32> <i32 0, i32 5, i32 6, i32 3>
 ; CHECK-NEXT: insertelement <4 x float> %{{.+}}, float 7.000000e+00, i32 %
 ; CHECK-NEXT: ret <4 x float> %
 
Index: test/Transforms/InstCombine/x86-insertps.ll
===================================================================
--- test/Transforms/InstCombine/x86-insertps.ll
+++ test/Transforms/InstCombine/x86-insertps.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt < %s -instcombine -S | FileCheck %s
 
 declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i8) nounwind readnone
@@ -5,146 +6,161 @@
 ; This should never happen, but make sure we don't crash handling a non-constant immediate byte.
 
 define <4 x float> @insertps_non_const_imm(<4 x float> %v1, <4 x float> %v2, i8 %c) {
+; CHECK-LABEL: @insertps_non_const_imm(
+; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 %c)
+; CHECK-NEXT:    ret <4 x float> [[RES]]
+;
   %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 %c)
   ret <4 x float> %res
 
-; CHECK-LABEL: @insertps_non_const_imm
-; CHECK-NEXT:  call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 %c)
-; CHECK-NEXT:  ret <4 x float>
 }
 
 ; If all zero mask bits are set, return a zero regardless of the other control bits.
 
 define <4 x float> @insertps_0x0f(<4 x float> %v1, <4 x float> %v2) {
+; CHECK-LABEL: @insertps_0x0f(
+; CHECK-NEXT:    ret <4 x float> zeroinitializer
+;
   %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 15)
   ret <4 x float> %res
 
-; CHECK-LABEL: @insertps_0x0f
-; CHECK-NEXT:  ret <4 x float> zeroinitializer
 }
 define <4 x float> @insertps_0xff(<4 x float> %v1, <4 x float> %v2) {
+; CHECK-LABEL: @insertps_0xff(
+; CHECK-NEXT:    ret <4 x float> zeroinitializer
+;
   %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 255)
   ret <4 x float> %res
 
-; CHECK-LABEL: @insertps_0xff
-; CHECK-NEXT:  ret <4 x float> zeroinitializer
 }
 
 ; If some zero mask bits are set that do not override the insertion, we do not change anything.
 
 define <4 x float> @insertps_0x0c(<4 x float> %v1, <4 x float> %v2) {
+; CHECK-LABEL: @insertps_0x0c(
+; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 12)
+; CHECK-NEXT:    ret <4 x float> [[RES]]
+;
   %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 12)
   ret <4 x float> %res
 
-; CHECK-LABEL: @insertps_0x0c
-; CHECK-NEXT:  call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 12)
-; CHECK-NEXT:  ret <4 x float>
 }
 
 ; ...unless both input vectors are the same operand.
 
 define <4 x float> @insertps_0x15_single_input(<4 x float> %v1) {
+; CHECK-LABEL: @insertps_0x15_single_input(
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> %v1, <4 x float> <float 0.000000e+00, float undef, float 0.000000e+00, float undef>, <4 x i32> <i32 4, i32 0, i32 6, i32 3>
+; CHECK-NEXT:    ret <4 x float> [[TMP1]]
+;
   %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v1, i8 21)
   ret <4 x float> %res
 
-; CHECK-LABEL: @insertps_0x15_single_input
-; CHECK-NEXT:  shufflevector <4 x float> %v1, <4 x float> <float 0.000000e+00, float undef, float 0.000000e+00, float undef>, <4 x i32> <i32 4, i32 0, i32 6, i32 3>
-; CHECK-NEXT:  ret <4 x float>
 }
 
 ; The zero mask overrides the insertion lane.
 
 define <4 x float> @insertps_0x1a_single_input(<4 x float> %v1) {
+; CHECK-LABEL: @insertps_0x1a_single_input(
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> %v1, <4 x float> <float undef, float 0.000000e+00, float undef, float 0.000000e+00>, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+; CHECK-NEXT:    ret <4 x float> [[TMP1]]
+;
   %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v1, i8 26)
   ret <4 x float> %res
 
-; CHECK-LABEL: @insertps_0x1a_single_input
-; CHECK-NEXT:  shufflevector <4 x float> %v1, <4 x float> <float undef, float 0.000000e+00, float undef, float 0.000000e+00>, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
-; CHECK-NEXT:  ret <4 x float>
 }
 
 ; The zero mask overrides the insertion lane, so the second input vector is not used.
 
 define <4 x float> @insertps_0xc1(<4 x float> %v1, <4 x float> %v2) {
+; CHECK-LABEL: @insertps_0xc1(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> %v1, float 0.000000e+00, i32 0
+; CHECK-NEXT:    ret <4 x float> [[TMP1]]
+;
   %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 193)
   ret <4 x float> %res
 
-; CHECK-LABEL: @insertps_0xc1
-; CHECK-NEXT:  shufflevector <4 x float> %v1, <4 x float> <float 0.000000e+00, float undef, float undef, float undef>, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
-; CHECK-NEXT:  ret <4 x float>
 }
 
 ; If no zero mask bits are set, convert to a shuffle.
 
 define <4 x float> @insertps_0x00(<4 x float> %v1, <4 x float> %v2) {
+; CHECK-LABEL: @insertps_0x00(
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> %v1, <4 x float> %v2, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
+; CHECK-NEXT:    ret <4 x float> [[TMP1]]
+;
   %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 0)
   ret <4 x float> %res
 
-; CHECK-LABEL: @insertps_0x00
-; CHECK-NEXT:  shufflevector <4 x float> %v1, <4 x float> %v2, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
-; CHECK-NEXT:  ret <4 x float>
 }
 
 define <4 x float> @insertps_0x10(<4 x float> %v1, <4 x float> %v2) {
+; CHECK-LABEL: @insertps_0x10(
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> %v1, <4 x float> %v2, <4 x i32> <i32 0, i32 4, i32 2, i32 3>
+; CHECK-NEXT:    ret <4 x float> [[TMP1]]
+;
   %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 16)
   ret <4 x float> %res
 
-; CHECK-LABEL: @insertps_0x10
-; CHECK-NEXT:  shufflevector <4 x float> %v1, <4 x float> %v2, <4 x i32> <i32 0, i32 4, i32 2, i32 3>
-; CHECK-NEXT:  ret <4 x float>
 }
 
 define <4 x float> @insertps_0x20(<4 x float> %v1, <4 x float> %v2) {
+; CHECK-LABEL: @insertps_0x20(
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> %v1, <4 x float> %v2, <4 x i32> <i32 0, i32 1, i32 4, i32 3>
+; CHECK-NEXT:    ret <4 x float> [[TMP1]]
+;
   %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 32)
   ret <4 x float> %res
 
-; CHECK-LABEL: @insertps_0x20
-; CHECK-NEXT:  shufflevector <4 x float> %v1, <4 x float> %v2, <4 x i32> <i32 0, i32 1, i32 4, i32 3>
-; CHECK-NEXT:  ret <4 x float>
 }
 
 define <4 x float> @insertps_0x30(<4 x float> %v1, <4 x float> %v2) {
+; CHECK-LABEL: @insertps_0x30(
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> %v1, <4 x float> %v2, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT:    ret <4 x float> [[TMP1]]
+;
   %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 48)
   ret <4 x float> %res
 
-; CHECK-LABEL: @insertps_0x30
-; CHECK-NEXT:  shufflevector <4 x float> %v1, <4 x float> %v2, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
-; CHECK-NEXT:  ret <4 x float>
 }
 
 define <4 x float> @insertps_0xc0(<4 x float> %v1, <4 x float> %v2) {
+; CHECK-LABEL: @insertps_0xc0(
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> %v1, <4 x float> %v2, <4 x i32> <i32 7, i32 1, i32 2, i32 3>
+; CHECK-NEXT:    ret <4 x float> [[TMP1]]
+;
   %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 192)
   ret <4 x float> %res
 
-; CHECK-LABEL: @insertps_0xc0
-; CHECK-NEXT:  shufflevector <4 x float> %v1, <4 x float> %v2, <4 x i32> <i32 7, i32 1, i32 2, i32 3>
-; CHECK-NEXT:  ret <4 x float>
 }
 
 define <4 x float> @insertps_0xd0(<4 x float> %v1, <4 x float> %v2) {
+; CHECK-LABEL: @insertps_0xd0(
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> %v1, <4 x float> %v2, <4 x i32> <i32 0, i32 7, i32 2, i32 3>
+; CHECK-NEXT:    ret <4 x float> [[TMP1]]
+;
   %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 208)
   ret <4 x float> %res
 
-; CHECK-LABEL: @insertps_0xd0
-; CHECK-NEXT:  shufflevector <4 x float> %v1, <4 x float> %v2, <4 x i32> <i32 0, i32 7, i32 2, i32 3>
-; CHECK-NEXT:  ret <4 x float>
 }
 
 define <4 x float> @insertps_0xe0(<4 x float> %v1, <4 x float> %v2) {
+; CHECK-LABEL: @insertps_0xe0(
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> %v1, <4 x float> %v2, <4 x i32> <i32 0, i32 1, i32 7, i32 3>
+; CHECK-NEXT:    ret <4 x float> [[TMP1]]
+;
   %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 224)
   ret <4 x float> %res
 
-; CHECK-LABEL: @insertps_0xe0
-; CHECK-NEXT:  shufflevector <4 x float> %v1, <4 x float> %v2, <4 x i32> <i32 0, i32 1, i32 7, i32 3>
-; CHECK-NEXT:  ret <4 x float>
 }
 
 define <4 x float> @insertps_0xf0(<4 x float> %v1, <4 x float> %v2) {
+; CHECK-LABEL: @insertps_0xf0(
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> %v1, <4 x float> %v2, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+; CHECK-NEXT:    ret <4 x float> [[TMP1]]
+;
   %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 240)
   ret <4 x float> %res
 
-; CHECK-LABEL: @insertps_0xf0
-; CHECK-NEXT:  shufflevector <4 x float> %v1, <4 x float> %v2, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
-; CHECK-NEXT:  ret <4 x float>
 }