Index: llvm/docs/CodeGenerator.rst
===================================================================
--- llvm/docs/CodeGenerator.rst
+++ llvm/docs/CodeGenerator.rst
@@ -912,6 +912,25 @@
 (and which of the above three actions to take) by calling the
 ``setOperationAction`` method in its ``TargetLowering`` constructor.
 
+If a target supports vector types, then the IR optimizer assumes that it can
+create some forms of the shufflevector IR instruction as canonical IR. Therefore,
+a target is expected to produce efficient machine code for those forms of the
+shufflevector IR instruction using custom legalization if needed. The shuffle
+forms that should be handled include:
+
+* Splat --- All elements of the vector have identical scalar elements. This
+operation may also be known as a "broadcast" or "duplicate" in target assembly.
+
+* Select --- Each element of the vector is chosen from the corresponding lane
+of 1 of 2 input vectors (no lane crossings). This operation may also be known as
+a "blend" or "bitwise select" in target assembly.
+
+* Insert subvector --- The vector is placed into a longer vector type starting
+at index 0.
+
+* Extract subvector --- The vector is pulled from a longer vector type starting
+at index 0.
+
 Prior to the existence of the Legalize passes, we required that every target
 `selector`_ supported and handled every operator and type even if they are not
 natively supported.  The introduction of the Legalize phases allows all of the
Index: llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
===================================================================
--- llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -1608,6 +1608,45 @@
   return nullptr;
 }
 
+static Instruction *foldIdentityPaddedShuffles(ShuffleVectorInst &Shuf) {
+  // Match the operands as identity with padding (also known as
+  // insert_subvector into undef) shuffles of the same source type.
+  auto *Shuffle0 = dyn_cast<ShuffleVectorInst>(Shuf.getOperand(0));
+  auto *Shuffle1 = dyn_cast<ShuffleVectorInst>(Shuf.getOperand(1));
+  if (!Shuffle0 || !Shuffle0->isIdentityWithPadding() ||
+      !Shuffle1 || !Shuffle1->isIdentityWithPadding())
+    return nullptr;
+
+  Value *X = Shuffle0->getOperand(0);
+  Value *Y = Shuffle1->getOperand(0);
+  if (X->getType() != Y->getType() || isa<UndefValue>(X) || isa<UndefValue>(Y))
+    return nullptr;
+  assert(isa<UndefValue>(Shuffle0->getOperand(1)) &&
+         isa<UndefValue>(Shuffle1->getOperand(1)) &&
+         "Unexpected operand for identity shuffle");
+
+  // This is a shuffle of 2 widening shuffles. We can shuffle the narrow source
+  // operands directly by adjusting the shuffle mask to account for the narrower
+  // types:
+  // shuf (widen X), (widen Y), Mask --> shuf X, Y, Mask'
+  int NarrowElts = X->getType()->getVectorNumElements();
+  int WideElts = Shuffle0->getType()->getVectorNumElements();
+  assert(WideElts > NarrowElts && "Unexpected types for identity with padding");
+
+  Type *I32Ty = IntegerType::getInt32Ty(Shuf.getContext());
+  SmallVector<int, 16> Mask = Shuf.getShuffleMask();
+  SmallVector<Constant *, 16> NewMask(Mask.size(), UndefValue::get(I32Ty));
+  for (int i = 0, e = Mask.size(); i != e; ++i) {
+    if (Mask[i] == -1)
+      continue;
+    if (Mask[i] < WideElts)
+      NewMask[i] = ConstantInt::get(I32Ty, Mask[i]);
+    else
+      NewMask[i] = ConstantInt::get(I32Ty, Mask[i] - (WideElts - NarrowElts));
+  }
+  return new ShuffleVectorInst(X, Y, ConstantVector::get(NewMask));
+}
+
 Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
   Value *LHS = SVI.getOperand(0);
   Value *RHS = SVI.getOperand(1);
@@ -1662,10 +1701,12 @@
   if (Instruction *I = foldIdentityExtractShuffle(SVI))
     return I;
 
-  // This transform has the potential to lose undef knowledge, so it is
+  // These transforms have the potential to lose undef knowledge, so they are
   // intentionally placed after SimplifyDemandedVectorElts().
   if (Instruction *I = foldShuffleWithInsert(SVI))
     return I;
+  if (Instruction *I = foldIdentityPaddedShuffles(SVI))
+    return I;
 
   if (VWidth == LHSWidth) {
     // Analyze the shuffle, are the LHS or RHS and identity shuffles?
Index: llvm/test/Transforms/InstCombine/vec_shuffle.ll
===================================================================
--- llvm/test/Transforms/InstCombine/vec_shuffle.ll
+++ llvm/test/Transforms/InstCombine/vec_shuffle.ll
@@ -1142,9 +1142,7 @@
 
 define <7 x i8> @insert_subvector_shuffles(<3 x i8> %x, <3 x i8> %y) {
 ; CHECK-LABEL: @insert_subvector_shuffles(
-; CHECK-NEXT:    [[S1:%.*]] = shufflevector <3 x i8> [[X:%.*]], <3 x i8> undef, <7 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-; CHECK-NEXT:    [[S2:%.*]] = shufflevector <3 x i8> [[Y:%.*]], <3 x i8> undef, <7 x i32> <i32 undef, i32 1, i32 2, i32 undef, i32 undef, i32 undef, i32 undef>
-; CHECK-NEXT:    [[S3:%.*]] = shufflevector <7 x i8> [[S1]], <7 x i8> [[S2]], <7 x i32> <i32 0, i32 8, i32 1, i32 undef, i32 8, i32 1, i32 9>
+; CHECK-NEXT:    [[S3:%.*]] = shufflevector <3 x i8> [[X:%.*]], <3 x i8> [[Y:%.*]], <7 x i32> <i32 0, i32 4, i32 1, i32 undef, i32 4, i32 1, i32 5>
 ; CHECK-NEXT:    ret <7 x i8> [[S3]]
 ;
   %s1 = shufflevector <3 x i8> %x, <3 x i8> undef, <7 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
@@ -1157,9 +1155,7 @@
 
 define <2 x i8> @insert_subvector_shuffles_narrowing(<3 x i8> %x, <3 x i8> %y) {
 ; CHECK-LABEL: @insert_subvector_shuffles_narrowing(
-; CHECK-NEXT:    [[S1:%.*]] = shufflevector <3 x i8> [[X:%.*]], <3 x i8> undef, <7 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-; CHECK-NEXT:    [[S2:%.*]] = shufflevector <3 x i8> [[Y:%.*]], <3 x i8> undef, <7 x i32> <i32 undef, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-; CHECK-NEXT:    [[S3:%.*]] = shufflevector <7 x i8> [[S1]], <7 x i8> [[S2]], <2 x i32> <i32 0, i32 8>
+; CHECK-NEXT:    [[S3:%.*]] = shufflevector <3 x i8> [[X:%.*]], <3 x i8> [[Y:%.*]], <2 x i32> <i32 0, i32 4>
 ; CHECK-NEXT:    ret <2 x i8> [[S3]]
 ;
   %s1 = shufflevector <3 x i8> %x, <3 x i8> undef, <7 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
@@ -1172,9 +1168,7 @@
 
 define <4 x double> @insert_subvector_shuffles_identity(<2 x double> %x) {
 ; CHECK-LABEL: @insert_subvector_shuffles_identity(
-; CHECK-NEXT:    [[S1:%.*]] = shufflevector <2 x double> [[X:%.*]], <2 x double> undef, <4 x i32> <i32 undef, i32 1, i32 undef, i32 undef>
-; CHECK-NEXT:    [[S2:%.*]] = shufflevector <2 x double> [[X]], <2 x double> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
-; CHECK-NEXT:    [[S3:%.*]] = shufflevector <4 x double> [[S2]], <4 x double> [[S1]], <4 x i32> <i32 0, i32 5, i32 undef, i32 undef>
+; CHECK-NEXT:    [[S3:%.*]] = shufflevector <2 x double> [[X:%.*]], <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
 ; CHECK-NEXT:    ret <4 x double> [[S3]]
 ;
   %s1 = shufflevector <2 x double> %x, <2 x double> undef, <4 x i32> <i32 undef, i32 1, i32 undef, i32 undef>