Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -475,7 +475,8 @@
     SDValue reduceBuildVecToShuffle(SDNode *N);
     SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
                                   ArrayRef<int> VectorMask, SDValue VecIn1,
-                                  SDValue VecIn2, unsigned LeftIdx);
+                                  SDValue VecIn2, unsigned LeftIdx,
+                                  bool DidSplitVec);
     SDValue matchVSelectOpSizesWithSetCC(SDNode *Cast);
 
     /// Walk up chain skipping non-aliasing memory nodes,
@@ -16393,7 +16394,7 @@
 SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
                                            ArrayRef<int> VectorMask,
                                            SDValue VecIn1, SDValue VecIn2,
-                                           unsigned LeftIdx) {
+                                           unsigned LeftIdx, bool DidSplitVec) {
   MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
   SDValue ZeroIdx = DAG.getConstant(0, DL, IdxTy);
 
@@ -16401,17 +16402,12 @@
   EVT InVT1 = VecIn1.getValueType();
   EVT InVT2 = VecIn2.getNode() ? VecIn2.getValueType() : InVT1;
 
-  unsigned Vec2Offset = 0;
   unsigned NumElems = VT.getVectorNumElements();
   unsigned ShuffleNumElems = NumElems;
 
-  // In case both the input vectors are extracted from same base
-  // vector we do not need extra addend (Vec2Offset) while
-  // computing shuffle mask.
-  if (!VecIn2 || !(VecIn1.getOpcode() == ISD::EXTRACT_SUBVECTOR) ||
-      !(VecIn2.getOpcode() == ISD::EXTRACT_SUBVECTOR) ||
-      !(VecIn1.getOperand(0) == VecIn2.getOperand(0)))
-    Vec2Offset = InVT1.getVectorNumElements();
+  // If we artificially split a vector in two already, then the offsets in the
+  // operands will all be based off of VecIn1, even those in VecIn2.
+  unsigned Vec2Offset = DidSplitVec ? 0 : InVT1.getVectorNumElements();
 
   // We can't generate a shuffle node with mismatched input and output types.
   // Try to make the types match the type of the output.
@@ -16670,6 +16666,7 @@
   // vector, then split the vector efficiently based on the maximum
   // vector access index and adjust the VectorMask and
   // VecIn accordingly.
+  bool DidSplitVec = false;
   if (VecIn.size() == 2) {
     unsigned MaxIndex = 0;
     unsigned NearestPow2 = 0;
@@ -16700,6 +16697,7 @@
         VecIn.pop_back();
         VecIn.push_back(VecIn1);
         VecIn.push_back(VecIn2);
+        DidSplitVec = true;
 
         for (unsigned i = 0; i < NumElems; i++) {
           if (VectorMask[i] <= 0)
@@ -16737,7 +16735,7 @@
         (LeftIdx + 1) < VecIn.size() ? VecIn[LeftIdx + 1] : SDValue();
 
     if (SDValue Shuffle = createBuildVecShuffle(DL, N, VectorMask, VecLeft,
-                                                VecRight, LeftIdx))
+                                                VecRight, LeftIdx, DidSplitVec))
       Shuffles.push_back(Shuffle);
     else
       return SDValue();
Index: test/CodeGen/X86/shuffle-extract-subvector.ll
===================================================================
--- /dev/null
+++ test/CodeGen/X86/shuffle-extract-subvector.ll
@@ -0,0 +1,48 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s
+
+define void @f(<4 x half>* %a, <4 x half>* %b, <8 x half>* %c) {
+; CHECK-LABEL: f:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movzwl (%rdi), %r8d
+; CHECK-NEXT:    movzwl 2(%rdi), %r9d
+; CHECK-NEXT:    movzwl 4(%rdi), %r11d
+; CHECK-NEXT:    movzwl 6(%rdi), %edi
+; CHECK-NEXT:    movzwl (%rsi), %r10d
+; CHECK-NEXT:    movzwl 2(%rsi), %ecx
+; CHECK-NEXT:    movzwl 4(%rsi), %eax
+; CHECK-NEXT:    movzwl 6(%rsi), %esi
+; CHECK-NEXT:    movw %si, 14(%rdx)
+; CHECK-NEXT:    movw %di, 12(%rdx)
+; CHECK-NEXT:    movw %ax, 10(%rdx)
+; CHECK-NEXT:    movw %r11w, 8(%rdx)
+; CHECK-NEXT:    movw %cx, 6(%rdx)
+; CHECK-NEXT:    movw %r9w, 4(%rdx)
+; CHECK-NEXT:    movw %r10w, 2(%rdx)
+; CHECK-NEXT:    movw %r8w, (%rdx)
+; CHECK-NEXT:    retq
+  %tmp4 = load <4 x half>, <4 x half>* %a
+  %tmp5 = load <4 x half>, <4 x half>* %b
+  %tmp7 = shufflevector <4 x half> %tmp4, <4 x half> %tmp5, <2 x i32> <i32 0, i32 4>
+  %tmp8 = shufflevector <4 x half> %tmp4, <4 x half> %tmp5, <2 x i32> <i32 1, i32 5>
+  %tmp9 = shufflevector <4 x half> %tmp4, <4 x half> %tmp5, <2 x i32> <i32 2, i32 6>
+  %tmp10 = shufflevector <4 x half> %tmp4, <4 x half> %tmp5, <2 x i32> <i32 3, i32 7>
+  %tmp11 = extractelement <2 x half> %tmp7, i32 0
+  %tmp12 = insertelement <8 x half> undef, half %tmp11, i32 0
+  %tmp13 = extractelement <2 x half> %tmp7, i32 1
+  %tmp14 = insertelement <8 x half> %tmp12, half %tmp13, i32 1
+  %tmp15 = extractelement <2 x half> %tmp8, i32 0
+  %tmp16 = insertelement <8 x half> %tmp14, half %tmp15, i32 2
+  %tmp17 = extractelement <2 x half> %tmp8, i32 1
+  %tmp18 = insertelement <8 x half> %tmp16, half %tmp17, i32 3
+  %tmp19 = extractelement <2 x half> %tmp9, i32 0
+  %tmp20 = insertelement <8 x half> %tmp18, half %tmp19, i32 4
+  %tmp21 = extractelement <2 x half> %tmp9, i32 1
+  %tmp22 = insertelement <8 x half> %tmp20, half %tmp21, i32 5
+  %tmp23 = extractelement <2 x half> %tmp10, i32 0
+  %tmp24 = insertelement <8 x half> %tmp22, half %tmp23, i32 6
+  %tmp25 = extractelement <2 x half> %tmp10, i32 1
+  %tmp26 = insertelement <8 x half> %tmp24, half %tmp25, i32 7
+  store <8 x half> %tmp26, <8 x half>* %c
+  ret void
+}