Index: lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- lib/Target/AArch64/AArch64ISelLowering.cpp
+++ lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -4110,6 +4110,7 @@
 // shuffle in combination with VEXTs.
 SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
                                                   SelectionDAG &DAG) const {
+  assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!");
   SDLoc dl(Op);
   EVT VT = Op.getValueType();
   unsigned NumElts = VT.getVectorNumElements();
@@ -4162,6 +4163,56 @@
   // This loop extracts the usage patterns of the source vectors
   // and prepares appropriate SDValues for a shuffle if possible.
   for (unsigned i = 0; i < SourceVecs.size(); ++i) {
+    if (SourceVecs[i].getValueType().getVectorElementType() !=
+        VT.getVectorElementType()) {
+      if (SourceVecs[i].getOpcode() == ISD::AssertSext ||
+          SourceVecs[i].getOpcode() == ISD::AssertZext) {
+        // For AssertSext/AssertZext, we need to bitcast it to a vector which
+        // holds the asserted element type, and modify the extracted lane
+        // numbers to point at the correct lanes. For example, if a v2i32
+        // AssertSext node asserts it holds 2 i16 elements, it is first bitcast
+        // to v4i16, then all lane numbers in the EXTRACT_VECTOR_ELTs that
+        // extract from it are doubled. Finally a new BUILD_VECTOR is built on
+        // those newly created EXTRACT_VECTOR_ELTs to replace the old Op.
+        EVT AssertTy = cast<VTSDNode>(SourceVecs[i].getOperand(1))->getVT();
+        EVT AssertVT = EVT::getVectorVT(*DAG.getContext(), AssertTy,
+                                        SourceVecs[i].getValueSizeInBits() /
+                                            AssertTy.getSizeInBits());
+        EVT LegalTy = Op.getOperand(0).getValueType();
+        // Create a BITCAST on AssertSext/AssertZext to get a vector whose
+        // element type is AssertTy.
+        SDValue BitCst = DAG.getNode(ISD::BITCAST, dl, AssertVT, SourceVecs[i]);
+        unsigned OffsetMultipliers =
+            AssertVT.getVectorNumElements() /
+            SourceVecs[i].getValueType().getVectorNumElements();
+        // Collect operands to create the new BUILD_VECTOR node; lanes
+        // extracted from SourceVecs[i] must be multiplied by OffsetMultipliers.
+        SmallVector<SDValue, 8> BuildSrc;
+        for (unsigned j = 0; j < NumElts; ++j) {
+          if (Op.getOperand(j).getOperand(0) != SourceVecs[i]) {
+            BuildSrc.push_back(Op.getOperand(j));
+            continue;
+          }
+          unsigned OriginLane =
+              cast<ConstantSDNode>(Op.getOperand(j).getOperand(1))
+                  ->getSExtValue();
+          SDValue ExtElt = DAG.getNode(
+              ISD::EXTRACT_VECTOR_ELT, dl, LegalTy, BitCst,
+              DAG.getIntPtrConstant(OriginLane * OffsetMultipliers));
+          BuildSrc.push_back(ExtElt);
+        }
+        // Create a new BUILD_VECTOR to replace the old one.
+        Op = DAG.getNode(ISD::BUILD_VECTOR, dl, VT,
+                         makeArrayRef(BuildSrc.data(), NumElts));
+        SourceVecs[i] = BitCst;
+        MaxElts[i] *= OffsetMultipliers;
+        MinElts[i] *= OffsetMultipliers;
+      } else {
+        // Don't attempt to extract subvectors from BUILD_VECTOR sources
+        // that expand or trunc the original value.
+        return SDValue();
+      }
+    }
     if (SourceVecs[i].getValueType() == VT) {
       // No VEXT necessary
       ShuffleSrcs[i] = SourceVecs[i];
@@ -4175,15 +4226,6 @@
       continue;
     }
 
-    // Don't attempt to extract subvectors from BUILD_VECTOR sources
-    // that expand or trunc the original value.
-    // TODO: We can try to bitcast and ANY_EXTEND the result but
-    // we need to consider the cost of vector ANY_EXTEND, and the
-    // legality of all the types.
-    if (SourceVecs[i].getValueType().getVectorElementType() !=
-        VT.getVectorElementType())
-      return SDValue();
-
     // Since only 64-bit and 128-bit vectors are legal on ARM and
     // we've eliminated the other cases...
     assert(SourceVecs[i].getValueType().getVectorNumElements() == 2 * NumElts &&
Index: test/CodeGen/AArch64/arm64-convert-v4f64.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AArch64/arm64-convert-v4f64.ll
@@ -0,0 +1,32 @@
+; RUN: llc < %s -march=arm64 | FileCheck %s
+
+
+define <4 x i16> @fptosi_v4f64_to_v4i16(<4 x double>* %ptr) {
+; CHECK: fptosi_v4f64_to_v4i16
+; CHECK-DAG: fcvtzs v[[LHS:[0-9]+]].2d, v1.2d
+; CHECK-DAG: fcvtzs v[[RHS:[0-9]+]].2d, v0.2d
+; CHECK-DAG: xtn v[[LHS_NA:[0-9]+]].2s, v[[LHS]].2d
+; CHECK-DAG: xtn v[[RHS_NA:[0-9]+]].2s, v[[RHS]].2d
+; CHECK: uzp1 v0.4h, v[[RHS_NA]].4h, v[[LHS_NA]].4h
+  %tmp1 = load <4 x double>* %ptr
+  %tmp2 = fptosi <4 x double> %tmp1 to <4 x i16>
+  ret <4 x i16> %tmp2
+}
+
+define <8 x i8> @fptosi_v4f64_to_v4i8(<8 x double>* %ptr) {
+; CHECK: fptosi_v4f64_to_v4i8
+; CHECK-DAG: fcvtzs v[[CONV3:[0-9]+]].2d, v3.2d
+; CHECK-DAG: fcvtzs v[[CONV2:[0-9]+]].2d, v2.2d
+; CHECK-DAG: fcvtzs v[[CONV1:[0-9]+]].2d, v1.2d
+; CHECK-DAG: fcvtzs v[[CONV0:[0-9]+]].2d, v0.2d
+; CHECK-DAG: xtn v[[NA3:[0-9]+]].2s, v[[CONV3]].2d
+; CHECK-DAG: xtn v[[NA2:[0-9]+]].2s, v[[CONV2]].2d
+; CHECK-DAG: xtn v[[NA1:[0-9]+]].2s, v[[CONV1]].2d
+; CHECK-DAG: xtn v[[NA0:[0-9]+]].2s, v[[CONV0]].2d
+; CHECK-DAG: uzp1 v[[TMP1:[0-9]+]].4h, v[[CONV2]].4h, v[[CONV3]].4h
+; CHECK-DAG: uzp1 v[[TMP2:[0-9]+]].4h, v[[CONV0]].4h, v[[CONV1]].4h
+; CHECK: uzp1 v0.8b, v[[TMP2]].8b, v[[TMP1]].8b
+  %tmp1 = load <8 x double>* %ptr
+  %tmp2 = fptosi <8 x double> %tmp1 to <8 x i8>
+  ret <8 x i8> %tmp2
+}
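
A minimal IR sketch (not part of the patch, with a hypothetical function name) of the unsigned companion case: fptoui of <4 x double> is assumed to reach ReconstructShuffle through AssertZext nodes in the same way the fptosi tests above reach it through AssertSext, so it would exercise the new ISD::AssertZext arm of the code; no FileCheck output is claimed for it.

; Assumed companion case (sketch only): unsigned conversion expected to hit
; the AssertZext path added above.
define <4 x i16> @fptoui_v4f64_to_v4i16(<4 x double>* %ptr) {
  %tmp1 = load <4 x double>* %ptr
  %tmp2 = fptoui <4 x double> %tmp1 to <4 x i16>
  ret <4 x i16> %tmp2
}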