Index: include/llvm/Target/TargetLowering.h
===================================================================
--- include/llvm/Target/TargetLowering.h
+++ include/llvm/Target/TargetLowering.h
@@ -185,10 +185,15 @@
   /// Return true if the target has BitExtract instructions.
   bool hasExtractBitsInsn() const { return HasExtractBitsInsn; }
 
-  /// Return true if a vector of the given type should be split
-  /// (TypeSplitVector) instead of promoted (TypePromoteInteger) during type
-  /// legalization.
-  virtual bool shouldSplitVectorType(EVT /*VT*/) const { return false; }
+  /// Return the preferred vector type legalization action.
+  virtual TargetLoweringBase::LegalizeTypeAction
+  getPreferredVectorAction(EVT VT) const {
+    // The default action for one element vectors is to scalarize
+    if (VT.getVectorNumElements() == 1)
+      return TypeScalarizeVector;
+    // The default action for other vectors is to promote
+    return TypePromoteInteger;
+  }
 
   // There are two general methods for expanding a BUILD_VECTOR node:
   //  1. Use SCALAR_TO_VECTOR on the defined scalar values and then shuffle
Index: lib/CodeGen/TargetLoweringBase.cpp
===================================================================
--- lib/CodeGen/TargetLoweringBase.cpp
+++ lib/CodeGen/TargetLoweringBase.cpp
@@ -1084,24 +1084,25 @@
   // Loop over all of the vector value types to see which need transformations.
   for (unsigned i = MVT::FIRST_VECTOR_VALUETYPE;
        i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
-    MVT VT = (MVT::SimpleValueType)i;
-    if (isTypeLegal(VT)) continue;
+    MVT VT = (MVT::SimpleValueType) i;
+    if (isTypeLegal(VT))
+      continue;
 
-    // Determine if there is a legal wider type.  If so, we should promote to
-    // that wider vector type.
     MVT EltVT = VT.getVectorElementType();
     unsigned NElts = VT.getVectorNumElements();
-    if (NElts != 1 && !shouldSplitVectorType(VT)) {
-      bool IsLegalWiderType = false;
-      // First try to promote the elements of integer vectors. If no legal
-      // promotion was found, fallback to the widen-vector method.
-      for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
-        MVT SVT = (MVT::SimpleValueType)nVT;
+    bool IsLegalWiderType = false;
+    LegalizeTypeAction PreferredAction = getPreferredVectorAction(VT);
+    switch (PreferredAction) {
+    case TypePromoteInteger: {
+      // Try to promote the elements of integer vectors. If no legal
+      // promotion was found, fall through to the widen-vector method.
+      for (unsigned nVT = i + 1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
+        MVT SVT = (MVT::SimpleValueType) nVT;
         // Promote vectors of integers to vectors with the same number
         // of elements, with a wider element type.
         if (SVT.getVectorElementType().getSizeInBits() > EltVT.getSizeInBits()
-            && SVT.getVectorNumElements() == NElts &&
-            isTypeLegal(SVT) && SVT.getScalarType().isInteger()) {
+            && SVT.getVectorNumElements() == NElts && isTypeLegal(SVT)
+            && SVT.getScalarType().isInteger()) {
           TransformToType[i] = SVT;
           RegisterTypeForVT[i] = SVT;
           NumRegistersForVT[i] = 1;
@@ -1110,15 +1111,15 @@
           break;
         }
       }
-
-      if (IsLegalWiderType) continue;
-
+      if (IsLegalWiderType)
+        break;
+    }
+    case TypeWidenVector: {
       // Try to widen the vector.
-      for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
-        MVT SVT = (MVT::SimpleValueType)nVT;
-        if (SVT.getVectorElementType() == EltVT &&
-            SVT.getVectorNumElements() > NElts &&
-            isTypeLegal(SVT)) {
+      for (unsigned nVT = i + 1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
+        MVT SVT = (MVT::SimpleValueType) nVT;
+        if (SVT.getVectorElementType() == EltVT
+            && SVT.getVectorNumElements() > NElts && isTypeLegal(SVT)) {
           TransformToType[i] = SVT;
           RegisterTypeForVT[i] = SVT;
           NumRegistersForVT[i] = 1;
@@ -1127,27 +1128,34 @@
           break;
         }
       }
-      if (IsLegalWiderType) continue;
+      if (IsLegalWiderType)
+        break;
     }
-
-    MVT IntermediateVT;
-    MVT RegisterVT;
-    unsigned NumIntermediates;
-    NumRegistersForVT[i] =
-      getVectorTypeBreakdownMVT(VT, IntermediateVT, NumIntermediates,
-                                RegisterVT, this);
-    RegisterTypeForVT[i] = RegisterVT;
-
-    MVT NVT = VT.getPow2VectorType();
-    if (NVT == VT) {
-      // Type is already a power of 2.  The default action is to split.
-      TransformToType[i] = MVT::Other;
-      unsigned NumElts = VT.getVectorNumElements();
-      ValueTypeActions.setTypeAction(VT,
-            NumElts > 1 ? TypeSplitVector : TypeScalarizeVector);
-    } else {
-      TransformToType[i] = NVT;
-      ValueTypeActions.setTypeAction(VT, TypeWidenVector);
+    case TypeSplitVector:
+    case TypeScalarizeVector: {
+      MVT IntermediateVT;
+      MVT RegisterVT;
+      unsigned NumIntermediates;
+      NumRegistersForVT[i] = getVectorTypeBreakdownMVT(VT, IntermediateVT,
+          NumIntermediates, RegisterVT, this);
+      RegisterTypeForVT[i] = RegisterVT;
+
+      MVT NVT = VT.getPow2VectorType();
+      if (NVT == VT) {
+        // Type is already a power of 2.  The default action is to split.
+        TransformToType[i] = MVT::Other;
+        if (PreferredAction == TypeScalarizeVector)
+          ValueTypeActions.setTypeAction(VT, TypeScalarizeVector);
+        else
+          ValueTypeActions.setTypeAction(VT, TypeSplitVector);
+      } else {
+        TransformToType[i] = NVT;
+        ValueTypeActions.setTypeAction(VT, TypeWidenVector);
+      }
+      break;
+    }
+    default:
+      llvm_unreachable("Unknown vector legalization action!");
     }
   }
 
Index: lib/Target/AArch64/AArch64ISelLowering.h
===================================================================
--- lib/Target/AArch64/AArch64ISelLowering.h
+++ lib/Target/AArch64/AArch64ISelLowering.h
@@ -324,6 +324,9 @@
 
   bool shouldExpandAtomicInIR(Instruction *Inst) const override;
 
+  TargetLoweringBase::LegalizeTypeAction
+  getPreferredVectorAction(EVT VT) const override;
+
 private:
   /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
   /// make the right decision when generating code for different targets.
Index: lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- lib/Target/AArch64/AArch64ISelLowering.cpp
+++ lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -7886,6 +7886,19 @@
   return Inst->getType()->getPrimitiveSizeInBits() <= 128;
 }
 
+TargetLoweringBase::LegalizeTypeAction
+AArch64TargetLowering::getPreferredVectorAction(EVT VT) const {
+  MVT SVT = VT.getSimpleVT();
+  // During type legalization, we prefer to widen v1i8, v1i16, v1i32  to v8i8,
+  // v4i16, v2i32 instead of to promote.
+  if (SVT == MVT::v1i8 || SVT == MVT::v1i16 || SVT == MVT::v1i32
+      || SVT == MVT::v1f32)
+    return TypeWidenVector;
+  if (VT.getVectorNumElements() == 1)
+    return TypeScalarizeVector;
+  return TypePromoteInteger;
+}
+
 Value *AArch64TargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
                                              AtomicOrdering Ord) const {
   Module *M = Builder.GetInsertBlock()->getParent()->getParent();
Index: lib/Target/NVPTX/NVPTXISelLowering.h
===================================================================
--- lib/Target/NVPTX/NVPTXISelLowering.h
+++ lib/Target/NVPTX/NVPTXISelLowering.h
@@ -242,7 +242,8 @@
   // PTX always uses 32-bit shift amounts
   MVT getScalarShiftAmountTy(EVT LHSTy) const override { return MVT::i32; }
 
-  bool shouldSplitVectorType(EVT VT) const override;
+  TargetLoweringBase::LegalizeTypeAction
+  getPreferredVectorAction(EVT VT) const override;
 
 private:
   const NVPTXSubtarget &nvptxSubtarget; // cache the subtarget here
Index: lib/Target/NVPTX/NVPTXISelLowering.cpp
===================================================================
--- lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -473,8 +473,13 @@
   }
 }
 
-bool NVPTXTargetLowering::shouldSplitVectorType(EVT VT) const {
-  return VT.getScalarType() == MVT::i1;
+TargetLoweringBase::LegalizeTypeAction
+NVPTXTargetLowering::getPreferredVectorAction(EVT VT) const {
+  if (VT.getVectorNumElements() == 1)
+    return TypeScalarizeVector;
+  if (VT.getScalarType() == MVT::i1)
+    return TypeSplitVector;
+  return TypePromoteInteger;
 }
 
 SDValue
Index: lib/Target/R600/SIISelLowering.h
===================================================================
--- lib/Target/R600/SIISelLowering.h
+++ lib/Target/R600/SIISelLowering.h
@@ -50,7 +50,9 @@
   SITargetLowering(TargetMachine &tm);
   bool allowsUnalignedMemoryAccesses(EVT VT, unsigned AS,
                                      bool *IsFast) const override;
-  bool shouldSplitVectorType(EVT VT) const override;
+
+  TargetLoweringBase::LegalizeTypeAction
+  getPreferredVectorAction(EVT VT) const override;
 
   bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
                                         Type *Ty) const override;
Index: lib/Target/R600/SIISelLowering.cpp
===================================================================
--- lib/Target/R600/SIISelLowering.cpp
+++ lib/Target/R600/SIISelLowering.cpp
@@ -271,8 +271,13 @@
   return VT.bitsGT(MVT::i32);
 }
 
-bool SITargetLowering::shouldSplitVectorType(EVT VT) const {
-  return VT.getScalarType().bitsLE(MVT::i16);
+TargetLoweringBase::LegalizeTypeAction
+SITargetLowering::getPreferredVectorAction(EVT VT) const {
+  if (VT.getVectorNumElements() == 1)
+    return TypeScalarizeVector;
+  if (VT.getScalarType().bitsLE(MVT::i16))
+    return TypeSplitVector;
+  return TypePromoteInteger;
 }
 
 bool SITargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
Index: test/CodeGen/AArch64/arm64-neon-copy.ll
===================================================================
--- test/CodeGen/AArch64/arm64-neon-copy.ll
+++ test/CodeGen/AArch64/arm64-neon-copy.ll
@@ -842,7 +842,7 @@
 
 define <8 x i8> @testDUP.v1i8(<1 x i8> %a) {
 ; CHECK-LABEL: testDUP.v1i8:
-; CHECK: dup {{v[0-9]+}}.8b, {{w[0-9]+}}
+; CHECK: dup v0.8b, v0.b[0]
   %b = extractelement <1 x i8> %a, i32 0
   %c = insertelement <8 x i8> undef, i8 %b, i32 0
   %d = insertelement <8 x i8> %c, i8 %b, i32 1
@@ -857,7 +857,7 @@
 
 define <8 x i16> @testDUP.v1i16(<1 x i16> %a) {
 ; CHECK-LABEL: testDUP.v1i16:
-; CHECK: dup {{v[0-9]+}}.8h, {{w[0-9]+}}
+; CHECK: dup v0.8h, v0.h[0]
   %b = extractelement <1 x i16> %a, i32 0
   %c = insertelement <8 x i16> undef, i16 %b, i32 0
   %d = insertelement <8 x i16> %c, i16 %b, i32 1
@@ -872,7 +872,7 @@
 
 define <4 x i32> @testDUP.v1i32(<1 x i32> %a) {
 ; CHECK-LABEL: testDUP.v1i32:
-; CHECK: dup {{v[0-9]+}}.4s, {{w[0-9]+}}
+; CHECK: dup v0.4s, v0.s[0]
   %b = extractelement <1 x i32> %a, i32 0
   %c = insertelement <4 x i32> undef, i32 %b, i32 0
   %d = insertelement <4 x i32> %c, i32 %b, i32 1
@@ -1411,35 +1411,35 @@
 
 define <4 x i16> @concat_vector_v4i16(<1 x i16> %a) {
 ; CHECK-LABEL: concat_vector_v4i16:
-; CHECK: dup {{v[0-9]+}}.4h, {{w[0-9]+}}
+; CHECK: dup v0.4h, v0.h[0]
  %r = shufflevector <1 x i16> %a, <1 x i16> undef, <4 x i32> zeroinitializer
  ret <4 x i16> %r
 }
 
 define <4 x i32> @concat_vector_v4i32(<1 x i32> %a) {
 ; CHECK-LABEL: concat_vector_v4i32:
-; CHECK: dup {{v[0-9]+}}.4s, {{w[0-9]+}}
+; CHECK: dup v0.4s, v0.s[0]
  %r = shufflevector <1 x i32> %a, <1 x i32> undef, <4 x i32> zeroinitializer
  ret <4 x i32> %r
 }
 
 define <8 x i8> @concat_vector_v8i8(<1 x i8> %a) {
 ; CHECK-LABEL: concat_vector_v8i8:
-; CHECK: dup {{v[0-9]+}}.8b, {{w[0-9]+}}
+; CHECK: dup v0.8b, v0.b[0]
  %r = shufflevector <1 x i8> %a, <1 x i8> undef, <8 x i32> zeroinitializer
  ret <8 x i8> %r
 }
 
 define <8 x i16> @concat_vector_v8i16(<1 x i16> %a) {
 ; CHECK-LABEL: concat_vector_v8i16:
-; CHECK: dup {{v[0-9]+}}.8h, {{w[0-9]+}}
+; CHECK: dup v0.8h, v0.h[0]
  %r = shufflevector <1 x i16> %a, <1 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %r
 }
 
 define <16 x i8> @concat_vector_v16i8(<1 x i8> %a) {
 ; CHECK-LABEL: concat_vector_v16i8:
-; CHECK: dup {{v[0-9]+}}.16b, {{w[0-9]+}}
+; CHECK: dup v0.16b, v0.b[0]
  %r = shufflevector <1 x i8> %a, <1 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %r
 }
Index: test/CodeGen/AArch64/arm64-neon-select_cc.ll
===================================================================
--- test/CodeGen/AArch64/arm64-neon-select_cc.ll
+++ test/CodeGen/AArch64/arm64-neon-select_cc.ll
@@ -136,8 +136,8 @@
 
 define <1 x float> @test_select_cc_v1f32(float %a, float %b, <1 x float> %c, <1 x float> %d ) {
 ; CHECK-LABEL: test_select_cc_v1f32:
-; CHECK: fcmp s0, s1
-; CHECK-NEXT: fcsel s0, s2, s3, eq
+; CHECK: fcmeq [[MASK:v[0-9]+]].2s, v0.2s, v1.2s
+; CHECK-NEXT: bsl [[MASK]].8b, v2.8b, v3.8b
   %cmp31 = fcmp oeq float %a, %b
   %e = select i1 %cmp31, <1 x float> %c, <1 x float> %d
   ret <1 x float> %e
Index: test/CodeGen/AArch64/trunc-v1i64.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AArch64/trunc-v1i64.ll
@@ -0,0 +1,63 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon -verify-machineinstrs < %s | FileCheck %s
+
+; An optimization in DAG Combiner to fold
+; (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...))
+; will generate nodes like:
+;     v1i32 trunc v1i64, v1i16 trunc v1i64, v1i8 trunc v1i64.
+; And such nodes will be defaultly scalarized in type legalization. But such
+; scalarization will cause an assertion failure, as v1i64 is a legal type in
+; AArch64. We change the default behaviour from be scalarized to be widen.
+
+; FIXME: Currently XTN is generated for v1i32, but it can be optimized.
+; Just like v1i16 and v1i8, there is no XTN generated.
+
+define <2 x i32> @test_v1i32_0(<1 x i64> %in0) {
+; CHECK-LABEL: test_v1i32_0:
+; CHECK: xtn v0.2s, v0.2d
+  %1 = shufflevector <1 x i64> %in0, <1 x i64> undef, <2 x i32> <i32 0, i32 undef>
+  %2 = trunc <2 x i64> %1 to <2 x i32>
+  ret <2 x i32> %2
+}
+
+define <2 x i32> @test_v1i32_1(<1 x i64> %in0) {
+; CHECK-LABEL: test_v1i32_1:
+; CHECK: xtn v0.2s, v0.2d
+; CHECK-NEXT: dup v0.2s, v0.s[0]
+  %1 = shufflevector <1 x i64> %in0, <1 x i64> undef, <2 x i32> <i32 undef, i32 0>
+  %2 = trunc <2 x i64> %1 to <2 x i32>
+  ret <2 x i32> %2
+}
+
+define <4 x i16> @test_v1i16_0(<1 x i64> %in0) {
+; CHECK-LABEL: test_v1i16_0:
+; CHECK-NOT: xtn
+  %1 = shufflevector <1 x i64> %in0, <1 x i64> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
+  %2 = trunc <4 x i64> %1 to <4 x i16>
+  ret <4 x i16> %2
+}
+
+define <4 x i16> @test_v1i16_1(<1 x i64> %in0) {
+; CHECK-LABEL: test_v1i16_1:
+; CHECK-NOT: xtn
+; CHECK: dup v0.4h, v0.h[0]
+  %1 = shufflevector <1 x i64> %in0, <1 x i64> undef, <4 x i32> <i32 undef, i32 undef, i32 0, i32 undef>
+  %2 = trunc <4 x i64> %1 to <4 x i16>
+  ret <4 x i16> %2
+}
+
+define <8 x i8> @test_v1i8_0(<1 x i64> %in0) {
+; CHECK-LABEL: test_v1i8_0:
+; CHECK-NOT: xtn
+  %1 = shufflevector <1 x i64> %in0, <1 x i64> undef, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %2 = trunc <8 x i64> %1 to <8 x i8>
+  ret <8 x i8> %2
+}
+
+define <8 x i8> @test_v1i8_1(<1 x i64> %in0) {
+; CHECK-LABEL: test_v1i8_1:
+; CHECK-NOT: xtn
+; CHECK: dup v0.8b, v0.b[0]
+  %1 = shufflevector <1 x i64> %in0, <1 x i64> undef, <8 x i32> <i32 undef, i32 undef, i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %2 = trunc <8 x i64> %1 to <8 x i8>
+  ret <8 x i8> %2
+}
\ No newline at end of file