Index: include/llvm/CodeGen/GlobalISel/CombinerHelper.h
===================================================================
--- include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -88,6 +88,29 @@
   bool matchCombineBr(MachineInstr &MI);
   bool tryCombineBr(MachineInstr &MI);
 
+  /// If \p MI is G_CONCAT_VECTORS, try to combine it.
+  /// Returns true if MI changed.
+  /// Right now, we support:
+  /// - concat_vectors(undef, undef) => undef
+  /// - concat_vectors(build_vector(A, B), build_vector(C, D)) =>
+  ///   build_vector(A, B, C, D)
+  ///
+  /// \pre MI.getOpcode() == G_CONCAT_VECTORS.
+  bool tryCombineConcatVectors(MachineInstr &MI);
+  /// Check if the G_CONCAT_VECTORS \p MI is undef or if it
+  /// can be flattened into a build_vector.
+  /// In the first case \p IsUndef will be true.
+  /// In the second case \p Ops will contain the operands needed
+  /// to produce the flattened build_vector.
+  ///
+  /// \pre MI.getOpcode() == G_CONCAT_VECTORS.
+  bool matchCombineConcatVectors(MachineInstr &MI, bool &IsUndef,
+                                 SmallVectorImpl<Register> &Ops);
+  /// Replace \p MI with a flattened build_vector with \p Ops or an
+  /// implicit_def if \p IsUndef is true.
+  void applyCombineConcatVectors(MachineInstr &MI, bool IsUndef,
+                                 const ArrayRef<Register> Ops);
+
   /// Optimize memcpy intrinsics et al, e.g. constant len calls.
   /// /p MaxLen if non-zero specifies the max length of a mem libcall to inline.
   ///
Index: lib/CodeGen/GlobalISel/CombinerHelper.cpp
===================================================================
--- lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -89,6 +89,92 @@
   replaceRegWith(MRI, DstReg, SrcReg);
 }
 
+bool CombinerHelper::tryCombineConcatVectors(MachineInstr &MI) {
+  bool IsUndef = false;
+  SmallVector<Register, 4> Ops;
+  if (matchCombineConcatVectors(MI, IsUndef, Ops)) {
+    applyCombineConcatVectors(MI, IsUndef, Ops);
+    return true;
+  }
+  return false;
+}
+
+bool CombinerHelper::matchCombineConcatVectors(MachineInstr &MI, bool &IsUndef,
+                                               SmallVectorImpl<Register> &Ops) {
+  assert(MI.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
+         "Invalid instruction");
+  IsUndef = true;
+  MachineInstr *Undef = nullptr;
+
+  // Walk over all the operands of concat vectors and check if they are
+  // build_vector themselves or undef.
+  // Then collect their operands in Ops.
+  for (const MachineOperand &MO : MI.operands()) {
+    assert(MO.isReg() && "Concat non-register values");
+    // Skip the instruction definition.
+    if (MO.isDef())
+      continue;
+    Register Reg = MO.getReg();
+    MachineInstr *Def = MRI.getVRegDef(Reg);
+    assert(Def && "Operand not defined");
+    switch (Def->getOpcode()) {
+    case TargetOpcode::G_BUILD_VECTOR:
+      IsUndef = false;
+      // Remember the operands of the build_vector to fold
+      // them into the yet-to-build flattened concat vectors.
+      for (const MachineOperand &BuildVecMO : Def->operands()) {
+        assert(BuildVecMO.isReg() &&
+               "Build a vector with non-register values");
+        // Skip the definition.
+        if (BuildVecMO.isDef())
+          continue;
+        Ops.push_back(BuildVecMO.getReg());
+      }
+      break;
+    case TargetOpcode::G_IMPLICIT_DEF:
+      // Keep one undef value for all the undef operands.
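+      // The scalar undef is created lazily for the first undef operand and
+      // then reused, so the flattened build_vector references a single
+      // scalar G_IMPLICIT_DEF no matter how many undef inputs the
+      // concat_vectors has.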
+      if (!Undef) {
+        assert(MRI.getType(Reg).isVector() &&
+               "We should be concatenating vectors");
+        Builder.setInsertPt(*MI.getParent(), MI);
+        Undef = Builder.buildUndef(MRI.getType(Reg).getScalarType());
+      }
+      assert(MRI.getType(Undef->getOperand(0).getReg()) ==
+                 MRI.getType(Reg).getScalarType() &&
+             "All undefs should have the same type");
+      // Break the undef vector into as many scalar elements as needed
+      // for the flattening.
+      for (unsigned EltIdx = 0, EltEnd = MRI.getType(Reg).getNumElements();
+           EltIdx != EltEnd; ++EltIdx)
+        Ops.push_back(Undef->getOperand(0).getReg());
+      break;
+    default:
+      return false;
+    }
+  }
+  return true;
+}
+
+void CombinerHelper::applyCombineConcatVectors(
+    MachineInstr &MI, bool IsUndef, const ArrayRef<Register> Ops) {
+  // We determined that the concat_vectors can be flattened.
+  // Generate the flattened build_vector.
+  Register DstReg = MI.getOperand(0).getReg();
+  Builder.setInsertPt(*MI.getParent(), MI);
+  Register NewDstReg = MRI.cloneVirtualRegister(DstReg);
+
+  // Note: IsUndef is sort of redundant. We could have determined it by
+  // checking that all the Ops are undef. Alternatively, we could have
+  // generated a build_vector of undefs and relied on another combine to
+  // clean that up. For now, given we already gather this information
+  // in tryCombineConcatVectors, just save compile time and issue the
+  // right thing.
+  if (IsUndef)
+    Builder.buildUndef(NewDstReg);
+  else
+    Builder.buildBuildVector(NewDstReg, Ops);
+  MI.eraseFromParent();
+  replaceRegWith(MRI, DstReg, NewDstReg);
+}
+
 namespace {
 
 /// Select a preference between two uses. CurrentUse is the current preference
Index: lib/Target/AArch64/AArch64PreLegalizerCombiner.cpp
===================================================================
--- lib/Target/AArch64/AArch64PreLegalizerCombiner.cpp
+++ lib/Target/AArch64/AArch64PreLegalizerCombiner.cpp
@@ -64,6 +64,8 @@
     return Helper.tryCombineCopy(MI);
   case TargetOpcode::G_BR:
     return Helper.tryCombineBr(MI);
+  case TargetOpcode::G_CONCAT_VECTORS:
+    return Helper.tryCombineConcatVectors(MI);
   case TargetOpcode::G_LOAD:
   case TargetOpcode::G_SEXTLOAD:
   case TargetOpcode::G_ZEXTLOAD: {
Index: test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-concat-vectors.mir
===================================================================
--- /dev/null
+++ test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-concat-vectors.mir
@@ -0,0 +1,141 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple aarch64-apple-ios -run-pass=aarch64-prelegalizer-combiner %s -o - | FileCheck %s
+
+# Check that we canonicalize concat_vectors(build_vector, build_vector)
+# into build_vector.
+---
+name: concat_to_build_vector
+tracksRegLiveness: true
+body: |
+  bb.1:
+    liveins: $x0, $x1, $x2, $x3
+
+    ; CHECK-LABEL: name: concat_to_build_vector
+    ; CHECK: liveins: $x0, $x1, $x2, $x3
+    ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
+    ; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2
+    ; CHECK: [[COPY3:%[0-9]+]]:_(s64) = COPY $x3
+    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[COPY]](s64), [[COPY1]](s64), [[COPY2]](s64), [[COPY3]](s64)
+    ; CHECK: RET_ReallyLR implicit [[BUILD_VECTOR]](<4 x s64>)
+    %0:_(s64) = COPY $x0
+    %1:_(s64) = COPY $x1
+    %2:_(s64) = COPY $x2
+    %3:_(s64) = COPY $x3
+    %4:_(<2 x s64>) = G_BUILD_VECTOR %0(s64), %1
+    %5:_(<2 x s64>) = G_BUILD_VECTOR %2(s64), %3
+    %6:_(<4 x s64>) = G_CONCAT_VECTORS %4(<2 x s64>), %5
+    RET_ReallyLR implicit %6
+...
+# Same test as concat_to_build_vector but with pointer types.
+---
+name: concat_to_build_vector_ptr
+tracksRegLiveness: true
+body: |
+  bb.1:
+    liveins: $x0, $x1, $x2, $x3
+
+    ; CHECK-LABEL: name: concat_to_build_vector_ptr
+    ; CHECK: liveins: $x0, $x1, $x2, $x3
+    ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
+    ; CHECK: [[COPY2:%[0-9]+]]:_(p0) = COPY $x2
+    ; CHECK: [[COPY3:%[0-9]+]]:_(p0) = COPY $x3
+    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x p0>) = G_BUILD_VECTOR [[COPY]](p0), [[COPY1]](p0), [[COPY2]](p0), [[COPY3]](p0)
+    ; CHECK: RET_ReallyLR implicit [[BUILD_VECTOR]](<4 x p0>)
+    %0:_(p0) = COPY $x0
+    %1:_(p0) = COPY $x1
+    %2:_(p0) = COPY $x2
+    %3:_(p0) = COPY $x3
+    %4:_(<2 x p0>) = G_BUILD_VECTOR %0(p0), %1
+    %5:_(<2 x p0>) = G_BUILD_VECTOR %2(p0), %3
+    %6:_(<4 x p0>) = G_CONCAT_VECTORS %4(<2 x p0>), %5
+    RET_ReallyLR implicit %6
+...
+# Check that we canonicalize concat_vectors(undef, undef) into undef.
+---
+name: concat_to_undef
+tracksRegLiveness: true
+body: |
+  bb.1:
+    ; CHECK-LABEL: name: concat_to_undef
+    ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
+    ; CHECK: RET_ReallyLR implicit [[DEF]](<4 x s64>)
+    %4:_(<2 x s64>) = G_IMPLICIT_DEF
+    %5:_(<2 x s64>) = G_IMPLICIT_DEF
+    %6:_(<4 x s64>) = G_CONCAT_VECTORS %4(<2 x s64>), %5
+    RET_ReallyLR implicit %6
+...
+# Check that when combining concat_vectors(build_vector, undef) into
+# build_vector, we correctly break the undef vector into a sequence
+# of undef scalars.
+---
+name: concat_to_build_vector_with_undef
+tracksRegLiveness: true
+body: |
+  bb.1:
+    liveins: $x0, $x1
+
+    ; CHECK-LABEL: name: concat_to_build_vector_with_undef
+    ; CHECK: liveins: $x0, $x1
+    ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
+    ; CHECK: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[COPY]](s64), [[COPY1]](s64), [[DEF]](s64), [[DEF]](s64)
+    ; CHECK: RET_ReallyLR implicit [[BUILD_VECTOR]](<4 x s64>)
+    %0:_(s64) = COPY $x0
+    %1:_(s64) = COPY $x1
+    %4:_(<2 x s64>) = G_BUILD_VECTOR %0(s64), %1
+    %5:_(<2 x s64>) = G_IMPLICIT_DEF
+    %6:_(<4 x s64>) = G_CONCAT_VECTORS %4(<2 x s64>), %5
+    RET_ReallyLR implicit %6
+...
+# Same as concat_to_build_vector_with_undef but with pointer types.
+---
+name: concat_to_build_vector_with_undef_ptr
+tracksRegLiveness: true
+body: |
+  bb.1:
+    liveins: $x0, $x1
+
+    ; CHECK-LABEL: name: concat_to_build_vector_with_undef_ptr
+    ; CHECK: liveins: $x0, $x1
+    ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
+    ; CHECK: [[DEF:%[0-9]+]]:_(p0) = G_IMPLICIT_DEF
+    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x p0>) = G_BUILD_VECTOR [[COPY]](p0), [[COPY1]](p0), [[DEF]](p0), [[DEF]](p0)
+    ; CHECK: RET_ReallyLR implicit [[BUILD_VECTOR]](<4 x p0>)
+    %0:_(p0) = COPY $x0
+    %1:_(p0) = COPY $x1
+    %4:_(<2 x p0>) = G_BUILD_VECTOR %0(p0), %1
+    %5:_(<2 x p0>) = G_IMPLICIT_DEF
+    %6:_(<4 x p0>) = G_CONCAT_VECTORS %4(<2 x p0>), %5
+    RET_ReallyLR implicit %6
+...
+# Check that we keep the concat_vectors as soon as one of the operands is
+# not undef or build_vector, i.e., when we cannot flatten the concat_vectors.
+---
+name: concat_to_build_vector_negative_test
+tracksRegLiveness: true
+body: |
+  bb.1:
+    liveins: $x0, $x1
+
+    ; CHECK-LABEL: name: concat_to_build_vector_negative_test
+    ; CHECK: liveins: $x0, $x1
+    ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+    ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s64>) = G_IMPLICIT_DEF
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CHECK: [[IVEC:%[0-9]+]]:_(<2 x s64>) = G_INSERT_VECTOR_ELT [[DEF]], [[COPY]](s64), [[C]](s32)
+    ; CHECK: [[DEF1:%[0-9]+]]:_(<2 x s64>) = G_IMPLICIT_DEF
+    ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[IVEC]](<2 x s64>), [[DEF1]](<2 x s64>)
+    ; CHECK: RET_ReallyLR implicit [[CONCAT_VECTORS]](<4 x s64>)
+    %0:_(s64) = COPY $x0
+    %1:_(s64) = COPY $x1
+    %2:_(<2 x s64>) = G_IMPLICIT_DEF
+    %3:_(s32) = G_CONSTANT i32 1
+    %4:_(<2 x s64>) = G_INSERT_VECTOR_ELT %2, %0(s64), %3(s32)
+    %5:_(<2 x s64>) = G_IMPLICIT_DEF
+    %6:_(<4 x s64>) = G_CONCAT_VECTORS %4(<2 x s64>), %5
+    RET_ReallyLR implicit %6
+...
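Usage note (not part of the patch): any GlobalISel target whose pre-legalizer combiner is built on CombinerHelper can opt into the new combine the same way the AArch64 hunk above does. A minimal sketch, assuming a hypothetical MyTargetPreLegalizerCombinerInfo whose combine() has the same shape as the AArch64 one:

    bool MyTargetPreLegalizerCombinerInfo::combine(GISelChangeObserver &Observer,
                                                   MachineInstr &MI,
                                                   MachineIRBuilder &B) const {
      CombinerHelper Helper(Observer, B);
      switch (MI.getOpcode()) {
      default:
        return false;
      case TargetOpcode::G_CONCAT_VECTORS:
        // Flattens concat_vectors of build_vector/undef operands into a single
        // build_vector, or folds an all-undef concat_vectors into
        // G_IMPLICIT_DEF; returns true if MI was changed.
        return Helper.tryCombineConcatVectors(MI);
      }
    }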