diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -648,6 +648,7 @@ include "AArch64InstrFormats.td" include "SVEInstrFormats.td" +include "SMEInstrFormats.td" //===----------------------------------------------------------------------===// @@ -8115,5 +8116,5 @@ include "AArch64InstrAtomics.td" include "AArch64SVEInstrInfo.td" - +include "AArch64SMEInstrInfo.td" include "AArch64InstrGISel.td" diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.td b/llvm/lib/Target/AArch64/AArch64RegisterInfo.td --- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.td +++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.td @@ -45,6 +45,16 @@ def qsub1 : SubRegIndex<128>; def qsub2 : SubRegIndex<128>; def qsub3 : SubRegIndex<128>; + // Note: Code depends on these having consecutive numbers + def zasubb : SubRegIndex<2048>; // (16 x 16)/1 bytes = 2048 bits + def zasubh0 : SubRegIndex<1024>; // (16 x 16)/2 bytes = 1024 bits + def zasubh1 : SubRegIndex<1024>; // (16 x 16)/2 bytes = 1024 bits + def zasubs0 : SubRegIndex<512>; // (16 x 16)/4 bytes = 512 bits + def zasubs1 : SubRegIndex<512>; // (16 x 16)/4 bytes = 512 bits + def zasubd0 : SubRegIndex<256>; // (16 x 16)/8 bytes = 256 bits + def zasubd1 : SubRegIndex<256>; // (16 x 16)/8 bytes = 256 bits + def zasubq0 : SubRegIndex<128>; // (16 x 16)/16 bytes = 128 bits + def zasubq1 : SubRegIndex<128>; // (16 x 16)/16 bytes = 128 bits } let Namespace = "AArch64" in { @@ -1156,3 +1166,188 @@ def GPR64NoXZRshiftedAsmOpnd # Scale : GPR64ShiftExtendAsmOperand<"GPR64NoXZRshifted", Scale, "GPR64common">; def GPR64NoXZRshifted # Scale : GPR64ExtendRegisterOperand<"GPR64NoXZRshiftedAsmOpnd" # Scale, Scale, GPR64common>; } + +// Accumulator array tiles. 
+def ZAQ0 : AArch64Reg<0, "za0.q">;
+def ZAQ1 : AArch64Reg<1, "za1.q">;
+def ZAQ2 : AArch64Reg<2, "za2.q">;
+def ZAQ3 : AArch64Reg<3, "za3.q">;
+def ZAQ4 : AArch64Reg<4, "za4.q">;
+def ZAQ5 : AArch64Reg<5, "za5.q">;
+def ZAQ6 : AArch64Reg<6, "za6.q">;
+def ZAQ7 : AArch64Reg<7, "za7.q">;
+def ZAQ8 : AArch64Reg<8, "za8.q">;
+def ZAQ9 : AArch64Reg<9, "za9.q">;
+def ZAQ10 : AArch64Reg<10, "za10.q">;
+def ZAQ11 : AArch64Reg<11, "za11.q">;
+def ZAQ12 : AArch64Reg<12, "za12.q">;
+def ZAQ13 : AArch64Reg<13, "za13.q">;
+def ZAQ14 : AArch64Reg<14, "za14.q">;
+def ZAQ15 : AArch64Reg<15, "za15.q">;
+
+let SubRegIndices = [zasubq0, zasubq1] in {
+  def ZAD0 : AArch64Reg<0, "za0.d", [ZAQ0, ZAQ8]>;
+  def ZAD1 : AArch64Reg<1, "za1.d", [ZAQ1, ZAQ9]>;
+  def ZAD2 : AArch64Reg<2, "za2.d", [ZAQ2, ZAQ10]>;
+  def ZAD3 : AArch64Reg<3, "za3.d", [ZAQ3, ZAQ11]>;
+  def ZAD4 : AArch64Reg<4, "za4.d", [ZAQ4, ZAQ12]>;
+  def ZAD5 : AArch64Reg<5, "za5.d", [ZAQ5, ZAQ13]>;
+  def ZAD6 : AArch64Reg<6, "za6.d", [ZAQ6, ZAQ14]>;
+  def ZAD7 : AArch64Reg<7, "za7.d", [ZAQ7, ZAQ15]>;
+}
+
+let SubRegIndices = [zasubd0, zasubd1] in {
+  def ZAS0 : AArch64Reg<0, "za0.s", [ZAD0, ZAD4]>;
+  def ZAS1 : AArch64Reg<1, "za1.s", [ZAD1, ZAD5]>;
+  def ZAS2 : AArch64Reg<2, "za2.s", [ZAD2, ZAD6]>;
+  def ZAS3 : AArch64Reg<3, "za3.s", [ZAD3, ZAD7]>;
+}
+
+let SubRegIndices = [zasubs0, zasubs1] in {
+  def ZAH0 : AArch64Reg<0, "za0.h", [ZAS0, ZAS2]>;
+  def ZAH1 : AArch64Reg<1, "za1.h", [ZAS1, ZAS3]>;
+}
+
+let SubRegIndices = [zasubh0, zasubh1] in {
+  def ZAB0 : AArch64Reg<0, "za0.b", [ZAH0, ZAH1]>;
+}
+
+let SubRegIndices = [zasubb] in {
+  def ZA : AArch64Reg<0, "za", [ZAB0]>;
+}
+
+// SME Register Classes
+
+// Accumulator array
+def MPR : RegisterClass<"AArch64", [untyped], 2048, (add ZA)> {
+  let Size = 2048;
+}
+
+// Accumulator array as single tiles
+def MPR8 : RegisterClass<"AArch64", [untyped], 2048, (add (sequence "ZAB%u", 0, 0))> {
+  let Size = 2048;
+}
+def MPR16 : RegisterClass<"AArch64", [untyped], 1024, (add (sequence "ZAH%u", 0, 1))> {
+  let Size = 1024;
+}
+def MPR32 : RegisterClass<"AArch64", [untyped], 512, (add (sequence "ZAS%u", 0, 3))> {
+  let Size = 512;
+}
+def MPR64 : RegisterClass<"AArch64", [untyped], 256, (add (sequence "ZAD%u", 0, 7))> {
+  let Size = 256;
+}
+def MPR128 : RegisterClass<"AArch64", [untyped], 128, (add (sequence "ZAQ%u", 0, 15))> {
+  let Size = 128;
+}
+
+// SME Register Operands
+// There are three types of SME matrix register operands:
+// * Tiles:
+//
+//   These tiles make up the larger accumulator matrix. The tile representation
+//   has an element type suffix, e.g. za0.b or za15.q, and can be any of the
+//   registers:
+//          ZAQ0..ZAQ15
+//          ZAD0..ZAD7
+//          ZAS0..ZAS3
+//          ZAH0..ZAH1
+//       or ZAB0
+//
+// * Tile vectors:
+//
+//   Their representation is similar to regular tiles, but they have an extra
+//   'h' or 'v' to tell how the vector at [reg+offset] is laid out in the tile,
+//   horizontally or vertically.
+//
+//   e.g. za1h.h or za15v.q, which corresponds to vectors in registers ZAH1 and
+//   ZAQ15, respectively. The horizontal/vertical is more a property of the
+//   instruction than of the asm-operand itself or its register. The distinction
+//   is required for parsing and printing the operand, as from a compiler's
+//   perspective the whole tile is read/written.
+//
+// * Accumulator matrix:
+//
+//   This is the entire matrix accumulator register ZA (<=> ZAB0), printed as
+//   'za'.
+
+//
+// Tiles
+//
+
+class MatrixTileAsmOperand<string RC, int EltSize> : AsmOperandClass {
+  let Name = "MatrixTile" # EltSize;
+  let DiagnosticType = "Invalid" # Name;
+  let ParserMethod = "tryParseMatrixRegister";
+  let RenderMethod = "addMatrixOperands";
+  let PredicateMethod = "isMatrixRegOperand<"
+                          # "MatrixKind::Tile" # ", "
+                          # EltSize # ", AArch64::" # RC # "RegClassID>";
+}
+
+class MatrixTileOperand<int EltSize, int NumBitsForTile, RegisterClass RC>
+    : RegisterOperand<RC> {
+  let ParserMatchClass = MatrixTileAsmOperand<!cast<string>(RC), EltSize>;
+  let DecoderMethod = "DecodeMatrixTile<" # NumBitsForTile # ">";
+  let PrintMethod = "printMatrixTile";
+}
+
+def TileOp32 : MatrixTileOperand<32, 2, MPR32>;
+def TileOp64 : MatrixTileOperand<64, 3, MPR64>;
+
+//
+// Tile vectors (horizontal and vertical)
+//
+
+class MatrixTileVectorAsmOperand<string RC, int EltSize, int IsVertical>
+    : AsmOperandClass {
+  let Name = "MatrixTileVector" # !if(IsVertical, "V", "H") # EltSize;
+  let DiagnosticType = "Invalid" # Name;
+  let ParserMethod = "tryParseMatrixRegister";
+  let RenderMethod = "addMatrixOperands";
+  let PredicateMethod = "isMatrixRegOperand<"
+                          # "MatrixKind::"
+                          # !if(IsVertical, "Col", "Row") # ", "
+                          # EltSize # ", AArch64::" # RC # "RegClassID>";
+}
+
+class MatrixTileVectorOperand<int EltSize, int NumBitsForTile,
+                              RegisterClass RC, int IsVertical>
+    : RegisterOperand<RC> {
+  let ParserMatchClass = MatrixTileVectorAsmOperand<!cast<string>(RC), EltSize,
+                                                    IsVertical>;
+  let DecoderMethod = "DecodeMatrixTile<" # NumBitsForTile # ">";
+  let PrintMethod = "printMatrixTileVector<" # IsVertical # ">";
+}
+
+def TileVectorOpH8   : MatrixTileVectorOperand<  8, 0, MPR8,   0>;
+def TileVectorOpH16  : MatrixTileVectorOperand< 16, 1, MPR16,  0>;
+def TileVectorOpH32  : MatrixTileVectorOperand< 32, 2, MPR32,  0>;
+def TileVectorOpH64  : MatrixTileVectorOperand< 64, 3, MPR64,  0>;
+def TileVectorOpH128 : MatrixTileVectorOperand<128, 4, MPR128, 0>;
+
+def TileVectorOpV8   : MatrixTileVectorOperand<  8, 0, MPR8,   1>;
+def TileVectorOpV16  : MatrixTileVectorOperand< 16, 1, MPR16,  1>;
+def TileVectorOpV32  : MatrixTileVectorOperand< 32, 2, MPR32,  1>;
+def TileVectorOpV64  : MatrixTileVectorOperand< 64, 3, MPR64,  1>;
+def TileVectorOpV128 : MatrixTileVectorOperand<128, 4, MPR128, 1>;
+
+//
+// Accumulator matrix
+//
+
+class MatrixAsmOperand<string RC, int EltSize> : AsmOperandClass {
+  let Name = "Matrix";
+  let DiagnosticType = "Invalid" # Name;
+  let ParserMethod = "tryParseMatrixRegister";
+  let RenderMethod = "addMatrixOperands";
+  let PredicateMethod = "isMatrixRegOperand<"
+                          # "MatrixKind::Array" # ", "
+                          # EltSize # ", AArch64::" # RC # "RegClassID>";
+}
+
+class MatrixOperand<RegisterClass RC, int EltSize> : RegisterOperand<RC> {
+  let ParserMatchClass = MatrixAsmOperand<!cast<string>(RC), EltSize>;
+  let PrintMethod = "printMatrix<" # EltSize # ">";
+}
+
+def MatrixOp : MatrixOperand<MPR, 0>;
diff --git a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
new file mode 100644
--- /dev/null
+++ b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
@@ -0,0 +1,25 @@
+//=- AArch64SMEInstrInfo.td - AArch64 SME Instructions -*- tablegen -*-----=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// AArch64 Scalable Matrix Extension (SME) Instruction definitions.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Add vector elements horizontally or vertically to ZA tile.
+//===----------------------------------------------------------------------===//
+
+let Predicates = [HasSME] in {
+def ADDHA_MPPZ_S : sme_add_vector_to_tile_u32<0b0, "addha">;
+def ADDVA_MPPZ_S : sme_add_vector_to_tile_u32<0b1, "addva">;
+}
+
+let Predicates = [HasSMEI64] in {
+def ADDHA_MPPZ_D : sme_add_vector_to_tile_u64<0b0, "addha">;
+def ADDVA_MPPZ_D : sme_add_vector_to_tile_u64<0b1, "addva">;
+}
diff --git a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
--- a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
+++ b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@@ -66,9 +66,12 @@
   Scalar,
   NeonVector,
   SVEDataVector,
-  SVEPredicateVector
+  SVEPredicateVector,
+  Matrix
 };
 
+enum class MatrixKind { Array, Tile, Row, Col };
+
 enum RegConstraintEqualityTy {
   EqualsReg,
   EqualsSuperReg,
@@ -229,6 +232,7 @@
   OperandMatchResultTy tryParseScalarRegister(unsigned &Reg);
   OperandMatchResultTy tryParseVectorRegister(unsigned &Reg, StringRef &Kind,
                                               RegKind MatchKind);
+  OperandMatchResultTy tryParseMatrixRegister(OperandVector &Operands);
   OperandMatchResultTy tryParseOptionalShiftExtend(OperandVector &Operands);
   OperandMatchResultTy tryParseBarrierOperand(OperandVector &Operands);
   OperandMatchResultTy tryParseBarriernXSOperand(OperandVector &Operands);
@@ -316,6 +320,7 @@
     k_ShiftedImm,
     k_CondCode,
     k_Register,
+    k_MatrixRegister,
     k_VectorList,
     k_VectorIndex,
     k_Token,
@@ -370,6 +375,12 @@
     ShiftExtendOp ShiftExtend;
   };
 
+  struct MatrixRegOp {
+    unsigned RegNum;
+    unsigned ElementWidth;
+    MatrixKind Kind;
+  };
+
   struct VectorListOp {
     unsigned RegNum;
     unsigned Count;
@@ -440,6 +451,7 @@
   union {
     struct TokOp Tok;
     struct RegOp Reg;
+    struct MatrixRegOp MatrixReg;
     struct VectorListOp VectorList;
     struct VectorIndexOp VectorIndex;
     struct ImmOp Imm;
@@ -488,6 +500,9 @@
     case k_Register:
       Reg = o.Reg;
      break;
+    case k_MatrixRegister:
+      MatrixReg = o.MatrixReg;
+      break;
     case k_VectorList:
      VectorList = o.VectorList;
      break;
@@ -580,6 +595,21 @@
     return Reg.RegNum;
   }
 
+  unsigned getMatrixReg() const {
+    assert(Kind == k_MatrixRegister && "Invalid access!");
+    return MatrixReg.RegNum;
+  }
+
+  unsigned getMatrixElementWidth() const {
+    assert(Kind == k_MatrixRegister && "Invalid access!");
+    return MatrixReg.ElementWidth;
+  }
+
+  MatrixKind getMatrixKind() const {
+    assert(Kind == k_MatrixRegister && "Invalid access!");
+    return MatrixReg.Kind;
+  }
+
   RegConstraintEqualityTy getRegEqualityTy() const {
     assert(Kind == k_Register && "Invalid access!");
     return Reg.EqualityTy;
@@ -1089,6 +1119,8 @@
                                    Reg.RegNum));
   }
 
+  bool isMatrix() const { return Kind == k_MatrixRegister; }
+
   template <RegKind VectorKind, unsigned Class> bool isSVEVectorReg() const {
     RegKind RK;
     switch (Class) {
@@ -1470,6 +1502,15 @@
     return true;
   }
 
+  template <MatrixKind Kind, unsigned EltSize, unsigned RegClass>
+  DiagnosticPredicate isMatrixRegOperand() const {
+    if (isMatrix() && getMatrixKind() == Kind &&
+        AArch64MCRegisterClasses[RegClass].contains(getMatrixReg()) &&
+        EltSize == getMatrixElementWidth())
+      return DiagnosticPredicateTy::Match;
+    return DiagnosticPredicateTy::NoMatch;
+  }
+
   void addExpr(MCInst &Inst, const MCExpr *Expr) const {
     // Add as immediates when possible.  Null MCExpr = 0.
     if (!Expr)
@@ -1485,6 +1526,11 @@
     Inst.addOperand(MCOperand::createReg(getReg()));
   }
 
+  void addMatrixOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    Inst.addOperand(MCOperand::createReg(getMatrixReg()));
+  }
+
   void addGPR32as64Operands(MCInst &Inst, unsigned N) const {
     assert(N == 1 && "Invalid number of operands!");
     assert(
@@ -2054,6 +2100,18 @@
     return Op;
   }
 
+  static std::unique_ptr<AArch64Operand>
+  CreateMatrixRegister(unsigned RegNum, unsigned ElementWidth, MatrixKind Kind,
+                       SMLoc S, SMLoc E, MCContext &Ctx) {
+    auto Op = std::make_unique<AArch64Operand>(k_MatrixRegister, Ctx);
+    Op->MatrixReg.RegNum = RegNum;
+    Op->MatrixReg.ElementWidth = ElementWidth;
+    Op->MatrixReg.Kind = Kind;
+    Op->StartLoc = S;
+    Op->EndLoc = E;
+    return Op;
+  }
+
   static std::unique_ptr<AArch64Operand>
   CreateShiftExtend(AArch64_AM::ShiftExtendType ShOp, unsigned Val,
                     bool HasExplicitAmount, SMLoc S, SMLoc E, MCContext &Ctx) {
@@ -2132,6 +2190,9 @@
   case k_BTIHint:
     OS << getBTIHintName();
     break;
+  case k_MatrixRegister:
+    OS << "<matrix " << getMatrixReg() << ">";
+    break;
   case k_Register:
     OS << "<register " << getReg() << ">";
     if (!getShiftExtendAmount() && !hasShiftExtendAmount())
@@ -2229,6 +2290,7 @@
     break;
   case RegKind::SVEPredicateVector:
   case RegKind::SVEDataVector:
+  case RegKind::Matrix:
     Res = StringSwitch<std::pair<int, int>>(Suffix.lower())
              .Case("", {0, 0})
              .Case(".b", {0, 8})
@@ -2310,6 +2372,105 @@
       .Default(0);
 }
 
+static unsigned matchMatrixRegName(StringRef Name) {
+  return StringSwitch<unsigned>(Name.lower())
+      .Case("za", AArch64::ZA)
+      .Case("za0.q", AArch64::ZAQ0)
+      .Case("za1.q", AArch64::ZAQ1)
+      .Case("za2.q", AArch64::ZAQ2)
+      .Case("za3.q", AArch64::ZAQ3)
+      .Case("za4.q", AArch64::ZAQ4)
+      .Case("za5.q", AArch64::ZAQ5)
+      .Case("za6.q", AArch64::ZAQ6)
+      .Case("za7.q", AArch64::ZAQ7)
+      .Case("za8.q", AArch64::ZAQ8)
+      .Case("za9.q", AArch64::ZAQ9)
+      .Case("za10.q", AArch64::ZAQ10)
+      .Case("za11.q", AArch64::ZAQ11)
+      .Case("za12.q", AArch64::ZAQ12)
+      .Case("za13.q", AArch64::ZAQ13)
+      .Case("za14.q", AArch64::ZAQ14)
+      .Case("za15.q", AArch64::ZAQ15)
+      .Case("za0.d", AArch64::ZAD0)
+      .Case("za1.d", AArch64::ZAD1)
+      .Case("za2.d", AArch64::ZAD2)
+      .Case("za3.d", AArch64::ZAD3)
+      .Case("za4.d", AArch64::ZAD4)
+      .Case("za5.d", AArch64::ZAD5)
+      .Case("za6.d", AArch64::ZAD6)
+      .Case("za7.d", AArch64::ZAD7)
+      .Case("za0.s", AArch64::ZAS0)
+      .Case("za1.s", AArch64::ZAS1)
+      .Case("za2.s", AArch64::ZAS2)
+      .Case("za3.s", AArch64::ZAS3)
+      .Case("za0.h", AArch64::ZAH0)
+      .Case("za1.h", AArch64::ZAH1)
+      .Case("za0.b", AArch64::ZAB0)
+      .Case("za0h.q", AArch64::ZAQ0)
+      .Case("za1h.q", AArch64::ZAQ1)
+      .Case("za2h.q", AArch64::ZAQ2)
+      .Case("za3h.q", AArch64::ZAQ3)
+      .Case("za4h.q", AArch64::ZAQ4)
+      .Case("za5h.q", AArch64::ZAQ5)
+      .Case("za6h.q", AArch64::ZAQ6)
+      .Case("za7h.q", AArch64::ZAQ7)
+      .Case("za8h.q", AArch64::ZAQ8)
+      .Case("za9h.q", AArch64::ZAQ9)
+      .Case("za10h.q", AArch64::ZAQ10)
+      .Case("za11h.q", AArch64::ZAQ11)
+      .Case("za12h.q", AArch64::ZAQ12)
+      .Case("za13h.q", AArch64::ZAQ13)
+      .Case("za14h.q", AArch64::ZAQ14)
+      .Case("za15h.q", AArch64::ZAQ15)
+      .Case("za0h.d", AArch64::ZAD0)
+      .Case("za1h.d", AArch64::ZAD1)
+      .Case("za2h.d", AArch64::ZAD2)
+      .Case("za3h.d", AArch64::ZAD3)
+      .Case("za4h.d", AArch64::ZAD4)
+      .Case("za5h.d", AArch64::ZAD5)
+      .Case("za6h.d", AArch64::ZAD6)
+      .Case("za7h.d", AArch64::ZAD7)
+      .Case("za0h.s", AArch64::ZAS0)
+      .Case("za1h.s", AArch64::ZAS1)
+      .Case("za2h.s", AArch64::ZAS2)
+      .Case("za3h.s", AArch64::ZAS3)
+      .Case("za0h.h", AArch64::ZAH0)
+      .Case("za1h.h", AArch64::ZAH1)
+      .Case("za0h.b", AArch64::ZAB0)
+      .Case("za0v.q", AArch64::ZAQ0)
+      .Case("za1v.q", AArch64::ZAQ1)
+      .Case("za2v.q", AArch64::ZAQ2)
+      .Case("za3v.q", AArch64::ZAQ3)
+      .Case("za4v.q", AArch64::ZAQ4)
+      .Case("za5v.q", AArch64::ZAQ5)
+      .Case("za6v.q", AArch64::ZAQ6)
+      .Case("za7v.q", AArch64::ZAQ7)
+      .Case("za8v.q", AArch64::ZAQ8)
+      .Case("za9v.q", AArch64::ZAQ9)
+      .Case("za10v.q", AArch64::ZAQ10)
+      .Case("za11v.q", AArch64::ZAQ11)
+      .Case("za12v.q", AArch64::ZAQ12)
+      .Case("za13v.q", AArch64::ZAQ13)
+      .Case("za14v.q", AArch64::ZAQ14)
+      .Case("za15v.q", AArch64::ZAQ15)
+      .Case("za0v.d", AArch64::ZAD0)
+      .Case("za1v.d", AArch64::ZAD1)
+      .Case("za2v.d", AArch64::ZAD2)
+      .Case("za3v.d", AArch64::ZAD3)
+      .Case("za4v.d", AArch64::ZAD4)
+      .Case("za5v.d", AArch64::ZAD5)
+      .Case("za6v.d", AArch64::ZAD6)
+      .Case("za7v.d", AArch64::ZAD7)
+      .Case("za0v.s", AArch64::ZAS0)
+      .Case("za1v.s", AArch64::ZAS1)
+      .Case("za2v.s", AArch64::ZAS2)
+      .Case("za3v.s", AArch64::ZAS3)
+      .Case("za0v.h", AArch64::ZAH0)
+      .Case("za1v.h", AArch64::ZAH1)
+      .Case("za0v.b", AArch64::ZAB0)
+      .Default(0);
+}
+
 bool AArch64AsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
                                      SMLoc &EndLoc) {
   return tryParseRegister(RegNo, StartLoc, EndLoc) != MatchOperand_Success;
 }
@@ -2337,6 +2498,9 @@
   if ((RegNum = MatchNeonVectorRegName(Name)))
     return Kind == RegKind::NeonVector ? RegNum : 0;
 
+  if ((RegNum = matchMatrixRegName(Name)))
+    return Kind == RegKind::Matrix ? RegNum : 0;
+
   // The parsed register must be of RegKind Scalar
   if ((RegNum = MatchRegisterName(Name)))
     return Kind == RegKind::Scalar ? RegNum : 0;
@@ -2809,6 +2973,54 @@
   return false;
 }
 
+OperandMatchResultTy
+AArch64AsmParser::tryParseMatrixRegister(OperandVector &Operands) {
+  MCAsmParser &Parser = getParser();
+  const AsmToken &Tok = Parser.getTok();
+  SMLoc S = getLoc();
+
+  StringRef Name = Tok.getString();
+
+  if (Name.equals_insensitive("za")) {
+    Parser.Lex(); // eat "za"
+    Operands.push_back(AArch64Operand::CreateMatrixRegister(
+        AArch64::ZA, /*ElementWidth=*/0, MatrixKind::Array, S, getLoc(),
+        getContext()));
+    return MatchOperand_Success;
+  }
+
+  // Try to parse matrix register.
+  unsigned Reg = matchRegisterNameAlias(Name, RegKind::Matrix);
+  if (!Reg)
+    return MatchOperand_NoMatch;
+
+  size_t DotPosition = Name.find('.');
+  assert(DotPosition != StringRef::npos && "Unexpected register");
+
+  StringRef Head = Name.take_front(DotPosition);
+  StringRef Tail = Name.drop_front(DotPosition);
+  StringRef RowOrColumn = Head.take_back();
+
+  MatrixKind Kind = StringSwitch<MatrixKind>(RowOrColumn)
+                        .Case("h", MatrixKind::Row)
+                        .Case("v", MatrixKind::Col)
+                        .Default(MatrixKind::Tile);
+
+  // Next up, parsing the suffix
+  const auto &KindRes = parseVectorKind(Tail, RegKind::Matrix);
+  if (!KindRes) {
+    TokError("Expected the register to be followed by element width suffix");
+    return MatchOperand_ParseFail;
+  }
+  unsigned ElementWidth = KindRes->second;
+
+  Parser.Lex();
+
+  Operands.push_back(AArch64Operand::CreateMatrixRegister(
+      Reg, ElementWidth, Kind, S, getLoc(), getContext()));
+  return MatchOperand_Success;
+}
+
 /// tryParseOptionalShift - Some operands take an optional shift argument. Parse
 /// them if present.
 OperandMatchResultTy
@@ -4733,6 +4945,32 @@
     return Error(Loc, "Invalid floating point constant, expected 0.5 or 2.0.");
   case Match_InvalidSVEExactFPImmOperandZeroOne:
     return Error(Loc, "Invalid floating point constant, expected 0.0 or 1.0.");
+  case Match_InvalidMatrixTileVectorH8:
+    return Error(Loc, "invalid matrix operand, expected za0h.b");
+  case Match_InvalidMatrixTileVectorH16:
+    return Error(Loc, "invalid matrix operand, expected za[0-1]h.h");
+  case Match_InvalidMatrixTileVectorH32:
+    return Error(Loc, "invalid matrix operand, expected za[0-3]h.s");
+  case Match_InvalidMatrixTileVectorH64:
+    return Error(Loc, "invalid matrix operand, expected za[0-7]h.d");
+  case Match_InvalidMatrixTileVectorH128:
+    return Error(Loc, "invalid matrix operand, expected za[0-15]h.q");
+  case Match_InvalidMatrixTileVectorV8:
+    return Error(Loc, "invalid matrix operand, expected za0v.b");
+  case Match_InvalidMatrixTileVectorV16:
+    return Error(Loc, "invalid matrix operand, expected za[0-1]v.h");
+  case Match_InvalidMatrixTileVectorV32:
+    return Error(Loc, "invalid matrix operand, expected za[0-3]v.s");
+  case Match_InvalidMatrixTileVectorV64:
+    return Error(Loc, "invalid matrix operand, expected za[0-7]v.d");
+  case Match_InvalidMatrixTileVectorV128:
+    return Error(Loc, "invalid matrix operand, expected za[0-15]v.q");
+  case Match_InvalidMatrixTile32:
+    return Error(Loc, "invalid matrix operand, expected za[0-3].s");
+  case Match_InvalidMatrixTile64:
+    return Error(Loc, "invalid matrix operand, expected za[0-7].d");
+  case Match_InvalidMatrix:
+    return Error(Loc, "invalid matrix operand, expected za");
   default:
     llvm_unreachable("unexpected error code!");
   }
@@ -5251,6 +5489,19 @@
   case Match_InvalidSVEExactFPImmOperandHalfOne:
   case Match_InvalidSVEExactFPImmOperandHalfTwo:
   case Match_InvalidSVEExactFPImmOperandZeroOne:
+  case Match_InvalidMatrixTile32:
+  case Match_InvalidMatrixTile64:
+  case Match_InvalidMatrix:
+  case Match_InvalidMatrixTileVectorH8:
+  case Match_InvalidMatrixTileVectorH16:
+  case Match_InvalidMatrixTileVectorH32:
+  case Match_InvalidMatrixTileVectorH64:
+  case Match_InvalidMatrixTileVectorH128:
+  case Match_InvalidMatrixTileVectorV8:
+  case Match_InvalidMatrixTileVectorV16:
+  case Match_InvalidMatrixTileVectorV32:
+  case Match_InvalidMatrixTileVectorV64:
+  case Match_InvalidMatrixTileVectorV128:
   case Match_MSR:
   case Match_MRS: {
     if (ErrorInfo >= Operands.size())
diff --git a/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp b/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
--- a/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
+++ b/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
@@ -111,6 +111,9 @@
 static DecodeStatus DecodeZPR4RegisterClass(MCInst &Inst, unsigned RegNo,
                                             uint64_t Address,
                                             const void *Decoder);
+template <unsigned NumBitsForTile>
+static DecodeStatus DecodeMatrixTile(MCInst &Inst, unsigned RegNo,
+                                     uint64_t Address, const void *Decoder);
 static DecodeStatus DecodePPRRegisterClass(MCInst &Inst, unsigned RegNo,
                                            uint64_t Address,
                                            const void *Decoder);
@@ -642,6 +645,29 @@
   return Success;
 }
 
+static const SmallVector<SmallVector<unsigned, 16>, 5>
+    MatrixZATileDecoderTable = {
+        {AArch64::ZAB0},
+        {AArch64::ZAH0, AArch64::ZAH1},
+        {AArch64::ZAS0, AArch64::ZAS1, AArch64::ZAS2, AArch64::ZAS3},
+        {AArch64::ZAD0, AArch64::ZAD1, AArch64::ZAD2, AArch64::ZAD3,
+         AArch64::ZAD4, AArch64::ZAD5, AArch64::ZAD6, AArch64::ZAD7},
+        {AArch64::ZAQ0, AArch64::ZAQ1, AArch64::ZAQ2, AArch64::ZAQ3,
+         AArch64::ZAQ4, AArch64::ZAQ5, AArch64::ZAQ6, AArch64::ZAQ7,
+         AArch64::ZAQ8, AArch64::ZAQ9, AArch64::ZAQ10, AArch64::ZAQ11,
+         AArch64::ZAQ12, AArch64::ZAQ13, AArch64::ZAQ14, AArch64::ZAQ15}};
+
+template <unsigned NumBitsForTile>
+static DecodeStatus DecodeMatrixTile(MCInst &Inst, unsigned RegNo,
+                                     uint64_t Address, const void *Decoder) {
+  unsigned LastReg = (1 << NumBitsForTile) - 1;
+  if (RegNo > LastReg)
+    return Fail;
+  Inst.addOperand(
+      MCOperand::createReg(MatrixZATileDecoderTable[NumBitsForTile][RegNo]));
+  return Success;
+}
+
 static const unsigned PPRDecoderTable[] = {
     AArch64::P0, AArch64::P1, AArch64::P2, AArch64::P3,
     AArch64::P4, AArch64::P5, AArch64::P6, AArch64::P7,
diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h
@@ -187,6 +187,15 @@
                         const MCSubtargetInfo &STI, raw_ostream &O);
   void printSVEPattern(const MCInst *MI, unsigned OpNum,
                        const MCSubtargetInfo &STI, raw_ostream &O);
+
+  template <bool IsVertical>
+  void printMatrixTileVector(const MCInst *MI, unsigned OpNum,
+                             const MCSubtargetInfo &STI, raw_ostream &O);
+  void printMatrixTile(const MCInst *MI, unsigned OpNum,
+                       const MCSubtargetInfo &STI, raw_ostream &O);
+  template <int EltSize>
+  void printMatrix(const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI,
+                   raw_ostream &O);
   template <char = 0>
   void printSVERegOp(const MCInst *MI, unsigned OpNum,
                      const MCSubtargetInfo &STI, raw_ostream &O);
diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
@@ -880,6 +880,59 @@
   return true;
 }
 
+template <int EltSize>
+void AArch64InstPrinter::printMatrix(const MCInst *MI, unsigned OpNum,
+                                     const MCSubtargetInfo &STI,
+                                     raw_ostream &O) {
+  const MCOperand &RegOp = MI->getOperand(OpNum);
+  assert(RegOp.isReg() && "Unexpected operand type!");
+
+  O << getRegisterName(RegOp.getReg());
+  switch (EltSize) {
+  case 0:
+    break;
+  case 8:
+    O << ".b";
+    break;
+  case 16:
+    O << ".h";
+    break;
+  case 32:
+    O << ".s";
+    break;
+  case 64:
+    O << ".d";
+    break;
+  case 128:
+    O << ".q";
+    break;
+  default:
+    llvm_unreachable("Unsupported element size");
+  }
+}
+
+template <bool IsVertical>
+void AArch64InstPrinter::printMatrixTileVector(const MCInst *MI, unsigned OpNum,
+                                               const MCSubtargetInfo &STI,
+                                               raw_ostream &O) {
+  const MCOperand &RegOp = MI->getOperand(OpNum);
+  assert(RegOp.isReg() && "Unexpected operand type!");
+  StringRef RegName = getRegisterName(RegOp.getReg());
+
+  // Insert the horizontal/vertical flag before the suffix.
+  StringRef Base, Suffix;
+  std::tie(Base, Suffix) = RegName.split('.');
+  O << Base << (IsVertical ? "v" : "h") << '.' << Suffix;
+}
+
+void AArch64InstPrinter::printMatrixTile(const MCInst *MI, unsigned OpNum,
+                                         const MCSubtargetInfo &STI,
+                                         raw_ostream &O) {
+  const MCOperand &RegOp = MI->getOperand(OpNum);
+  assert(RegOp.isReg() && "Unexpected operand type!");
+  O << getRegisterName(RegOp.getReg());
+}
+
 void AArch64InstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
                                       const MCSubtargetInfo &STI,
                                       raw_ostream &O) {
diff --git a/llvm/lib/Target/AArch64/SMEInstrFormats.td b/llvm/lib/Target/AArch64/SMEInstrFormats.td
new file mode 100644
--- /dev/null
+++ b/llvm/lib/Target/AArch64/SMEInstrFormats.td
@@ -0,0 +1,47 @@
+//=-- SMEInstrFormats.td - AArch64 SME Instruction classes -*- tablegen -*--=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// AArch64 Scalable Matrix Extension (SME) Instruction Class Definitions.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// SME Add Vector to Tile
+//===----------------------------------------------------------------------===//
+
+class sme_add_vector_to_tile_inst<bit op, bit V, MatrixTileOperand tile_ty,
+                                  ZPRRegOp zpr_ty, string mnemonic>
+    : I<(outs tile_ty:$ZAda),
+        (ins PPR3bAny:$Pn, PPR3bAny:$Pm, zpr_ty:$Zn),
+        mnemonic, "\t$ZAda, $Pn/m, $Pm/m, $Zn",
+        "", []>, Sched<[]> {
+  bits<3> Pm;
+  bits<3> Pn;
+  bits<5> Zn;
+  let Inst{31-23} = 0b110000001;
+  let Inst{22}    = op;
+  let Inst{21-17} = 0b01000;
+  let Inst{16}    = V;
+  let Inst{15-13} = Pm;
+  let Inst{12-10} = Pn;
+  let Inst{9-5}   = Zn;
+  let Inst{4-3}   = 0b00;
+}
+
+class sme_add_vector_to_tile_u32<bit V, string mnemonic>
+    : sme_add_vector_to_tile_inst<0b0, V, TileOp32, ZPR32, mnemonic> {
+  bits<2> ZAda;
+  let Inst{2}   = 0b0;
+  let Inst{1-0} = ZAda;
+}
+
+class sme_add_vector_to_tile_u64<bit V, string mnemonic>
+    : sme_add_vector_to_tile_inst<0b1, V, TileOp64, ZPR64, mnemonic> {
+  bits<3> ZAda;
+  let Inst{2-0} = ZAda;
+}
diff --git a/llvm/test/MC/AArch64/SME/addha-diagnostics.s b/llvm/test/MC/AArch64/SME/addha-diagnostics.s
new file mode 100644
--- /dev/null
+++ b/llvm/test/MC/AArch64/SME/addha-diagnostics.s
@@ -0,0 +1,52 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme,+sme-i64 2>&1 < %s| FileCheck %s
+
+// ------------------------------------------------------------------------- //
+// Invalid tile
+
+addha za4.s, p0/m, p0/m, z0.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: addha za4.s, p0/m, p0/m, z0.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+addha za8.d, p0/m, p0/m, z0.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: addha za8.d, p0/m, p0/m, z0.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+addha za0h.s, p0/m, p0/m, z0.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: addha za0h.s, p0/m, p0/m, z0.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+addha za0v.s, p0/m, p0/m, z0.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: addha za0v.s, p0/m, p0/m, z0.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+addha za0p.s, p0/m, p0/m, z0.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: addha za0p.s, p0/m, p0/m, z0.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// ------------------------------------------------------------------------- //
+// Invalid
predicate + +addha za0.s, p8/m, p0/m, z0.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix) +// CHECK-NEXT: addha za0.s, p8/m, p0/m, z0.s +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +addha za0.s, p0/m, p8/m, z0.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix) +// CHECK-NEXT: addha za0.s, p0/m, p8/m, z0.s +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +addha za0.d, p8/m, p0/m, z0.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix) +// CHECK-NEXT: addha za0.d, p8/m, p0/m, z0.d +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +addha za0.d, p0/m, p8/m, z0.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix) +// CHECK-NEXT: addha za0.d, p0/m, p8/m, z0.d +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SME/addha-u32.s b/llvm/test/MC/AArch64/SME/addha-u32.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME/addha-u32.s @@ -0,0 +1,85 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme < %s \ +// RUN: | llvm-objdump -d --mattr=+sme - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme < %s \ +// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// Disassemble encoding and check the re-encoding (-show-encoding) matches. +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + +addha za0.s, p0/m, p0/m, z0.s +// CHECK-INST: addha za0.s, p0/m, p0/m, z0.s +// CHECK-ENCODING: [0x00,0x00,0x90,0xc0] +// CHECK-ERROR: instruction requires: sme +// CHECK-UNKNOWN: 00 00 90 c0 + +addha za1.s, p5/m, p2/m, z10.s +// CHECK-INST: addha za1.s, p5/m, p2/m, z10.s +// CHECK-ENCODING: [0x41,0x55,0x90,0xc0] +// CHECK-ERROR: instruction requires: sme +// CHECK-UNKNOWN: 41 55 90 c0 + +addha za3.s, p3/m, p7/m, z13.s +// CHECK-INST: addha za3.s, p3/m, p7/m, z13.s +// CHECK-ENCODING: [0xa3,0xed,0x90,0xc0] +// CHECK-ERROR: instruction requires: sme +// CHECK-UNKNOWN: a3 ed 90 c0 + +addha za3.s, p7/m, p7/m, z31.s +// CHECK-INST: addha za3.s, p7/m, p7/m, z31.s +// CHECK-ENCODING: [0xe3,0xff,0x90,0xc0] +// CHECK-ERROR: instruction requires: sme +// CHECK-UNKNOWN: e3 ff 90 c0 + +addha za1.s, p3/m, p0/m, z17.s +// CHECK-INST: addha za1.s, p3/m, p0/m, z17.s +// CHECK-ENCODING: [0x21,0x0e,0x90,0xc0] +// CHECK-ERROR: instruction requires: sme +// CHECK-UNKNOWN: 21 0e 90 c0 + +addha za1.s, p1/m, p4/m, z1.s +// CHECK-INST: addha za1.s, p1/m, p4/m, z1.s +// CHECK-ENCODING: [0x21,0x84,0x90,0xc0] +// CHECK-ERROR: instruction requires: sme +// CHECK-UNKNOWN: 21 84 90 c0 + +addha za0.s, p5/m, p2/m, z19.s +// CHECK-INST: addha za0.s, p5/m, p2/m, z19.s +// CHECK-ENCODING: [0x60,0x56,0x90,0xc0] +// CHECK-ERROR: instruction requires: sme +// CHECK-UNKNOWN: 60 56 90 c0 + +addha za0.s, p6/m, p0/m, z12.s +// CHECK-INST: addha za0.s, p6/m, p0/m, z12.s +// CHECK-ENCODING: [0x80,0x19,0x90,0xc0] +// CHECK-ERROR: 
instruction requires: sme +// CHECK-UNKNOWN: 80 19 90 c0 + +addha za1.s, p2/m, p6/m, z1.s +// CHECK-INST: addha za1.s, p2/m, p6/m, z1.s +// CHECK-ENCODING: [0x21,0xc8,0x90,0xc0] +// CHECK-ERROR: instruction requires: sme +// CHECK-UNKNOWN: 21 c8 90 c0 + +addha za1.s, p2/m, p0/m, z22.s +// CHECK-INST: addha za1.s, p2/m, p0/m, z22.s +// CHECK-ENCODING: [0xc1,0x0a,0x90,0xc0] +// CHECK-ERROR: instruction requires: sme +// CHECK-UNKNOWN: c1 0a 90 c0 + +addha za2.s, p5/m, p7/m, z9.s +// CHECK-INST: addha za2.s, p5/m, p7/m, z9.s +// CHECK-ENCODING: [0x22,0xf5,0x90,0xc0] +// CHECK-ERROR: instruction requires: sme +// CHECK-UNKNOWN: 22 f5 90 c0 + +addha za3.s, p2/m, p5/m, z12.s +// CHECK-INST: addha za3.s, p2/m, p5/m, z12.s +// CHECK-ENCODING: [0x83,0xa9,0x90,0xc0] +// CHECK-ERROR: instruction requires: sme +// CHECK-UNKNOWN: 83 a9 90 c0 diff --git a/llvm/test/MC/AArch64/SME/addha-u64.s b/llvm/test/MC/AArch64/SME/addha-u64.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME/addha-u64.s @@ -0,0 +1,85 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme-i64 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme-i64 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme-i64 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme-i64 < %s \ +// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// Disassemble encoding and check the re-encoding (-show-encoding) matches. +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme-i64 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme-i64 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + +addha za0.d, p0/m, p0/m, z0.d +// CHECK-INST: addha za0.d, p0/m, p0/m, z0.d +// CHECK-ENCODING: [0x00,0x00,0xd0,0xc0] +// CHECK-ERROR: instruction requires: sme-i64 +// CHECK-UNKNOWN: 00 00 d0 c0 + +addha za5.d, p5/m, p2/m, z10.d +// CHECK-INST: addha za5.d, p5/m, p2/m, z10.d +// CHECK-ENCODING: [0x45,0x55,0xd0,0xc0] +// CHECK-ERROR: instruction requires: sme-i64 +// CHECK-UNKNOWN: 45 55 d0 c0 + +addha za7.d, p3/m, p7/m, z13.d +// CHECK-INST: addha za7.d, p3/m, p7/m, z13.d +// CHECK-ENCODING: [0xa7,0xed,0xd0,0xc0] +// CHECK-ERROR: instruction requires: sme-i64 +// CHECK-UNKNOWN: a7 ed d0 c0 + +addha za7.d, p7/m, p7/m, z31.d +// CHECK-INST: addha za7.d, p7/m, p7/m, z31.d +// CHECK-ENCODING: [0xe7,0xff,0xd0,0xc0] +// CHECK-ERROR: instruction requires: sme-i64 +// CHECK-UNKNOWN: e7 ff d0 c0 + +addha za5.d, p3/m, p0/m, z17.d +// CHECK-INST: addha za5.d, p3/m, p0/m, z17.d +// CHECK-ENCODING: [0x25,0x0e,0xd0,0xc0] +// CHECK-ERROR: instruction requires: sme-i64 +// CHECK-UNKNOWN: 25 0e d0 c0 + +addha za1.d, p1/m, p4/m, z1.d +// CHECK-INST: addha za1.d, p1/m, p4/m, z1.d +// CHECK-ENCODING: [0x21,0x84,0xd0,0xc0] +// CHECK-ERROR: instruction requires: sme-i64 +// CHECK-UNKNOWN: 21 84 d0 c0 + +addha za0.d, p5/m, p2/m, z19.d +// CHECK-INST: addha za0.d, p5/m, p2/m, z19.d +// CHECK-ENCODING: [0x60,0x56,0xd0,0xc0] +// CHECK-ERROR: instruction requires: sme-i64 +// CHECK-UNKNOWN: 60 56 d0 c0 + +addha za0.d, p6/m, p0/m, z12.d +// CHECK-INST: addha za0.d, p6/m, p0/m, z12.d +// CHECK-ENCODING: [0x80,0x19,0xd0,0xc0] +// CHECK-ERROR: instruction requires: sme-i64 +// CHECK-UNKNOWN: 80 19 d0 c0 + +addha za1.d, p2/m, 
p6/m, z1.d +// CHECK-INST: addha za1.d, p2/m, p6/m, z1.d +// CHECK-ENCODING: [0x21,0xc8,0xd0,0xc0] +// CHECK-ERROR: instruction requires: sme-i64 +// CHECK-UNKNOWN: 21 c8 d0 c0 + +addha za5.d, p2/m, p0/m, z22.d +// CHECK-INST: addha za5.d, p2/m, p0/m, z22.d +// CHECK-ENCODING: [0xc5,0x0a,0xd0,0xc0] +// CHECK-ERROR: instruction requires: sme-i64 +// CHECK-UNKNOWN: c5 0a d0 c0 + +addha za2.d, p5/m, p7/m, z9.d +// CHECK-INST: addha za2.d, p5/m, p7/m, z9.d +// CHECK-ENCODING: [0x22,0xf5,0xd0,0xc0] +// CHECK-ERROR: instruction requires: sme-i64 +// CHECK-UNKNOWN: 22 f5 d0 c0 + +addha za7.d, p2/m, p5/m, z12.d +// CHECK-INST: addha za7.d, p2/m, p5/m, z12.d +// CHECK-ENCODING: [0x87,0xa9,0xd0,0xc0] +// CHECK-ERROR: instruction requires: sme-i64 +// CHECK-UNKNOWN: 87 a9 d0 c0 diff --git a/llvm/test/MC/AArch64/SME/addva-diagnostics.s b/llvm/test/MC/AArch64/SME/addva-diagnostics.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME/addva-diagnostics.s @@ -0,0 +1,37 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme,+sme-i64 2>&1 < %s| FileCheck %s + +// ------------------------------------------------------------------------- // +// Invalid tile + +addva za4.s, p0/m, p0/m, z0.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: addva za4.s, p0/m, p0/m, z0.s +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +addva za8.d, p0/m, p0/m, z0.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: addva za8.d, p0/m, p0/m, z0.d +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// ------------------------------------------------------------------------- // +// Invalid predicate + +addva za0.s, p8/m, p0/m, z0.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix) +// CHECK-NEXT: addva za0.s, p8/m, p0/m, z0.s +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +addva za0.s, p0/m, p8/m, z0.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix) +// CHECK-NEXT: addva za0.s, p0/m, p8/m, z0.s +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +addva za0.d, p8/m, p0/m, z0.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix) +// CHECK-NEXT: addva za0.d, p8/m, p0/m, z0.d +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +addva za0.d, p0/m, p8/m, z0.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix) +// CHECK-NEXT: addva za0.d, p0/m, p8/m, z0.d +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SME/addva-u32.s b/llvm/test/MC/AArch64/SME/addva-u32.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME/addva-u32.s @@ -0,0 +1,85 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme < %s \ +// RUN: | llvm-objdump -d --mattr=+sme - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme < %s \ +// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// Disassemble encoding and check the re-encoding (-show-encoding) matches. 
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + +addva za0.s, p0/m, p0/m, z0.s +// CHECK-INST: addva za0.s, p0/m, p0/m, z0.s +// CHECK-ENCODING: [0x00,0x00,0x91,0xc0] +// CHECK-ERROR: instruction requires: sme +// CHECK-UNKNOWN: 00 00 91 c0 + +addva za1.s, p5/m, p2/m, z10.s +// CHECK-INST: addva za1.s, p5/m, p2/m, z10.s +// CHECK-ENCODING: [0x41,0x55,0x91,0xc0] +// CHECK-ERROR: instruction requires: sme +// CHECK-UNKNOWN: 41 55 91 c0 + +addva za3.s, p3/m, p7/m, z13.s +// CHECK-INST: addva za3.s, p3/m, p7/m, z13.s +// CHECK-ENCODING: [0xa3,0xed,0x91,0xc0] +// CHECK-ERROR: instruction requires: sme +// CHECK-UNKNOWN: a3 ed 91 c0 + +addva za3.s, p7/m, p7/m, z31.s +// CHECK-INST: addva za3.s, p7/m, p7/m, z31.s +// CHECK-ENCODING: [0xe3,0xff,0x91,0xc0] +// CHECK-ERROR: instruction requires: sme +// CHECK-UNKNOWN: e3 ff 91 c0 + +addva za1.s, p3/m, p0/m, z17.s +// CHECK-INST: addva za1.s, p3/m, p0/m, z17.s +// CHECK-ENCODING: [0x21,0x0e,0x91,0xc0] +// CHECK-ERROR: instruction requires: sme +// CHECK-UNKNOWN: 21 0e 91 c0 + +addva za1.s, p1/m, p4/m, z1.s +// CHECK-INST: addva za1.s, p1/m, p4/m, z1.s +// CHECK-ENCODING: [0x21,0x84,0x91,0xc0] +// CHECK-ERROR: instruction requires: sme +// CHECK-UNKNOWN: 21 84 91 c0 + +addva za0.s, p5/m, p2/m, z19.s +// CHECK-INST: addva za0.s, p5/m, p2/m, z19.s +// CHECK-ENCODING: [0x60,0x56,0x91,0xc0] +// CHECK-ERROR: instruction requires: sme +// CHECK-UNKNOWN: 60 56 91 c0 + +addva za0.s, p6/m, p0/m, z12.s +// CHECK-INST: addva za0.s, p6/m, p0/m, z12.s +// CHECK-ENCODING: [0x80,0x19,0x91,0xc0] +// CHECK-ERROR: instruction requires: sme +// CHECK-UNKNOWN: 80 19 91 c0 + +addva za1.s, p2/m, p6/m, z1.s +// CHECK-INST: addva za1.s, p2/m, p6/m, z1.s +// CHECK-ENCODING: [0x21,0xc8,0x91,0xc0] +// CHECK-ERROR: instruction requires: sme +// CHECK-UNKNOWN: 21 c8 91 c0 + +addva za1.s, p2/m, p0/m, z22.s +// CHECK-INST: addva za1.s, p2/m, p0/m, z22.s +// CHECK-ENCODING: [0xc1,0x0a,0x91,0xc0] +// CHECK-ERROR: instruction requires: sme +// CHECK-UNKNOWN: c1 0a 91 c0 + +addva za2.s, p5/m, p7/m, z9.s +// CHECK-INST: addva za2.s, p5/m, p7/m, z9.s +// CHECK-ENCODING: [0x22,0xf5,0x91,0xc0] +// CHECK-ERROR: instruction requires: sme +// CHECK-UNKNOWN: 22 f5 91 c0 + +addva za3.s, p2/m, p5/m, z12.s +// CHECK-INST: addva za3.s, p2/m, p5/m, z12.s +// CHECK-ENCODING: [0x83,0xa9,0x91,0xc0] +// CHECK-ERROR: instruction requires: sme +// CHECK-UNKNOWN: 83 a9 91 c0 diff --git a/llvm/test/MC/AArch64/SME/addva-u64.s b/llvm/test/MC/AArch64/SME/addva-u64.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME/addva-u64.s @@ -0,0 +1,85 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme-i64 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme-i64 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme-i64 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme-i64 < %s \ +// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// Disassemble encoding and check the re-encoding (-show-encoding) matches. 
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme-i64 < %s \
+// RUN:        | sed '/.text/d' | sed 's/.*encoding: //g' \
+// RUN:        | llvm-mc -triple=aarch64 -mattr=+sme-i64 -disassemble -show-encoding \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+
+addva za0.d, p0/m, p0/m, z0.d
+// CHECK-INST: addva za0.d, p0/m, p0/m, z0.d
+// CHECK-ENCODING: [0x00,0x00,0xd1,0xc0]
+// CHECK-ERROR: instruction requires: sme-i64
+// CHECK-UNKNOWN: 00 00 d1 c0
+
+addva za5.d, p5/m, p2/m, z10.d
+// CHECK-INST: addva za5.d, p5/m, p2/m, z10.d
+// CHECK-ENCODING: [0x45,0x55,0xd1,0xc0]
+// CHECK-ERROR: instruction requires: sme-i64
+// CHECK-UNKNOWN: 45 55 d1 c0
+
+addva za7.d, p3/m, p7/m, z13.d
+// CHECK-INST: addva za7.d, p3/m, p7/m, z13.d
+// CHECK-ENCODING: [0xa7,0xed,0xd1,0xc0]
+// CHECK-ERROR: instruction requires: sme-i64
+// CHECK-UNKNOWN: a7 ed d1 c0
+
+addva za7.d, p7/m, p7/m, z31.d
+// CHECK-INST: addva za7.d, p7/m, p7/m, z31.d
+// CHECK-ENCODING: [0xe7,0xff,0xd1,0xc0]
+// CHECK-ERROR: instruction requires: sme-i64
+// CHECK-UNKNOWN: e7 ff d1 c0
+
+addva za5.d, p3/m, p0/m, z17.d
+// CHECK-INST: addva za5.d, p3/m, p0/m, z17.d
+// CHECK-ENCODING: [0x25,0x0e,0xd1,0xc0]
+// CHECK-ERROR: instruction requires: sme-i64
+// CHECK-UNKNOWN: 25 0e d1 c0
+
+addva za1.d, p1/m, p4/m, z1.d
+// CHECK-INST: addva za1.d, p1/m, p4/m, z1.d
+// CHECK-ENCODING: [0x21,0x84,0xd1,0xc0]
+// CHECK-ERROR: instruction requires: sme-i64
+// CHECK-UNKNOWN: 21 84 d1 c0
+
+addva za0.d, p5/m, p2/m, z19.d
+// CHECK-INST: addva za0.d, p5/m, p2/m, z19.d
+// CHECK-ENCODING: [0x60,0x56,0xd1,0xc0]
+// CHECK-ERROR: instruction requires: sme-i64
+// CHECK-UNKNOWN: 60 56 d1 c0
+
+addva za0.d, p6/m, p0/m, z12.d
+// CHECK-INST: addva za0.d, p6/m, p0/m, z12.d
+// CHECK-ENCODING: [0x80,0x19,0xd1,0xc0]
+// CHECK-ERROR: instruction requires: sme-i64
+// CHECK-UNKNOWN: 80 19 d1 c0
+
+addva za1.d, p2/m, p6/m, z1.d
+// CHECK-INST: addva za1.d, p2/m, p6/m, z1.d
+// CHECK-ENCODING: [0x21,0xc8,0xd1,0xc0]
+// CHECK-ERROR: instruction requires: sme-i64
+// CHECK-UNKNOWN: 21 c8 d1 c0
+
+addva za5.d, p2/m, p0/m, z22.d
+// CHECK-INST: addva za5.d, p2/m, p0/m, z22.d
+// CHECK-ENCODING: [0xc5,0x0a,0xd1,0xc0]
+// CHECK-ERROR: instruction requires: sme-i64
+// CHECK-UNKNOWN: c5 0a d1 c0
+
+addva za2.d, p5/m, p7/m, z9.d
+// CHECK-INST: addva za2.d, p5/m, p7/m, z9.d
+// CHECK-ENCODING: [0x22,0xf5,0xd1,0xc0]
+// CHECK-ERROR: instruction requires: sme-i64
+// CHECK-UNKNOWN: 22 f5 d1 c0
+
+addva za7.d, p2/m, p5/m, z12.d
+// CHECK-INST: addva za7.d, p2/m, p5/m, z12.d
+// CHECK-ENCODING: [0x87,0xa9,0xd1,0xc0]
+// CHECK-ERROR: instruction requires: sme-i64
+// CHECK-UNKNOWN: 87 a9 d1 c0
diff --git a/llvm/unittests/Target/AArch64/CMakeLists.txt b/llvm/unittests/Target/AArch64/CMakeLists.txt
--- a/llvm/unittests/Target/AArch64/CMakeLists.txt
+++ b/llvm/unittests/Target/AArch64/CMakeLists.txt
@@ -20,4 +20,5 @@
 add_llvm_target_unittest(AArch64Tests
   InstSizes.cpp
   DecomposeStackOffsetTest.cpp
+  MatrixRegisterAliasing.cpp
   )
diff --git a/llvm/unittests/Target/AArch64/MatrixRegisterAliasing.cpp b/llvm/unittests/Target/AArch64/MatrixRegisterAliasing.cpp
new file mode 100644
--- /dev/null
+++ b/llvm/unittests/Target/AArch64/MatrixRegisterAliasing.cpp
@@ -0,0 +1,135 @@
+#include "AArch64Subtarget.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/TargetSelect.h"
+
+#include "gtest/gtest.h"
+
+using namespace llvm;
+
+namespace {
+std::unique_ptr<LLVMTargetMachine> createTargetMachine() {
+  auto TT(Triple::normalize("aarch64--"));
+  std::string CPU("generic");
+  std::string FS("+sme");
+
+  LLVMInitializeAArch64TargetInfo();
+  LLVMInitializeAArch64Target();
+  LLVMInitializeAArch64TargetMC();
+
+  std::string Error;
+  const Target *TheTarget = TargetRegistry::lookupTarget(TT, Error);
+
+  return std::unique_ptr<LLVMTargetMachine>(
+      static_cast<LLVMTargetMachine *>(TheTarget->createTargetMachine(
+          TT, CPU, FS, TargetOptions(), None, None, CodeGenOpt::Default)));
+}
+
+std::unique_ptr<AArch64InstrInfo> createInstrInfo(TargetMachine *TM) {
+  AArch64Subtarget ST(TM->getTargetTriple(), std::string(TM->getTargetCPU()),
+                      std::string(TM->getTargetFeatureString()), *TM,
+                      /* isLittle */ false);
+  return std::make_unique<AArch64InstrInfo>(ST);
+}
+
+TEST(MatrixRegisterAliasing, Aliasing) {
+  std::unique_ptr<LLVMTargetMachine> TM = createTargetMachine();
+  ASSERT_TRUE(TM);
+  std::unique_ptr<AArch64InstrInfo> II = createInstrInfo(TM.get());
+
+  const AArch64RegisterInfo &TRI = II->getRegisterInfo();
+
+  // za overlaps with za.b
+  ASSERT_TRUE(TRI.regsOverlap(AArch64::ZA, AArch64::ZAB0));
+
+  // za0.b overlaps with all tiles
+  ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAB0, AArch64::ZAQ0));
+  ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAB0, AArch64::ZAQ15));
+  ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAB0, AArch64::ZAD0));
+  ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAB0, AArch64::ZAD7));
+  ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAB0, AArch64::ZAS0));
+  ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAB0, AArch64::ZAS3));
+  ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAB0, AArch64::ZAH0));
+  ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAB0, AArch64::ZAH1));
+
+  // za0.h aliases with za0.q, za2.q, ..
+  ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAH0, AArch64::ZAQ0));
+  ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAH0, AArch64::ZAQ2));
+  ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAH0, AArch64::ZAQ4));
+  ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAH0, AArch64::ZAQ6));
+  ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAH0, AArch64::ZAQ8));
+  ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAH0, AArch64::ZAQ10));
+  ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAH0, AArch64::ZAQ12));
+  ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAH0, AArch64::ZAQ14));
+
+  // za1.h aliases with za1.q, za3.q, ...
+  ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAH1, AArch64::ZAQ1));
+  ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAH1, AArch64::ZAQ3));
+  ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAH1, AArch64::ZAQ5));
+  ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAH1, AArch64::ZAQ7));
+  ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAH1, AArch64::ZAQ9));
+  ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAH1, AArch64::ZAQ11));
+  ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAH1, AArch64::ZAQ13));
+  ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAH1, AArch64::ZAQ15));
+
+  // za1.h doesn't alias with za0.q, za2.q, ..
+  ASSERT_FALSE(TRI.regsOverlap(AArch64::ZAH1, AArch64::ZAQ0));
+  ASSERT_FALSE(TRI.regsOverlap(AArch64::ZAH1, AArch64::ZAQ2));
+  ASSERT_FALSE(TRI.regsOverlap(AArch64::ZAH1, AArch64::ZAQ4));
+  ASSERT_FALSE(TRI.regsOverlap(AArch64::ZAH1, AArch64::ZAQ6));
+  ASSERT_FALSE(TRI.regsOverlap(AArch64::ZAH1, AArch64::ZAQ8));
+  ASSERT_FALSE(TRI.regsOverlap(AArch64::ZAH1, AArch64::ZAQ10));
+  ASSERT_FALSE(TRI.regsOverlap(AArch64::ZAH1, AArch64::ZAQ12));
+  ASSERT_FALSE(TRI.regsOverlap(AArch64::ZAH1, AArch64::ZAQ14));
+
+  // za0.h doesn't alias with za1.q, za3.q, ..
+ ASSERT_FALSE(TRI.regsOverlap(AArch64::ZAH0, AArch64::ZAQ1)); + ASSERT_FALSE(TRI.regsOverlap(AArch64::ZAH0, AArch64::ZAQ3)); + ASSERT_FALSE(TRI.regsOverlap(AArch64::ZAH0, AArch64::ZAQ5)); + ASSERT_FALSE(TRI.regsOverlap(AArch64::ZAH0, AArch64::ZAQ7)); + ASSERT_FALSE(TRI.regsOverlap(AArch64::ZAH0, AArch64::ZAQ9)); + ASSERT_FALSE(TRI.regsOverlap(AArch64::ZAH0, AArch64::ZAQ11)); + ASSERT_FALSE(TRI.regsOverlap(AArch64::ZAH0, AArch64::ZAQ13)); + ASSERT_FALSE(TRI.regsOverlap(AArch64::ZAH0, AArch64::ZAQ15)); + + // za0.s aliases with za0.q, za4.q, za8.q, za12.q + ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAS0, AArch64::ZAQ0)); + ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAS0, AArch64::ZAQ4)); + ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAS0, AArch64::ZAQ8)); + ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAS0, AArch64::ZAQ12)); + + // za1.s aliases with za1.q, za5.q, za9.q, za13.q + ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAS1, AArch64::ZAQ1)); + ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAS1, AArch64::ZAQ5)); + ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAS1, AArch64::ZAQ9)); + ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAS1, AArch64::ZAQ13)); + + // za0.s doesn't alias with za1.q, za5.q, za9.q, za13.q + ASSERT_FALSE(TRI.regsOverlap(AArch64::ZAS0, AArch64::ZAQ1)); + ASSERT_FALSE(TRI.regsOverlap(AArch64::ZAS0, AArch64::ZAQ5)); + ASSERT_FALSE(TRI.regsOverlap(AArch64::ZAS0, AArch64::ZAQ9)); + ASSERT_FALSE(TRI.regsOverlap(AArch64::ZAS0, AArch64::ZAQ13)); + + // za1.s doesn't alias with za0.q, za4.q, za8.q, za12.q + ASSERT_FALSE(TRI.regsOverlap(AArch64::ZAS1, AArch64::ZAQ0)); + ASSERT_FALSE(TRI.regsOverlap(AArch64::ZAS1, AArch64::ZAQ4)); + ASSERT_FALSE(TRI.regsOverlap(AArch64::ZAS1, AArch64::ZAQ8)); + ASSERT_FALSE(TRI.regsOverlap(AArch64::ZAS1, AArch64::ZAQ12)); + + // za0.d aliases za0.q and za8.q + ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAD0, AArch64::ZAQ0)); + ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAD0, AArch64::ZAQ8)); + + // za1.d aliases za1.q and za9.q + ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAD1, AArch64::ZAQ1)); + ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAD1, AArch64::ZAQ9)); + + // za0.d doesn't alias with za1.q and za9.q + ASSERT_FALSE(TRI.regsOverlap(AArch64::ZAD0, AArch64::ZAQ1)); + ASSERT_FALSE(TRI.regsOverlap(AArch64::ZAD0, AArch64::ZAQ9)); + + // za1.d doesn't alias with za0.q and za8.q + ASSERT_FALSE(TRI.regsOverlap(AArch64::ZAD1, AArch64::ZAQ0)); + ASSERT_FALSE(TRI.regsOverlap(AArch64::ZAD1, AArch64::ZAQ8)); +} + +} // end anonymous namespace
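Note (reviewer illustration, not part of the patch): the SubRegIndices in AArch64RegisterInfo.td interleave the tiles so that tile za<t> with an element size of E bytes is made up of every E-th 128-bit tile starting at ZAQ<t> (ZAD0 = {ZAQ0, ZAQ8}, ZAS1 = {ZAQ1, ZAQ5, ZAQ9, ZAQ13}, ZAH1 = the odd ZAQ tiles). The standalone C++ sketch below only restates that rule and checks it against a few of the expectations in MatrixRegisterAliasing.cpp; the helper name coveredZAQTiles is made up for the example and is not an LLVM API.

// Standalone sketch: which 128-bit tiles ZAQ<q> a wider tile za<t> covers,
// assuming the interleaving implied by the SubRegIndices above.
#include <cassert>
#include <vector>

std::vector<unsigned> coveredZAQTiles(unsigned Tile, unsigned EltBytes) {
  // EltBytes is 1 (.b), 2 (.h), 4 (.s), 8 (.d) or 16 (.q); Tile < EltBytes.
  std::vector<unsigned> Qs;
  for (unsigned Q = Tile; Q < 16; Q += EltBytes)
    Qs.push_back(Q);
  return Qs;
}

int main() {
  // za0.d covers ZAQ0 and ZAQ8, matching the ZAD0 definition and unit test.
  assert(coveredZAQTiles(0, 8) == (std::vector<unsigned>{0, 8}));
  // za1.s covers ZAQ1, ZAQ5, ZAQ9, ZAQ13.
  assert(coveredZAQTiles(1, 4) == (std::vector<unsigned>{1, 5, 9, 13}));
  // za1.h covers the odd-numbered ZAQ tiles.
  assert(coveredZAQTiles(1, 2) ==
         (std::vector<unsigned>{1, 3, 5, 7, 9, 11, 13, 15}));
  return 0;
}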
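Note (reviewer illustration, not part of the patch): the fixed fields in sme_add_vector_to_tile_inst can be cross-checked against the encodings in the MC tests. A minimal sketch, assuming the bit layout shown in SMEInstrFormats.td; encodeAddToTile is a made-up name and is not LLVM's MC emitter.

// Standalone sketch of the ADDHA/ADDVA encoding: Op selects the 32-bit (0) or
// 64-bit (1) element form, V selects addha (0) or addva (1).
#include <cassert>
#include <cstdint>

uint32_t encodeAddToTile(unsigned Op, unsigned V, unsigned ZAda, unsigned Pn,
                         unsigned Pm, unsigned Zn) {
  uint32_t Inst = 0;
  Inst |= 0b110000001u << 23; // Inst{31-23}
  Inst |= (Op & 1) << 22;     // Inst{22}
  Inst |= 0b01000u << 17;     // Inst{21-17}
  Inst |= (V & 1) << 16;      // Inst{16}
  Inst |= (Pm & 0x7) << 13;   // Inst{15-13}
  Inst |= (Pn & 0x7) << 10;   // Inst{12-10}
  Inst |= (Zn & 0x1f) << 5;   // Inst{9-5}
  Inst |= ZAda & 0x7;         // Inst{2-0}; Inst{2} stays 0 for the .s form
  return Inst;
}

int main() {
  // addha za1.s, p5/m, p2/m, z10.s -> [0x41,0x55,0x90,0xc0] in addha-u32.s.
  assert(encodeAddToTile(0, 0, 1, 5, 2, 10) == 0xc0905541);
  // addva za7.d, p2/m, p5/m, z12.d -> [0x87,0xa9,0xd1,0xc0] in addva-u64.s.
  assert(encodeAddToTile(1, 1, 7, 2, 5, 12) == 0xc0d1a987);
  return 0;
}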
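Note (reviewer illustration, not part of the patch): a minimal sketch of how a tile-vector operand name such as "za1v.h" decomposes, in the spirit of tryParseMatrixRegister and printMatrixTileVector. The 'h'/'v' before the dot selects Row/Col, the suffix selects the element width, and the register itself is just the tile (ZAH1 here). splitMatrixName is a made-up helper; the real parser additionally validates the name against matchMatrixRegName and parseVectorKind.

#include <cassert>
#include <string>

struct MatrixOperandParts {
  std::string Tile;   // e.g. "za1.h", i.e. register ZAH1
  char RowOrCol;      // 'h' (Row), 'v' (Col), or 0 for a plain tile
  std::string Suffix; // ".b", ".h", ".s", ".d" or ".q"
};

MatrixOperandParts splitMatrixName(const std::string &Name) {
  size_t Dot = Name.find('.');
  std::string Head = Name.substr(0, Dot);
  std::string Suffix = Name.substr(Dot);
  char Last = Head.back();
  // Plain tile names end in a digit before the dot, so a trailing 'h'/'v'
  // unambiguously marks a horizontal/vertical tile vector.
  if (Last == 'h' || Last == 'v')
    return {Head.substr(0, Head.size() - 1) + Suffix, Last, Suffix};
  return {Head, 0, Suffix};
}

int main() {
  MatrixOperandParts P = splitMatrixName("za1v.h");
  assert(P.Tile == "za1.h" && P.RowOrCol == 'v' && P.Suffix == ".h");
  return 0;
}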