diff --git a/clang/include/clang/AST/ASTContext.h b/clang/include/clang/AST/ASTContext.h
--- a/clang/include/clang/AST/ASTContext.h
+++ b/clang/include/clang/AST/ASTContext.h
@@ -972,6 +972,7 @@
 #include "clang/Basic/OpenCLImageTypes.def"
   CanQualType OCLSamplerTy, OCLEventTy, OCLClkEventTy;
   CanQualType OCLQueueTy, OCLReserveIDTy;
+  CanQualType IncompleteMatrixIdxTy;
   CanQualType OMPArraySectionTy, OMPArrayShapingTy;
 #define EXT_OPAQUE_TYPE(ExtType, Id, Ext) \
   CanQualType Id##Ty;
diff --git a/clang/include/clang/AST/BuiltinTypes.def b/clang/include/clang/AST/BuiltinTypes.def
--- a/clang/include/clang/AST/BuiltinTypes.def
+++ b/clang/include/clang/AST/BuiltinTypes.def
@@ -310,6 +310,9 @@
 // context.
 PLACEHOLDER_TYPE(ARCUnbridgedCast, ARCUnbridgedCastTy)
 
+// A placeholder type for incomplete matrix index expressions.
+PLACEHOLDER_TYPE(IncompleteMatrixIdx, IncompleteMatrixIdxTy)
+
 // A placeholder type for OpenMP array sections.
 PLACEHOLDER_TYPE(OMPArraySection, OMPArraySectionTy)
 
diff --git a/clang/include/clang/AST/Expr.h b/clang/include/clang/AST/Expr.h
--- a/clang/include/clang/AST/Expr.h
+++ b/clang/include/clang/AST/Expr.h
@@ -461,6 +461,11 @@
     return const_cast<Expr*>(this)->getReferencedDeclOfCallee();
   }
 
+  /// If this expression is a matrix, or a subscript expression whose base is a
+  /// matrix (i.e. a matrix row index), return that matrix type. Otherwise, or
+  /// if \p EnableMatrix is false, return nullptr.
+  const MatrixType *getMatrixFromIndexExpr(bool EnableMatrix) const;
+
   /// If this expression is an l-value for an Objective C
   /// property, find the underlying property reference expression.
   const ObjCPropertyRefExpr *getObjCProperty() const;
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -10676,6 +10676,15 @@
 def err_builtin_matrix_disabled: Error<
   "Builtin matrix support is disabled. Pass -fenable-matrix to enable it.">;
 
+def err_matrix_idx_no_int: Error<
+  "matrix %select{row|column}0 index is not an integer">;
+
+def err_matrix_idx_outside_range: Error<
+  "matrix %select{row|column}0 index is outside the allowed range [0, %1)">;
+
+def err_matrix_incomplete_idx: Error<
+  "single subscript expressions are not allowed for matrix values">;
+
 def err_preserve_field_info_not_field : Error<
   "__builtin_preserve_field_info argument %0 not a field access">;
 def err_preserve_field_info_not_const: Error<
diff --git a/clang/include/clang/Serialization/ASTBitCodes.h b/clang/include/clang/Serialization/ASTBitCodes.h
--- a/clang/include/clang/Serialization/ASTBitCodes.h
+++ b/clang/include/clang/Serialization/ASTBitCodes.h
@@ -1019,6 +1019,9 @@
       /// The placeholder type for OpenMP array shaping operation.
       PREDEF_TYPE_OMP_ARRAY_SHAPING = 70,
 
+      /// A placeholder type for incomplete matrix index operations.
+      PREDEF_TYPE_INCOMPLETE_MATRIX_IDX = 71,
+
       /// OpenCL image types with auto numeration
 #define IMAGE_TYPE(ImgType, Id, SingletonId, Access, Suffix) \
       PREDEF_TYPE_##Id##_ID,
diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp
--- a/clang/lib/AST/ASTContext.cpp
+++ b/clang/lib/AST/ASTContext.cpp
@@ -1390,6 +1390,8 @@
     InitBuiltinType(OMPArraySectionTy, BuiltinType::OMPArraySection);
     InitBuiltinType(OMPArrayShapingTy, BuiltinType::OMPArrayShaping);
   }
+  if (LangOpts.EnableMatrix)
+    InitBuiltinType(IncompleteMatrixIdxTy, BuiltinType::IncompleteMatrixIdx);
 
   // C99 6.2.5p11.
   FloatComplexTy      = getComplexType(FloatTy);
diff --git a/clang/lib/AST/Expr.cpp b/clang/lib/AST/Expr.cpp
--- a/clang/lib/AST/Expr.cpp
+++ b/clang/lib/AST/Expr.cpp
@@ -3821,6 +3821,19 @@
   return nullptr;
 }
 
+const MatrixType *Expr::getMatrixFromIndexExpr(bool EnableMatrix) const {
+  // Matrix subscripts only exist when the matrix extension is enabled.
+  if (!EnableMatrix)
+    return nullptr;
+  // This expression is itself a matrix, i.e. the base of a row subscript.
+  if (const auto *MTy = getType()->getAs<MatrixType>())
+    return MTy;
+  // Otherwise this may be a row subscript; report the matrix it indexes.
+  if (const auto *RowSubscript = dyn_cast<ArraySubscriptExpr>(this))
+    return RowSubscript->getBase()->getType()->getAs<MatrixType>();
+  return nullptr;
+}
+
 bool Expr::refersToVectorElement() const {
   // FIXME: Why do we not just look at the ObjectKind here?
   const Expr *E = this->IgnoreParens();
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -7737,6 +7737,10 @@
   if (E->getBase()->getType()->isVectorType())
     return Error(E);
 
+  // Skip matrixes as subscript bases.
+  if (E->getBase()->getMatrixFromIndexExpr(Info.getLangOpts().EnableMatrix))
+    return false;
+
   bool Success = true;
   if (!evaluatePointer(E->getBase(), Result)) {
     if (!Info.noteFailure())
diff --git a/clang/lib/AST/NSAPI.cpp b/clang/lib/AST/NSAPI.cpp
--- a/clang/lib/AST/NSAPI.cpp
+++ b/clang/lib/AST/NSAPI.cpp
@@ -482,6 +482,7 @@
   case BuiltinType::Half:
   case BuiltinType::PseudoObject:
   case BuiltinType::BuiltinFn:
+  case BuiltinType::IncompleteMatrixIdx:
   case BuiltinType::OMPArraySection:
   case BuiltinType::OMPArrayShaping:
     break;
diff --git a/clang/lib/AST/Type.cpp b/clang/lib/AST/Type.cpp
--- a/clang/lib/AST/Type.cpp
+++ b/clang/lib/AST/Type.cpp
@@ -2963,6 +2963,8 @@
     return "queue_t";
   case OCLReserveID:
     return "reserve_id_t";
+  case IncompleteMatrixIdx:
+    return "<incomplete matrix index type>";
   case OMPArraySection:
     return "<OpenMP array section type>";
   case OMPArrayShaping:
@@ -3976,6 +3978,7 @@
 #include "clang/Basic/AArch64SVEACLETypes.def"
     case BuiltinType::BuiltinFn:
     case BuiltinType::NullPtr:
+    case BuiltinType::IncompleteMatrixIdx:
     case BuiltinType::OMPArraySection:
     case BuiltinType::OMPArrayShaping:
       return false;
diff --git a/clang/lib/AST/TypeLoc.cpp b/clang/lib/AST/TypeLoc.cpp
--- a/clang/lib/AST/TypeLoc.cpp
+++ b/clang/lib/AST/TypeLoc.cpp
@@ -403,6 +403,7 @@
   case BuiltinType::Id:
 #include "clang/Basic/AArch64SVEACLETypes.def"
   case BuiltinType::BuiltinFn:
+  case BuiltinType::IncompleteMatrixIdx:
   case BuiltinType::OMPArraySection:
   case BuiltinType::OMPArrayShaping:
     return TST_unspecified;
diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -1838,16 +1838,16 @@
     assert(!LV.getType()->isFunctionType());
 
     if (LV.getType()->isMatrixType()) {
+      auto *Ptr = LV.getPointer(*this);
       auto *ArrayTy = dyn_cast<llvm::ArrayType>(
-          cast<llvm::PointerType>(LV.getPointer(*this)->getType())
-              ->getElementType());
+          cast<llvm::PointerType>(Ptr->getType())->getElementType());
       if (ArrayTy) {
         auto *VectorTy = llvm::VectorType::get(ArrayTy->getElementType(),
                                                ArrayTy->getNumElements());
 
-        LV.setAddress(Address(Builder.CreateBitCast(LV.getPointer(*this),
-                                                    VectorTy->getPointerTo()),
-                              LV.getAlignment()));
+        LV.setAddress(
+            Address(Builder.CreateBitCast(Ptr, VectorTy->getPointerTo()),
+                    LV.getAlignment()));
       }
     }
 
@@ -1855,6 +1855,25 @@
     return RValue::get(EmitLoadOfScalar(LV, Loc));
   }
 
+  if (LV.isMatrixElt()) {
+    assert(LV.getType()->isMatrixType() &&
+           "matrix element LValues need to access a matrix");
+    auto *Ptr = LV.getVectorPointer();
+    auto *ArrayTy = dyn_cast<llvm::ArrayType>(
+        cast<llvm::PointerType>(Ptr->getType())->getElementType());
+    Address Addr = LV.getVectorAddress();
+    if (ArrayTy) {
+      auto *VectorTy = llvm::VectorType::get(ArrayTy->getElementType(),
+                                             ArrayTy->getNumElements());
+
+      Addr = Address(Builder.CreateBitCast(Ptr, VectorTy->getPointerTo()),
+                     LV.getAlignment());
+    }
+    llvm::LoadInst *Load = Builder.CreateLoad(Addr, LV.isVolatileQualified());
+    return RValue::get(
+        Builder.CreateExtractElement(Load, LV.getVectorIdx(), "matext"));
+  }
+
   if (LV.isVectorElt()) {
     llvm::LoadInst *Load = Builder.CreateLoad(LV.getVectorAddress(),
                                               LV.isVolatileQualified());
@@ -2003,6 +2022,28 @@
     if (Dst.isGlobalReg())
       return EmitStoreThroughGlobalRegLValue(Src, Dst);
 
+    if (Dst.isMatrixElt()) {
+      llvm::Value *DstPtr = Dst.getVectorPointer();
+      llvm::ArrayType *ArrTy = dyn_cast<llvm::ArrayType>(
+          cast<llvm::PointerType>(DstPtr->getType())->getElementType());
+      llvm::VectorType *VectorTy =
+          ArrTy ? llvm::VectorType::get(ArrTy->getElementType(),
+                                        ArrTy->getNumElements())
+                : cast<llvm::VectorType>(
+                      cast<llvm::PointerType>(DstPtr->getType())
+                          ->getElementType());
+
+      DstPtr = Builder.CreateBitCast(DstPtr, VectorTy->getPointerTo());
+      // Reuse the l-value's alignment and volatility for both accesses rather
+      // than guessing an alignment from the element size.
+      Address Addr(DstPtr, Dst.getAlignment());
+      llvm::Value *Vec = Builder.CreateLoad(Addr, Dst.isVolatileQualified());
+      Vec = Builder.CreateInsertElement(Vec, Src.getScalarVal(),
+                                        Dst.getVectorIdx(), "matins");
+
+      Builder.CreateStore(Vec, Addr, Dst.isVolatileQualified());
+      return;
+    }
     assert(Dst.isBitField() && "Unknown LValue type");
     return EmitStoreThroughBitfieldLValue(Src, Dst);
   }
@@ -3614,6 +3655,28 @@
                                  TBAAAccessInfo());
   }
 
+  // If the base is part of a matrix index expression (with row and column
+  // indices), form a matrix element lvalue indexing the matrix as a flattened
+  // vector. Index math uses at least 32 bits so narrow indices cannot wrap.
+  if (auto *MTy =
+          E->getBase()->getMatrixFromIndexExpr(getLangOpts().EnableMatrix)) {
+    auto *RowSubscript = cast<ArraySubscriptExpr>(E->getBase());
+    LValue Base = EmitLValue(RowSubscript->getBase());
+    llvm::Value *RowIdx = EmitScalarExpr(RowSubscript->getIdx());
+    llvm::Value *ColIdx = EmitScalarExpr(E->getIdx());
+    unsigned MaxWidth = std::max({32u, RowIdx->getType()->getScalarSizeInBits(),
+                                  ColIdx->getType()->getScalarSizeInBits()});
+    llvm::Type *IntTy = llvm::IntegerType::get(getLLVMContext(), MaxWidth);
+    RowIdx = Builder.CreateZExt(RowIdx, IntTy);
+    ColIdx = Builder.CreateZExt(ColIdx, IntTy);
+    llvm::Value *NumRows = Builder.getIntN(MaxWidth, MTy->getNumRows());
+    llvm::Value *FinalIdx =
+        Builder.CreateAdd(Builder.CreateMul(ColIdx, NumRows), RowIdx);
+    return LValue::MakeMatrixElt(Base.getAddress(*this), FinalIdx,
+                                 RowSubscript->getBase()->getType(),
+                                 Base.getBaseInfo(), TBAAAccessInfo());
+  }
+
   // All the other cases basically behave like simple offsetting.
 
   // Handle the extvector case we ignored above.
diff --git a/clang/lib/CodeGen/CGValue.h b/clang/lib/CodeGen/CGValue.h
--- a/clang/lib/CodeGen/CGValue.h
+++ b/clang/lib/CodeGen/CGValue.h
@@ -170,7 +170,8 @@
     VectorElt,    // This is a vector element l-value (V[i]), use getVector*
     BitField,     // This is a bitfield l-value, use getBitfield*.
     ExtVectorElt, // This is an extended vector subset, use getExtVectorComp
-    GlobalReg     // This is a register l-value, use getGlobalReg()
+    GlobalReg,    // This is a register l-value, use getGlobalReg()
+    MatrixElt     // This is a matrix element, use getVector*
   } LVType;
 
   llvm::Value *V;
@@ -254,6 +255,7 @@
   bool isBitField() const { return LVType == BitField; }
   bool isExtVectorElt() const { return LVType == ExtVectorElt; }
   bool isGlobalReg() const { return LVType == GlobalReg; }
+  bool isMatrixElt() const { return LVType == MatrixElt; }
 
   bool isVolatileQualified() const { return Quals.hasVolatile(); }
   bool isRestrictQualified() const { return Quals.hasRestrict(); }
@@ -337,8 +339,14 @@
   Address getVectorAddress() const {
     return Address(getVectorPointer(), getAlignment());
   }
-  llvm::Value *getVectorPointer() const { assert(isVectorElt()); return V; }
-  llvm::Value *getVectorIdx() const { assert(isVectorElt()); return VectorIdx; }
+  llvm::Value *getVectorPointer() const {
+    assert(isVectorElt() || isMatrixElt()); // Both kinds store a pointer in V.
+    return V;
+  }
+  llvm::Value *getVectorIdx() const {
+    assert(isVectorElt() || isMatrixElt()); // Both kinds use VectorIdx.
+    return VectorIdx;
+  }
 
   // extended vector elements.
   Address getExtVectorAddress() const {
@@ -430,6 +438,18 @@
     return R;
   }
 
+  static LValue MakeMatrixElt(Address matAddress, llvm::Value *Idx,
+                              QualType type, LValueBaseInfo BaseInfo,
+                              TBAAAccessInfo TBAAInfo) {
+    LValue R;
+    R.LVType = MatrixElt;
+    R.V = matAddress.getPointer(); // Read back through getVectorPointer().
+    R.VectorIdx = Idx;             // Read back through getVectorIdx().
+    R.Initialize(type, type.getQualifiers(), matAddress.getAlignment(),
+                 BaseInfo, TBAAInfo);
+    return R;
+  }
+
   RValue asAggregateRValue(CodeGenFunction &CGF) const {
     return RValue::getAggregate(getAddress(CGF), isVolatileQualified());
   }
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -4547,7 +4547,9 @@
   // resolution for the operator overload should get the first crack
   // at the overload.
   bool IsMSPropertySubscript = false;
-  if (base->getType()->isNonOverloadPlaceholderType()) {
+  auto BaseTy = base->getType();
+  if (BaseTy->isNonOverloadPlaceholderType() &&
+      !BaseTy->isSpecificPlaceholderType(BuiltinType::IncompleteMatrixIdx)) {
     IsMSPropertySubscript = isMSPropertySubscriptExpr(*this, base);
     if (!IsMSPropertySubscript) {
       ExprResult result = CheckPlaceholderExpr(base);
@@ -4894,7 +4896,8 @@
   }
 
   // Perform default conversions.
-  if (!LHSExp->getType()->getAs<VectorType>()) {
+  if (!LHSExp->getType()->getAs<VectorType>() &&
+      !LHSExp->getMatrixFromIndexExpr(getLangOpts().EnableMatrix)) {
     ExprResult Result = DefaultFunctionArrayLvalueConversion(LHSExp);
     if (Result.isInvalid())
       return ExprError();
@@ -4985,6 +4988,41 @@
     BaseExpr = LHSExp;
     IndexExpr = RHSExp;
     ResultType = LHSTy->getAs<PointerType>()->getPointeeType();
+  } else if (auto *MTy =
+                 LHSExp->getMatrixFromIndexExpr(getLangOpts().EnableMatrix)) {
+    BaseExpr = LHSExp;
+    IndexExpr = RHSExp;
+
+    // Validate index.
+    llvm::APSInt Idx;
+    bool isRowIdx = !isa<ArraySubscriptExpr>(BaseExpr);
+    if (!IndexExpr->isIntegerConstantExpr(Idx, Context)) {
+      Diag(IndexExpr->getBeginLoc(), diag::err_matrix_idx_no_int)
+          << (isRowIdx ? 0 : 1);
+      return ExprError();
+    }
+    unsigned Dims = isRowIdx ? MTy->getNumRows() : MTy->getNumColumns();
+    if (Idx < 0 || Idx >= Dims) {
+      Diag(IndexExpr->getBeginLoc(), diag::err_matrix_idx_outside_range)
+          << (isRowIdx ? 0 : 1) << Dims;
+      return ExprError();
+    }
+
+    // A row-only subscript gets the IncompleteMatrixIdx placeholder type, so
+    // using a single ArraySubscriptExpr on a matrix is rejected. Once the
+    // column index is seen, the outer expression and the inner row subscript
+    // are both given the matrix element type.
+    // TODO: Improve the error message when using a single ArraySubscriptExpr
+    // with a matrix. NOTE(review): err_matrix_incomplete_idx may cover this.
+    if (isRowIdx)
+      ResultType = Context.IncompleteMatrixIdxTy;
+    else {
+      ResultType = MTy->getElementType();
+      BaseExpr->setType(ResultType);
+    }
+
+    VK = VK_LValue;
+    OK = OK_VectorComponent;
   } else if (RHSTy->isArrayType()) {
     // Same as previous, except for 123[f().a] case
     Diag(RHSExp->getBeginLoc(), diag::ext_subscript_non_lvalue)
@@ -5633,6 +5671,7 @@
   // These are always invalid as call arguments and should be reported.
   case BuiltinType::BoundMember:
   case BuiltinType::BuiltinFn:
+  case BuiltinType::IncompleteMatrixIdx:
   case BuiltinType::OMPArraySection:
   case BuiltinType::OMPArrayShaping:
     return true;
@@ -18505,6 +18544,11 @@
     return ExprError();
   }
 
+  case BuiltinType::IncompleteMatrixIdx: // E may be parenthesized.
+    Diag(cast<ArraySubscriptExpr>(E->IgnoreParens())->getIdx()->getBeginLoc(),
+         diag::err_matrix_incomplete_idx);
+    return ExprError();
+
   // Expressions of unknown type.
   case BuiltinType::OMPArraySection:
     Diag(E->getBeginLoc(), diag::err_omp_array_section_use);
diff --git a/clang/lib/Serialization/ASTCommon.cpp b/clang/lib/Serialization/ASTCommon.cpp
--- a/clang/lib/Serialization/ASTCommon.cpp
+++ b/clang/lib/Serialization/ASTCommon.cpp
@@ -240,6 +240,9 @@
   case BuiltinType::BuiltinFn:
     ID = PREDEF_TYPE_BUILTIN_FN;
     break;
+  case BuiltinType::IncompleteMatrixIdx:
+    ID = PREDEF_TYPE_INCOMPLETE_MATRIX_IDX;
+    break;
   case BuiltinType::OMPArraySection:
     ID = PREDEF_TYPE_OMP_ARRAY_SECTION;
     break;
diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -6963,6 +6963,9 @@
     case PREDEF_TYPE_BUILTIN_FN:
       T = Context.BuiltinFnTy;
       break;
+    case PREDEF_TYPE_INCOMPLETE_MATRIX_IDX:
+      T = Context.IncompleteMatrixIdxTy;
+      break;
     case PREDEF_TYPE_OMP_ARRAY_SECTION:
       T = Context.OMPArraySectionTy;
       break;
diff --git a/clang/test/CodeGen/matrix-type-operators.c b/clang/test/CodeGen/matrix-type-operators.c
new file mode 100644
--- /dev/null
+++ b/clang/test/CodeGen/matrix-type-operators.c
@@ -0,0 +1,157 @@
+// RUN: %clang_cc1 -fenable-matrix -triple x86_64-apple-darwin %s -emit-llvm -disable-llvm-passes -o - | FileCheck %s
+
+// Tests for the matrix type operators.
+
+typedef double dx5x5_t __attribute__((matrix_type(5, 5)));
+typedef float fx2x3_t __attribute__((matrix_type(2, 3)));
+
+// Check that we can use matrix index expressions on matrixes with different
+// floating point element types.
+void insert_fp(dx5x5_t a, double d, fx2x3_t b, float e) {
+  // CHECK-LABEL: @insert_fp(
+  // CHECK-NEXT: entry:
+  // CHECK-NEXT:    %a.addr = alloca [25 x double], align 8
+  // CHECK-NEXT:    %d.addr = alloca double, align 8
+  // CHECK-NEXT:    %b.addr = alloca [6 x float], align 4
+  // CHECK-NEXT:    %e.addr = alloca float, align 4
+  // CHECK-NEXT:    %0 = bitcast [25 x double]* %a.addr to <25 x double>*
+  // CHECK-NEXT:    store <25 x double> %a, <25 x double>* %0, align 8
+  // CHECK-NEXT:    store double %d, double* %d.addr, align 8
+  // CHECK-NEXT:    %1 = bitcast [6 x float]* %b.addr to <6 x float>*
+  // CHECK-NEXT:    store <6 x float> %b, <6 x float>* %1, align 4
+  // CHECK-NEXT:    store float %e, float* %e.addr, align 4
+  // CHECK-NEXT:    %2 = load double, double* %d.addr, align 8
+  // CHECK-NEXT:    %3 = load <25 x double>, <25 x double>* %0, align 8
+  // CHECK-NEXT:    %matins = insertelement <25 x double> %3, double %2, i32 5
+  // CHECK-NEXT:    store <25 x double> %matins, <25 x double>* %0, align 8
+  // CHECK-NEXT:    %4 = load float, float* %e.addr, align 4
+  // CHECK-NEXT:    %5 = load <6 x float>, <6 x float>* %1, align 4
+  // CHECK-NEXT:    %matins1 = insertelement <6 x float> %5, float %4, i32 1
+  // CHECK-NEXT:    store <6 x float> %matins1, <6 x float>* %1, align 4
+  // CHECK-NEXT:    %6 = load double, double* %d.addr, align 8
+  // CHECK-NEXT:    %7 = load <25 x double>, <25 x double>* %0, align 8
+  // CHECK-NEXT:    %matins2 = insertelement <25 x double> %7, double %6, i32 1
+  // CHECK-NEXT:    store <25 x double> %matins2, <25 x double>* %0, align 8
+  // CHECK-NEXT:    %8 = load float, float* %e.addr, align 4
+  // CHECK-NEXT:    %9 = load <6 x float>, <6 x float>* %1, align 4
+  // CHECK-NEXT:    %matins3 = insertelement <6 x float> %9, float %8, i32 3
+  // CHECK-NEXT:    store <6 x float> %matins3, <6 x float>* %1, align 4
+  // CHECK-NEXT:   ret void
+
+  a[0u][1u] = d;
+  b[1u][0u] = e;
+  a[1u][0u] = d;
+  b[1u][1u] = e;
+}
+
+// Check that we can use matrix index expressions on integer matrixes.
+typedef int ix9x3_t __attribute__((matrix_type(9, 3)));
+void insert_int(ix9x3_t a, int i) {
+  // CHECK-LABEL: @insert_int(
+  // CHECK-NEXT: entry:
+  // CHECK-NEXT:   %a.addr = alloca [27 x i32], align 4
+  // CHECK-NEXT:   %i.addr = alloca i32, align 4
+  // CHECK-NEXT:   %0 = bitcast [27 x i32]* %a.addr to <27 x i32>*
+  // CHECK-NEXT:   store <27 x i32> %a, <27 x i32>* %0, align 4
+  // CHECK-NEXT:   store i32 %i, i32* %i.addr, align 4
+  // CHECK-NEXT:   %1 = load i32, i32* %i.addr, align 4
+  // CHECK-NEXT:   %2 = load <27 x i32>, <27 x i32>* %0, align 4
+  // CHECK-NEXT:   %matins = insertelement <27 x i32> %2, i32 %1, i32 13
+  // CHECK-NEXT:   store <27 x i32> %matins, <27 x i32>* %0, align 4
+  // CHECK-NEXT:   ret void
+
+  a[4u][1u] = i;
+}
+
+// Check that we can use matrix index expressions on FP and integer
+// matrixes.
+typedef int ix9x3_t __attribute__((matrix_type(9, 3)));
+void insert_int_fp(ix9x3_t *a, int i, fx2x3_t b, float e) {
+  // CHECK-LABEL: @insert_int_fp(
+  // CHECK-NEXT: entry:
+  // CHECK-NEXT:    %a.addr = alloca [27 x i32]*, align 8
+  // CHECK-NEXT:    %i.addr = alloca i32, align 4
+  // CHECK-NEXT:    %b.addr = alloca [6 x float], align 4
+  // CHECK-NEXT:    %e.addr = alloca float, align 4
+  // CHECK-NEXT:    store [27 x i32]* %a, [27 x i32]** %a.addr, align 8
+  // CHECK-NEXT:    store i32 %i, i32* %i.addr, align 4
+  // CHECK-NEXT:    %0 = bitcast [6 x float]* %b.addr to <6 x float>*
+  // CHECK-NEXT:    store <6 x float> %b, <6 x float>* %0, align 4
+  // CHECK-NEXT:    store float %e, float* %e.addr, align 4
+  // CHECK-NEXT:    %1 = load i32, i32* %i.addr, align 4
+  // CHECK-NEXT:    %2 = load [27 x i32]*, [27 x i32]** %a.addr, align 8
+  // CHECK-NEXT:    %3 = bitcast [27 x i32]* %2 to <27 x i32>*
+  // CHECK-NEXT:    %4 = load <27 x i32>, <27 x i32>* %3, align 4
+  // CHECK-NEXT:    %matins = insertelement <27 x i32> %4, i32 %1, i32 13
+  // CHECK-NEXT:    store <27 x i32> %matins, <27 x i32>* %3, align 4
+  // CHECK-NEXT:    %5 = load float, float* %e.addr, align 4
+  // CHECK-NEXT:    %6 = load <6 x float>, <6 x float>* %0, align 4
+  // CHECK-NEXT:    %matins1 = insertelement <6 x float> %6, float %5, i32 3
+  // CHECK-NEXT:    store <6 x float> %matins1, <6 x float>* %0, align 4
+  // CHECK-NEXT:    ret void
+
+  (*a)[4u][1u] = i;
+  b[1u][1u] = e;
+}
+
+// Check that we can use overloaded matrix index expressions on matrixes with
+// matching dimensions, but different element types.
+typedef double dx3x3_t __attribute__((matrix_type(3, 3)));
+typedef float fx3x3_t __attribute__((matrix_type(3, 3)));
+void insert_matching_dimensions(dx3x3_t a, double i, fx3x3_t b, float e) {
+  // CHECK-LABEL: @insert_matching_dimensions(
+  // CHECK-NEXT:  entry:
+  // CHECK-NEXT:    %a.addr = alloca [9 x double], align 8
+  // CHECK-NEXT:    %i.addr = alloca double, align 8
+  // CHECK-NEXT:    %b.addr = alloca [9 x float], align 4
+  // CHECK-NEXT:    %e.addr = alloca float, align 4
+  // CHECK-NEXT:    %0 = bitcast [9 x double]* %a.addr to <9 x double>*
+  // CHECK-NEXT:    store <9 x double> %a, <9 x double>* %0, align 8
+  // CHECK-NEXT:    store double %i, double* %i.addr, align 8
+  // CHECK-NEXT:    %1 = bitcast [9 x float]* %b.addr to <9 x float>*
+  // CHECK-NEXT:    store <9 x float> %b, <9 x float>* %1, align 4
+  // CHECK-NEXT:    store float %e, float* %e.addr, align 4
+  // CHECK-NEXT:    %2 = load double, double* %i.addr, align 8
+  // CHECK-NEXT:    %3 = load <9 x double>, <9 x double>* %0, align 8
+  // CHECK-NEXT:    %matins = insertelement <9 x double> %3, double %2, i32 5
+  // CHECK-NEXT:    store <9 x double> %matins, <9 x double>* %0, align 8
+  // CHECK-NEXT:    %4 = load float, float* %e.addr, align 4
+  // CHECK-NEXT:    %5 = load <9 x float>, <9 x float>* %1, align 4
+  // CHECK-NEXT:    %matins1 = insertelement <9 x float> %5, float %4, i32 7
+  // CHECK-NEXT:    store <9 x float> %matins1, <9 x float>* %1, align 4
+  // CHECK-NEXT:   ret void
+
+  a[2u][1u] = i;
+  b[1u][2u] = e;
+}
+
+void extract1(dx5x5_t a, fx3x3_t b, ix9x3_t c) {
+  double v1 = a[2][3];
+  float v2 = b[2][1];
+  int v3 = c[1][1];
+
+  // CHECK-LABEL: @extract1(
+  // CHECK-NEXT:  entry:
+  // CHECK-NEXT:    %a.addr = alloca [25 x double], align 8
+  // CHECK-NEXT:    %b.addr = alloca [9 x float], align 4
+  // CHECK-NEXT:    %c.addr = alloca [27 x i32], align 4
+  // CHECK-NEXT:    %v1 = alloca double, align 8
+  // CHECK-NEXT:    %v2 = alloca float, align 4
+  // CHECK-NEXT:    %v3 = alloca i32, align 4
+  // CHECK-NEXT:    %0 = bitcast [25 x double]* %a.addr to <25 x double>*
+  // CHECK-NEXT:    store <25 x double> %a, <25 x double>* %0, align 8
+  // CHECK-NEXT:    %1 = bitcast [9 x float]* %b.addr to <9 x float>*
+  // CHECK-NEXT:    store <9 x float> %b, <9 x float>* %1, align 4
+  // CHECK-NEXT:    %2 = bitcast [27 x i32]* %c.addr to <27 x i32>*
+  // CHECK-NEXT:    store <27 x i32> %c, <27 x i32>* %2, align 4
+  // CHECK-NEXT:    %3 = load <25 x double>, <25 x double>* %0, align 8
+  // CHECK-NEXT:    %matext = extractelement <25 x double> %3, i32 17
+  // CHECK-NEXT:    store double %matext, double* %v1, align 8
+  // CHECK-NEXT:    %4 = load <9 x float>, <9 x float>* %1, align 4
+  // CHECK-NEXT:    %matext1 = extractelement <9 x float> %4, i32 5
+  // CHECK-NEXT:    store float %matext1, float* %v2, align 4
+  // CHECK-NEXT:    %5 = load <27 x i32>, <27 x i32>* %2, align 4
+  // CHECK-NEXT:    %matext2 = extractelement <27 x i32> %5, i32 10
+  // CHECK-NEXT:    store i32 %matext2, i32* %v3, align 4
+  // CHECK-NEXT:    ret void
+}
diff --git a/clang/test/CodeGenCXX/matrix-type-operators.cpp b/clang/test/CodeGenCXX/matrix-type-operators.cpp
new file mode 100644
--- /dev/null
+++ b/clang/test/CodeGenCXX/matrix-type-operators.cpp
@@ -0,0 +1,211 @@
+// RUN: %clang_cc1 -fenable-matrix -triple x86_64-apple-darwin %s -emit-llvm -disable-llvm-passes -o - -std=c++11 | FileCheck %s
+
+typedef double dx5x5_t __attribute__((matrix_type(5, 5)));
+using fx2x3_t = float __attribute__((matrix_type(2, 3)));
+
+void insert_fp(dx5x5_t *a, double d, fx2x3_t *b, float e) {
+  (*a)[0u][1u] = d;
+  (*b)[1u][0u] = e;
+
+  // CHECK-LABEL: @_Z9insert_fpPDm5_5_ddPDm2_3_ff(
+  // CHECK-NEXT: entry:
+  // CHECK-NEXT:    %a.addr = alloca [25 x double]*, align 8
+  // CHECK-NEXT:    %d.addr = alloca double, align 8
+  // CHECK-NEXT:    %b.addr = alloca [6 x float]*, align 8
+  // CHECK-NEXT:    %e.addr = alloca float, align 4
+  // CHECK-NEXT:    store [25 x double]* %a, [25 x double]** %a.addr, align 8
+  // CHECK-NEXT:    store double %d, double* %d.addr, align 8
+  // CHECK-NEXT:    store [6 x float]* %b, [6 x float]** %b.addr, align 8
+  // CHECK-NEXT:    store float %e, float* %e.addr, align 4
+  // CHECK-NEXT:    %0 = load double, double* %d.addr, align 8
+  // CHECK-NEXT:    %1 = load [25 x double]*, [25 x double]** %a.addr, align 8
+  // CHECK-NEXT:    %2 = bitcast [25 x double]* %1 to <25 x double>*
+  // CHECK-NEXT:    %3 = load <25 x double>, <25 x double>* %2, align 8
+  // CHECK-NEXT:    %matins = insertelement <25 x double> %3, double %0, i32 5
+  // CHECK-NEXT:    store <25 x double> %matins, <25 x double>* %2, align 8
+  // CHECK-NEXT:    %4 = load float, float* %e.addr, align 4
+  // CHECK-NEXT:    %5 = load [6 x float]*, [6 x float]** %b.addr, align 8
+  // CHECK-NEXT:    %6 = bitcast [6 x float]* %5 to <6 x float>*
+  // CHECK-NEXT:    %7 = load <6 x float>, <6 x float>* %6, align 4
+  // CHECK-NEXT:    %matins1 = insertelement <6 x float> %7, float %4, i32 1
+  // CHECK-NEXT:    store <6 x float> %matins1, <6 x float>* %6, align 4
+  // CHECK-NEXT:    ret void
+}
+
+typedef int ix9x3_t __attribute__((matrix_type(9, 3)));
+
+void insert_int(ix9x3_t *a, int i) {
+  (*a)[4u][1u] = i;
+
+  // CHECK-LABEL: @_Z10insert_intPDm9_3_ii(
+  // CHECK-NEXT: entry:
+  // CHECK-NEXT:    %a.addr = alloca [27 x i32]*, align 8
+  // CHECK-NEXT:    %i.addr = alloca i32, align 4
+  // CHECK-NEXT:    store [27 x i32]* %a, [27 x i32]** %a.addr, align 8
+  // CHECK-NEXT:    store i32 %i, i32* %i.addr, align 4
+  // CHECK-NEXT:    %0 = load i32, i32* %i.addr, align 4
+  // CHECK-NEXT:    %1 = load [27 x i32]*, [27 x i32]** %a.addr, align 8
+  // CHECK-NEXT:    %2 = bitcast [27 x i32]* %1 to <27 x i32>*
+  // CHECK-NEXT:    %3 = load <27 x i32>, <27 x i32>* %2, align 4
+  // CHECK-NEXT:    %matins = insertelement <27 x i32> %3, i32 %0, i32 13
+  // CHECK-NEXT:    store <27 x i32> %matins, <27 x i32>* %2, align 4
+  // CHECK-NEXT:    ret void
+}
+
+template <typename EltTy, unsigned Rows, unsigned Columns>
+struct MyMatrix {
+  using matrix_t = EltTy __attribute__((matrix_type(Rows, Columns)));
+
+  matrix_t value;
+};
+
+template <typename EltTy, unsigned Rows, unsigned Columns>
+void insert(MyMatrix<EltTy, Rows, Columns> &Mat, EltTy e) {
+  Mat.value[1u][0u] = e;
+}
+
+void test_template(unsigned *Ptr1, unsigned E1, float *Ptr2, float E2) {
+
+  // CHECK-LABEL: define void @_Z13test_templatePjjPff(i32* %Ptr1, i32 %E1, float* %Ptr2, float %E2)
+  // CHECK-NEXT:  entry:
+  // CHECK-NEXT:    %Ptr1.addr = alloca i32*, align 8
+  // CHECK-NEXT:    %E1.addr = alloca i32, align 4
+  // CHECK-NEXT:    %Ptr2.addr = alloca float*, align 8
+  // CHECK-NEXT:    %E2.addr = alloca float, align 4
+  // CHECK-NEXT:    %Mat1 = alloca %struct.MyMatrix, align 4
+  // CHECK-NEXT:    %Mat2 = alloca %struct.MyMatrix.0, align 4
+  // CHECK-NEXT:    store i32* %Ptr1, i32** %Ptr1.addr, align 8
+  // CHECK-NEXT:    store i32 %E1, i32* %E1.addr, align 4
+  // CHECK-NEXT:    store float* %Ptr2, float** %Ptr2.addr, align 8
+  // CHECK-NEXT:    store float %E2, float* %E2.addr, align 4
+  // CHECK-NEXT:    %0 = load i32*, i32** %Ptr1.addr, align 8
+  // CHECK-NEXT:    %1 = bitcast i32* %0 to [4 x i32]*
+  // CHECK-NEXT:    %2 = bitcast [4 x i32]* %1 to <4 x i32>*
+  // CHECK-NEXT:    %3 = load <4 x i32>, <4 x i32>* %2, align 4
+  // CHECK-NEXT:    %value = getelementptr inbounds %struct.MyMatrix, %struct.MyMatrix* %Mat1, i32 0, i32 0
+  // CHECK-NEXT:    %4 = bitcast [4 x i32]* %value to <4 x i32>*
+  // CHECK-NEXT:    store <4 x i32> %3, <4 x i32>* %4, align 4
+  // CHECK-NEXT:    %5 = load i32, i32* %E1.addr, align 4
+  // CHECK-NEXT:    call void @_Z6insertIjLj2ELj2EEvR8MyMatrixIT_XT0_EXT1_EES1_(%struct.MyMatrix* dereferenceable(16) %Mat1, i32 %5)
+  // CHECK-NEXT:    %6 = load float*, float** %Ptr2.addr, align 8
+  // CHECK-NEXT:    %7 = bitcast float* %6 to [24 x float]*
+  // CHECK-NEXT:    %8 = bitcast [24 x float]* %7 to <24 x float>*
+  // CHECK-NEXT:    %9 = load <24 x float>, <24 x float>* %8, align 4
+  // CHECK-NEXT:    %value1 = getelementptr inbounds %struct.MyMatrix.0, %struct.MyMatrix.0* %Mat2, i32 0, i32 0
+  // CHECK-NEXT:    %10 = bitcast [24 x float]* %value1 to <24 x float>*
+  // CHECK-NEXT:    store <24 x float> %9, <24 x float>* %10, align 4
+  // CHECK-NEXT:    %11 = load float, float* %E2.addr, align 4
+  // CHECK-NEXT:    call void @_Z6insertIfLj3ELj8EEvR8MyMatrixIT_XT0_EXT1_EES1_(%struct.MyMatrix.0* dereferenceable(96) %Mat2, float %11)
+  // CHECK-NEXT:    ret void
+
+  // CHECK-LABEL: define linkonce_odr void @_Z6insertIjLj2ELj2EEvR8MyMatrixIT_XT0_EXT1_EES1_(%struct.MyMatrix* dereferenceable(16) %Mat, i32 %e)
+  // CHECK-NEXT:  entry:
+  // CHECK-NEXT:    %Mat.addr = alloca %struct.MyMatrix*, align 8
+  // CHECK-NEXT:    %e.addr = alloca i32, align 4
+  // CHECK-NEXT:    store %struct.MyMatrix* %Mat, %struct.MyMatrix** %Mat.addr, align 8
+  // CHECK-NEXT:    store i32 %e, i32* %e.addr, align 4
+  // CHECK-NEXT:    %0 = load i32, i32* %e.addr, align 4
+  // CHECK-NEXT:    %1 = load %struct.MyMatrix*, %struct.MyMatrix** %Mat.addr, align 8
+  // CHECK-NEXT:    %value = getelementptr inbounds %struct.MyMatrix, %struct.MyMatrix* %1, i32 0, i32 0
+  // CHECK-NEXT:    %2 = bitcast [4 x i32]* %value to <4 x i32>*
+  // CHECK-NEXT:    %3 = load <4 x i32>, <4 x i32>* %2, align 4
+  // CHECK-NEXT:    %matins = insertelement <4 x i32> %3, i32 %0, i32 1
+  // CHECK-NEXT:    store <4 x i32> %matins, <4 x i32>* %2, align 4
+  // CHECK-NEXT:    ret void
+
+  // CHECK-LABEL: define linkonce_odr void @_Z6insertIfLj3ELj8EEvR8MyMatrixIT_XT0_EXT1_EES1_(%struct.MyMatrix.0* dereferenceable(96) %Mat, float %e)
+  // CHECK-NEXT:  entry:
+  // CHECK-NEXT:    %Mat.addr = alloca %struct.MyMatrix.0*, align 8
+  // CHECK-NEXT:    %e.addr = alloca float, align 4
+  // CHECK-NEXT:    store %struct.MyMatrix.0* %Mat, %struct.MyMatrix.0** %Mat.addr, align 8
+  // CHECK-NEXT:    store float %e, float* %e.addr, align 4
+  // CHECK-NEXT:    %0 = load float, float* %e.addr, align 4
+  // CHECK-NEXT:    %1 = load %struct.MyMatrix.0*, %struct.MyMatrix.0** %Mat.addr, align 8
+  // CHECK-NEXT:    %value = getelementptr inbounds %struct.MyMatrix.0, %struct.MyMatrix.0* %1, i32 0, i32 0
+  // CHECK-NEXT:    %2 = bitcast [24 x float]* %value to <24 x float>*
+  // CHECK-NEXT:    %3 = load <24 x float>, <24 x float>* %2, align 4
+  // CHECK-NEXT:    %matins = insertelement <24 x float> %3, float %0, i32 1
+  // CHECK-NEXT:    store <24 x float> %matins, <24 x float>* %2, align 4
+  // CHECK-NEXT:    ret void
+
+  MyMatrix<unsigned, 2, 2> Mat1;
+  Mat1.value = *((decltype(Mat1)::matrix_t *)Ptr1);
+  insert(Mat1, E1);
+
+  MyMatrix<float, 3, 8> Mat2;
+  Mat2.value = *((decltype(Mat2)::matrix_t *)Ptr2);
+  insert(Mat2, E2);
+}
+
+typedef float fx3x3_t __attribute__((matrix_type(3, 3))); // 3 x 3 matrix of float.
+void extract1(dx5x5_t a, fx3x3_t b, ix9x3_t c) {
+  // CHECK-LABEL: @_Z8extract1Dm5_5_dDm3_3_fDm9_3_i(
+  // CHECK-NEXT:  entry:
+  // CHECK-NEXT:    %a.addr = alloca [25 x double], align 8
+  // CHECK-NEXT:    %b.addr = alloca [9 x float], align 4
+  // CHECK-NEXT:    %c.addr = alloca [27 x i32], align 4
+  // CHECK-NEXT:    %v1 = alloca double, align 8
+  // CHECK-NEXT:    %v2 = alloca float, align 4
+  // CHECK-NEXT:    %v3 = alloca i32, align 4
+  // CHECK-NEXT:    %0 = bitcast [25 x double]* %a.addr to <25 x double>*
+  // CHECK-NEXT:    store <25 x double> %a, <25 x double>* %0, align 8
+  // CHECK-NEXT:    %1 = bitcast [9 x float]* %b.addr to <9 x float>*
+  // CHECK-NEXT:    store <9 x float> %b, <9 x float>* %1, align 4
+  // CHECK-NEXT:    %2 = bitcast [27 x i32]* %c.addr to <27 x i32>*
+  // CHECK-NEXT:    store <27 x i32> %c, <27 x i32>* %2, align 4
+  // CHECK-NEXT:    %3 = load <25 x double>, <25 x double>* %0, align 8
+  // CHECK-NEXT:    %matext = extractelement <25 x double> %3, i32 17
+  // CHECK-NEXT:    store double %matext, double* %v1, align 8
+  // CHECK-NEXT:    %4 = load <9 x float>, <9 x float>* %1, align 4
+  // CHECK-NEXT:    %matext1 = extractelement <9 x float> %4, i32 5
+  // CHECK-NEXT:    store float %matext1, float* %v2, align 4
+  // CHECK-NEXT:    %5 = load <27 x i32>, <27 x i32>* %2, align 4
+  // CHECK-NEXT:    %matext2 = extractelement <27 x i32> %5, i32 10
+  // CHECK-NEXT:    store i32 %matext2, i32* %v3, align 4
+  // CHECK-NEXT:    ret void
+
+  double v1 = a[2][3]; // Flat element 17 == 3 * 5 + 2 (column * rows + row); dx5x5_t presumably has 5 rows.
+  float v2 = b[2][1];  // Flat element 5 == 1 * 3 + 2 for the 3 x 3 matrix.
+  int v3 = c[1][1];    // Flat element 10 == 1 * 9 + 1; ix9x3_t presumably has 9 rows.
+}
+
+template <typename EltTy, unsigned Rows, unsigned Columns>
+EltTy extract(MyMatrix<EltTy, Rows, Columns> &Mat) {
+  return Mat.value[1u][0u]; // Reads row 1, column 0 of the wrapped matrix using unsigned subscripts.
+}
+
+void test_extract_template(unsigned *Ptr1, float *Ptr2) { // NOTE(review): Ptr2 is spilled to its alloca but never read below.
+  // CHECK-LABEL: define void @_Z21test_extract_templatePjPf(i32* %Ptr1, float* %Ptr2)
+  // CHECK-NEXT:  entry:
+  // CHECK-NEXT:    %Ptr1.addr = alloca i32*, align 8
+  // CHECK-NEXT:    %Ptr2.addr = alloca float*, align 8
+  // CHECK-NEXT:    %Mat1 = alloca %struct.MyMatrix, align 4
+  // CHECK-NEXT:    %v1 = alloca i32, align 4
+  // CHECK-NEXT:    store i32* %Ptr1, i32** %Ptr1.addr, align 8
+  // CHECK-NEXT:    store float* %Ptr2, float** %Ptr2.addr, align 8
+  // CHECK-NEXT:    %0 = load i32*, i32** %Ptr1.addr, align 8
+  // CHECK-NEXT:    %1 = bitcast i32* %0 to [4 x i32]*
+  // CHECK-NEXT:    %2 = bitcast [4 x i32]* %1 to <4 x i32>*
+  // CHECK-NEXT:    %3 = load <4 x i32>, <4 x i32>* %2, align 4
+  // CHECK-NEXT:    %value = getelementptr inbounds %struct.MyMatrix, %struct.MyMatrix* %Mat1, i32 0, i32 0
+  // CHECK-NEXT:    %4 = bitcast [4 x i32]* %value to <4 x i32>*
+  // CHECK-NEXT:    store <4 x i32> %3, <4 x i32>* %4, align 4
+  // CHECK-NEXT:    %call = call i32 @_Z7extractIjLj2ELj2EET_R8MyMatrixIS0_XT0_EXT1_EE(%struct.MyMatrix* dereferenceable(16) %Mat1)
+  // CHECK-NEXT:    store i32 %call, i32* %v1, align 4
+  // CHECK-NEXT:    ret void
+
+  // CHECK-LABEL: define linkonce_odr i32 @_Z7extractIjLj2ELj2EET_R8MyMatrixIS0_XT0_EXT1_EE(%struct.MyMatrix* dereferenceable(16) %Mat)
+  // CHECK-NEXT:  entry:
+  // CHECK-NEXT:    %Mat.addr = alloca %struct.MyMatrix*, align 8
+  // CHECK-NEXT:    store %struct.MyMatrix* %Mat, %struct.MyMatrix** %Mat.addr, align 8
+  // CHECK-NEXT:    %0 = load %struct.MyMatrix*, %struct.MyMatrix** %Mat.addr, align 8
+  // CHECK-NEXT:    %value = getelementptr inbounds %struct.MyMatrix, %struct.MyMatrix* %0, i32 0, i32 0
+  // CHECK-NEXT:    %1 = bitcast [4 x i32]* %value to <4 x i32>*
+  // CHECK-NEXT:    %2 = load <4 x i32>, <4 x i32>* %1, align 4
+  // CHECK-NEXT:    %matext = extractelement <4 x i32> %2, i32 1
+  // CHECK-NEXT:    ret i32 %matext
+
+  MyMatrix<unsigned, 2, 2> Mat1;
+  Mat1.value = *((decltype(Mat1)::matrix_t *)Ptr1); // Fills the whole 2 x 2 matrix with one load through Ptr1.
+  unsigned v1 = extract(Mat1); // Calls the unsigned 2 x 2 instantiation; [1u][0u] becomes flat element 1.
+}
diff --git a/clang/test/Sema/matrix-type-operators.c b/clang/test/Sema/matrix-type-operators.c
new file mode 100644
--- /dev/null
+++ b/clang/test/Sema/matrix-type-operators.c
@@ -0,0 +1,67 @@
+// RUN: %clang_cc1 %s -fenable-matrix -pedantic -verify -triple=x86_64-apple-darwin9
+
+typedef float sx5x10_t __attribute__((matrix_type(5, 10))); // 5 rows x 10 columns of float.
+
+void insert(sx5x10_t a, float f) {
+  // Non-integer indexes: a float subscript is rejected in the row or the column position.
+  a[3][f] = 0;
+  // expected-error@-1 {{matrix column index is not an integer}}
+  a[f][9] = 0;
+  // expected-error@-1 {{matrix row index is not an integer}}
+  a[f][f] = 0; // Both subscripts are floats; only the row diagnostic is listed.
+  // expected-error@-1 {{matrix row index is not an integer}}
+
+  // Invalid element type: elements are 'float', so a 'float *' cannot be stored.
+  a[3][4] = &f;
+  // expected-error@-1 {{assigning to 'float' from incompatible type 'float *'; remove &}}
+
+  // Constant indexes outside the allowed dimensions: rows are [0, 5), columns are [0, 10).
+  a[-1][3] = 10.0;
+  // expected-error@-1 {{matrix row index is outside the allowed range [0, 5)}}
+  a[3][-1] = 10.0;
+  // expected-error@-1 {{matrix column index is outside the allowed range [0, 10)}}
+  a[3][-1u] = 10.0; // -1u is a very large unsigned value, so it lies past the last column.
+  // expected-error@-1 {{matrix column index is outside the allowed range [0, 10)}}
+  a[-1u][3] = 10.0; // Same wrap-around value, this time in the row position.
+  // expected-error@-1 {{matrix row index is outside the allowed range [0, 5)}}
+  a[5][2] = 10.0; // First row index past the end.
+  // expected-error@-1 {{matrix row index is outside the allowed range [0, 5)}}
+  a[4][10] = 10.0; // First column index past the end.
+  // expected-error@-1 {{matrix column index is outside the allowed range [0, 10)}}
+  a[5][10.0] = f; // Only the out-of-range row is listed, not the floating-point column.
+  // expected-error@-1 {{matrix row index is outside the allowed range [0, 5)}}
+
+  a[3] = 5.0; // A lone row subscript cannot be assigned to.
+  // expected-error@-1 {{single subscript expressions are not allowed for matrix values}}
+}
+
+void extract(sx5x10_t a, float f) {
+  // Non-integer indexes: a float subscript is rejected in the row or the column position.
+  float v1 = a[3][f];
+  // expected-error@-1 {{matrix column index is not an integer}}
+  float v2 = a[f][9];
+  // expected-error@-1 {{matrix row index is not an integer}}
+  float v3 = a[f][f]; // Both subscripts are floats; only the row diagnostic is listed.
+  // expected-error@-1 {{matrix row index is not an integer}}
+
+  // Invalid element type: the subscript yields 'float', which cannot initialize a 'char *'.
+  char *v4 = a[3][4];
+  // expected-error@-1 {{initializing 'char *' with an expression of incompatible type 'float'}}
+
+  // Constant indexes outside the allowed dimensions: rows are [0, 5), columns are [0, 10).
+  float v5 = a[-1][3];
+  // expected-error@-1 {{matrix row index is outside the allowed range [0, 5)}}
+  float v6 = a[3][-1];
+  // expected-error@-1 {{matrix column index is outside the allowed range [0, 10)}}
+  float v8 = a[-1u][3]; // NOTE(review): v7 is skipped; insert()'s 'a[3][-1u]' case has no counterpart here.
+  // expected-error@-1 {{matrix row index is outside the allowed range [0, 5)}}
+  float v9 = a[5][2]; // First row index past the end.
+  // expected-error@-1 {{matrix row index is outside the allowed range [0, 5)}}
+  float v10 = a[4][10]; // First column index past the end.
+  // expected-error@-1 {{matrix column index is outside the allowed range [0, 10)}}
+  float v11 = a[5][10.0]; // Only the out-of-range row is listed, not the floating-point column.
+  // expected-error@-1 {{matrix row index is outside the allowed range [0, 5)}}
+
+  float v12 = a[3]; // A lone row subscript cannot be read from.
+  // expected-error@-1 {{single subscript expressions are not allowed for matrix values}}
+}
diff --git a/clang/test/SemaCXX/matrix-type-operators.cpp b/clang/test/SemaCXX/matrix-type-operators.cpp
new file mode 100644
--- /dev/null
+++ b/clang/test/SemaCXX/matrix-type-operators.cpp
@@ -0,0 +1,68 @@
+// RUN: %clang_cc1 %s -fenable-matrix -pedantic -std=c++11 -verify -triple=x86_64-apple-darwin9
+
+typedef float sx5x10_t __attribute__((matrix_type(5, 10))); // 5 rows x 10 columns of float.
+
+void insert(sx5x10_t a, float f) {
+  // Non-integer indexes: a float subscript is rejected in the row or the column position.
+  a[3][f] = 0;
+  // expected-error@-1 {{matrix column index is not an integer}}
+  a[f][9] = 0;
+  // expected-error@-1 {{matrix row index is not an integer}}
+  a[f][f] = 0; // Both subscripts are floats; only the row diagnostic is listed.
+  // expected-error@-1 {{matrix row index is not an integer}}
+
+  // Invalid element type: elements are 'float', so a 'float *' cannot be stored.
+  a[3][4] = &f;
+  // expected-error@-1 {{assigning to 'float' from incompatible type 'float *'; remove &}}
+
+  // Constant indexes outside the allowed dimensions: rows are [0, 5), columns are [0, 10).
+  a[-1][3] = 10.0;
+  // expected-error@-1 {{matrix row index is outside the allowed range [0, 5)}}
+  a[3][-1] = 10.0;
+  // expected-error@-1 {{matrix column index is outside the allowed range [0, 10)}}
+  a[3][-1u] = 10.0; // -1u is a very large unsigned value, so it lies past the last column.
+  // expected-error@-1 {{matrix column index is outside the allowed range [0, 10)}}
+  a[-1u][3] = 10.0; // Same wrap-around value, this time in the row position.
+  // expected-error@-1 {{matrix row index is outside the allowed range [0, 5)}}
+  a[5][2] = 10.0; // First row index past the end.
+  // expected-error@-1 {{matrix row index is outside the allowed range [0, 5)}}
+  a[4][10] = 10.0; // First column index past the end.
+  // expected-error@-1 {{matrix column index is outside the allowed range [0, 10)}}
+  a[5][10.0] = f; // Only the out-of-range row is listed, not the floating-point column.
+  // expected-error@-1 {{matrix row index is outside the allowed range [0, 5)}}
+}
+
+void extract(sx5x10_t a, float f) {
+  // Non-integer indexes: a float subscript is rejected in the row or the column position.
+  float v1 = a[3][f];
+  // expected-error@-1 {{matrix column index is not an integer}}
+  float v2 = a[f][9];
+  // expected-error@-1 {{matrix row index is not an integer}}
+  float v3 = a[f][f]; // Both subscripts are floats; only the row diagnostic is listed.
+  // expected-error@-1 {{matrix row index is not an integer}}
+
+  // Invalid element type: the subscript is a 'float' lvalue, which cannot initialize a 'char *'.
+  char *v4 = a[3][4];
+  // expected-error@-1 {{cannot initialize a variable of type 'char *' with an lvalue of type 'float'}}
+
+  // Constant indexes outside the allowed dimensions: rows are [0, 5), columns are [0, 10).
+  float v5 = a[-1][3];
+  // expected-error@-1 {{matrix row index is outside the allowed range [0, 5)}}
+  float v6 = a[3][-1];
+  // expected-error@-1 {{matrix column index is outside the allowed range [0, 10)}}
+  float v8 = a[-1u][3]; // NOTE(review): v7 is skipped; insert()'s 'a[3][-1u]' case has no counterpart here.
+  // expected-error@-1 {{matrix row index is outside the allowed range [0, 5)}}
+  float v9 = a[5][2]; // First row index past the end.
+  // expected-error@-1 {{matrix row index is outside the allowed range [0, 5)}}
+  float v10 = a[4][10]; // First column index past the end.
+  // expected-error@-1 {{matrix column index is outside the allowed range [0, 10)}}
+  float v11 = a[5][10.0]; // Only the out-of-range row is listed, not the floating-point column.
+  // expected-error@-1 {{matrix row index is outside the allowed range [0, 5)}}
+}
+
+void incomplete_matrix_index_expr(sx5x10_t a, float f) { // A matrix value needs both a row and a column subscript.
+  float x = a[3]; // Reading through a lone row subscript is rejected.
+  // expected-error@-1 {{single subscript expressions are not allowed for matrix values}}
+  a[2] = f; // Writing through a lone row subscript is rejected as well.
+  // expected-error@-1 {{single subscript expressions are not allowed for matrix values}}
+}