Index: lib/Target/ARM/ARM.td
===================================================================
--- lib/Target/ARM/ARM.td
+++ lib/Target/ARM/ARM.td
@@ -114,6 +114,9 @@
 def FeatureCRC            : SubtargetFeature<"crc", "HasCRC", "true",
                                              "Enable support for CRC instructions">;
 
+def FeatureDotProd        : SubtargetFeature<"dotprod", "HasDotProd", "true",
+                                             "Enable support for dot product instructions",
+                                             [FeatureNEON]>;
 
 // Not to be confused with FeatureHasRetAddrStack (return address stack)
 def FeatureRAS            : SubtargetFeature<"ras", "HasRAS", "true",
Index: lib/Target/ARM/ARMInstrInfo.td
===================================================================
--- lib/Target/ARM/ARMInstrInfo.td
+++ lib/Target/ARM/ARMInstrInfo.td
@@ -259,6 +259,8 @@
                                  AssemblerPredicate<"FeatureNEON", "NEON">;
 def HasCrypto        : Predicate<"Subtarget->hasCrypto()">,
                                  AssemblerPredicate<"FeatureCrypto", "crypto">;
+def HasDotProd       : Predicate<"Subtarget->hasDotProd()">,
+                                 AssemblerPredicate<"FeatureDotProd", "dotprod">;
 def HasCRC           : Predicate<"Subtarget->hasCRC()">,
                                  AssemblerPredicate<"FeatureCRC", "crc">;
 def HasRAS           : Predicate<"Subtarget->hasRAS()">,
@@ -5037,6 +5039,8 @@
   let Inst{15-12} = CRd;
   let Inst{19-16} = CRn;
   let Inst{23-20} = opc1;
+
+  let DecoderNamespace = "CoProc";
 }
 
 def CDP2 : ABXI<0b1110, (outs), (ins p_imm:$cop, imm0_15:$opc1,
@@ -5060,6 +5064,8 @@
   let Inst{15-12} = CRd;
   let Inst{19-16} = CRn;
   let Inst{23-20} = opc1;
+
+  let DecoderNamespace = "CoProc";
 }
 
 class ACI<dag oops, dag iops, string opc, string asm,
@@ -5075,6 +5081,8 @@
   let Inst{31-28} = 0b1111;
   let Inst{27-25} = 0b110;
 }
+
+let DecoderNamespace = "CoProc" in {
 multiclass LdStCop<bit load, bit Dbit, string asm, list<dag> pattern> {
   def _OFFSET : ACI<(outs), (ins p_imm:$cop, c_imm:$CRd, addrmode5:$addr),
                     asm, "\t$cop, $CRd, $addr", pattern> {
@@ -5228,6 +5236,8 @@
 defm STC2  : LdSt2Cop<0, 0, "stc2", [(int_arm_stc2 imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[IsARM,PreV8]>;
 defm STC2L : LdSt2Cop<0, 1, "stc2l", [(int_arm_stc2l imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[IsARM,PreV8]>;
 
+} // DecoderNamespace = "CoProc"
+
 //===----------------------------------------------------------------------===//
 // Move between coprocessor and ARM core register.
 //
@@ -5252,6 +5262,8 @@
   let Inst{7-5}   = opc2;
   let Inst{3-0}   = CRm;
   let Inst{19-16} = CRn;
+
+  let DecoderNamespace = "CoProc";
 }
 
 def MCR : MovRCopro<"mcr", 0 /* from ARM core register to coprocessor */,
@@ -5296,6 +5308,8 @@
   let Inst{7-5}   = opc2;
   let Inst{3-0}   = CRm;
   let Inst{19-16} = CRn;
+
+  let DecoderNamespace = "CoProc";
 }
 
 def MCR2 : MovRCopro2<"mcr2", 0 /* from ARM core register to coprocessor */,
Index: lib/Target/ARM/ARMInstrNEON.td
===================================================================
--- lib/Target/ARM/ARMInstrNEON.td
+++ lib/Target/ARM/ARMInstrNEON.td
@@ -4672,6 +4672,42 @@
           (VFMSfq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
       Requires<[HasVFP4]>;
 
+// ARMv8.2a dot product instructions. They live in the VFPV8 decoder
+// namespace because the ARM and Thumb encodings are identical, so the
+// disassembler needs no further bit twiddling.
+let Predicates = [HasDotProd], DecoderNamespace = "VFPV8" in {
+
+def VUDOTD : N3Vnp<0b11000, 0b10, 0b1101, 0b0, 0b1,
+                  (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm),
+                  N3RegFrm, IIC_VDOTPROD, "vudot", "u8", []>;
+def VSDOTD : N3Vnp<0b11000, 0b10, 0b1101, 0b0, 0b0,
+                  (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm),
+                  N3RegFrm, IIC_VDOTPROD, "vsdot", "s8", []>;
+def VUDOTQ : N3Vnp<0b11000, 0b10, 0b1101, 0b1, 0b1,
+                  (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm),
+                  N3RegFrm, IIC_VDOTPROD, "vudot", "u8", []>;
+def VSDOTQ : N3Vnp<0b11000, 0b10, 0b1101, 0b1, 0b0,
+                  (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm),
+                  N3RegFrm, IIC_VDOTPROD, "vsdot", "s8", []>;
+
+// Indexed dot product instructions. Inst{5} encodes the lane instead of
+// Vm{4}, so $Vm must be one of d0-d15, i.e. a DPR_VFP2 register.
+class DOTI<string opc, string dt, bit Q, bit U, RegisterClass Ty> :
+  N3Vnp<0b11100, 0b10, 0b1101, Q, U,
+       (outs Ty:$Vd), (ins Ty:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
+       N3RegFrm, IIC_VDOTPROD, opc, dt, []> {
+  bit lane;
+  let Inst{5} = lane;
+  let AsmString = !strconcat(opc, ".", dt, "\t$Vd, $Vn, $Vm$lane");
+}
+
+def VUDOTDI : DOTI<"vudot", "u8", 0b0, 0b1, DPR>;
+def VSDOTDI : DOTI<"vsdot", "s8", 0b0, 0b0, DPR>;
+def VUDOTQI : DOTI<"vudot", "u8", 0b1, 0b1, QPR>;
+def VSDOTQI : DOTI<"vsdot", "s8", 0b1, 0b0, QPR>;
+
+}  // HasDotProd
+
 // Vector Subtract Operations.
 
 //   VSUB     : Vector Subtract (integer and floating-point)
Index: lib/Target/ARM/ARMInstrThumb2.td
===================================================================
--- lib/Target/ARM/ARMInstrThumb2.td
+++ lib/Target/ARM/ARMInstrThumb2.td
@@ -3964,6 +3964,7 @@
   }
 }
 
+let DecoderNamespace = "Thumb2CoProc" in {
 defm t2LDC   : t2LdStCop<0b1110, 1, 0, "ldc", [(int_arm_ldc imm:$cop, imm:$CRd, addrmode5:$addr)]>;
 defm t2LDCL  : t2LdStCop<0b1110, 1, 1, "ldcl", [(int_arm_ldcl imm:$cop, imm:$CRd, addrmode5:$addr)]>;
 defm t2LDC2  : t2LdStCop<0b1111, 1, 0, "ldc2", [(int_arm_ldc2 imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[PreV8,IsThumb2]>;
@@ -3973,6 +3974,7 @@
 defm t2STCL  : t2LdStCop<0b1110, 0, 1, "stcl", [(int_arm_stcl imm:$cop, imm:$CRd, addrmode5:$addr)]>;
 defm t2STC2  : t2LdStCop<0b1111, 0, 0, "stc2", [(int_arm_stc2 imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[PreV8,IsThumb2]>;
 defm t2STC2L : t2LdStCop<0b1111, 0, 1, "stc2l", [(int_arm_stc2l imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[PreV8,IsThumb2]>;
+} // DecoderNamespace = "Thumb2CoProc"
 
 
 //===----------------------------------------------------------------------===//
@@ -4125,6 +4127,8 @@
   let Inst{7-5}   = opc2;
   let Inst{3-0}   = CRm;
   let Inst{19-16} = CRn;
+
+  let DecoderNamespace = "Thumb2CoProc";
 }
 
 class t2MovRRCopro<bits<4> Op, string opc, bit direction, dag oops, dag iops,
@@ -4145,6 +4149,8 @@
   let Inst{11-8}  = cop;
   let Inst{7-4}   = opc1;
   let Inst{3-0}   = CRm;
+
+  let DecoderNamespace = "Thumb2CoProc";
 }
 
 /* from ARM core register to coprocessor */
@@ -4243,6 +4249,7 @@
   let Inst{23-20} = opc1;
 
   let Predicates = [IsThumb2, PreV8];
+  let DecoderNamespace = "Thumb2CoProc";
 }
 
 def t2CDP2 : T2Cop<0b1111, (outs), (ins p_imm:$cop, imm0_15:$opc1,
@@ -4268,6 +4275,7 @@
   let Inst{23-20} = opc1;
 
   let Predicates = [IsThumb2, PreV8];
+  let DecoderNamespace = "Thumb2CoProc";
 }
 
 
Index: lib/Target/ARM/ARMSchedule.td
===================================================================
--- lib/Target/ARM/ARMSchedule.td
+++ lib/Target/ARM/ARMSchedule.td
@@ -414,6 +414,7 @@
 def IIC_VTBX2      : InstrItinClass;
 def IIC_VTBX3      : InstrItinClass;
 def IIC_VTBX4      : InstrItinClass;
+def IIC_VDOTPROD   : InstrItinClass;
 
 //===----------------------------------------------------------------------===//
 // Processor instruction itineraries.
Index: lib/Target/ARM/ARMSubtarget.h
===================================================================
--- lib/Target/ARM/ARMSubtarget.h
+++ lib/Target/ARM/ARMSubtarget.h
@@ -156,6 +156,9 @@
   bool HasFPARMv8 = false;
   bool HasNEON = false;
 
+  /// HasDotProd - True if the ARMv8.2A dot product instructions are supported.
+  bool HasDotProd = false;
+
   /// UseNEONForSinglePrecisionFP - if the NEONFP attribute has been
   /// specified. Use the method useNEONForSinglePrecisionFP() to
   /// determine if NEON should actually be used.
@@ -521,6 +524,7 @@
   bool hasFPARMv8() const { return HasFPARMv8; }
   bool hasNEON() const { return HasNEON;  }
   bool hasCrypto() const { return HasCrypto; }
+  bool hasDotProd() const { return HasDotProd; }
   bool hasCRC() const { return HasCRC; }
   bool hasRAS() const { return HasRAS; }
   bool hasVirtualization() const { return HasVirtualization; }
Index: lib/Target/ARM/AsmParser/ARMAsmParser.cpp
===================================================================
--- lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -5348,7 +5348,8 @@
       Mnemonic == "vcvtm" || Mnemonic == "vrinta" || Mnemonic == "vrintn" ||
       Mnemonic == "vrintp" || Mnemonic == "vrintm" || Mnemonic == "hvc" ||
       Mnemonic.startswith("vsel") || Mnemonic == "vins" || Mnemonic == "vmovx" ||
-      Mnemonic == "bxns"  || Mnemonic == "blxns")
+      Mnemonic == "bxns"  || Mnemonic == "blxns" ||
+      Mnemonic == "vudot" || Mnemonic == "vsdot")
     return Mnemonic;
 
   // First, split out any predication code. Ignore mnemonics we know aren't
@@ -5454,7 +5455,8 @@
       Mnemonic.startswith("aes") || Mnemonic == "hvc" || Mnemonic == "setpan" ||
       Mnemonic.startswith("sha1") || Mnemonic.startswith("sha256") ||
       (FullInst.startswith("vmull") && FullInst.endswith(".p64")) ||
-      Mnemonic == "vmovx" || Mnemonic == "vins") {
+      Mnemonic == "vmovx" || Mnemonic == "vins" ||
+      Mnemonic == "vudot" || Mnemonic == "vsdot") {
     // These mnemonics are never predicable
     CanAcceptPredicationCode = false;
   } else if (!isThumb()) {
Index: lib/Target/ARM/Disassembler/ARMDisassembler.cpp
===================================================================
--- lib/Target/ARM/Disassembler/ARMDisassembler.cpp
+++ lib/Target/ARM/Disassembler/ARMDisassembler.cpp
@@ -486,6 +486,16 @@
     }
   }
 
+  // Coprocessor instructions are decoded last. Their encodings overlap those
+  // of other instructions (e.g. the dot product instructions), so every other
+  // decoder table above must be given the chance to match first.
+  Result =
+      decodeInstruction(DecoderTableCoProc32, MI, Insn, Address, this, STI);
+  if (Result != MCDisassembler::Fail) {
+    Size = 4;
+    return checkDecodedInstruction(MI, Size, Address, OS, CS, Insn, Result);
+  }
+
   Size = 4;
   return MCDisassembler::Fail;
 }
@@ -821,6 +831,17 @@
     }
   }
 
+  // Thumb2 coprocessor instructions are likewise the final fallback: their
+  // encodings overlap those of other instructions (e.g. dot product), so
+  // they are only tried once every other decoder table above has failed.
+  Result = decodeInstruction(DecoderTableThumb2CoProc32, MI, Insn32, Address,
+                             this, STI);
+  if (Result != MCDisassembler::Fail) {
+    Size = 4;
+    Check(Result, AddThumbPredicate(MI));
+    return Result;
+  }
+
   Size = 0;
   return MCDisassembler::Fail;
 }
Index: test/MC/ARM/armv8.2a-dotprod-a32.s
===================================================================
--- /dev/null
+++ test/MC/ARM/armv8.2a-dotprod-a32.s
@@ -0,0 +1,37 @@
+// RUN: llvm-mc -triple arm -mattr=+dotprod -show-encoding < %s | FileCheck %s  --check-prefix=CHECK
+
+// RUN: not llvm-mc -triple arm -mattr=-dotprod -show-encoding < %s 2> %t
+// RUN: FileCheck --check-prefix=CHECK-NO-DOTPROD < %t %s
+// RUN: not llvm-mc -triple arm -show-encoding < %s 2> %t
+// RUN: FileCheck --check-prefix=CHECK-NO-DOTPROD < %t %s
+// RUN: not llvm-mc -triple arm -mattr=+v8.1a -show-encoding < %s 2> %t
+// RUN: FileCheck --check-prefix=CHECK-NO-DOTPROD < %t %s
+// RUN: not llvm-mc -triple arm -mattr=+v8.2a -show-encoding < %s 2> %t
+// RUN: FileCheck --check-prefix=CHECK-NO-DOTPROD < %t %s
+
+vudot.u8 d0, d1, d2
+vsdot.s8 d0, d1, d2
+vudot.u8 q0, q1, q4
+vsdot.s8 q0, q1, q4
+vudot.u8 d0, d1, d2[0]
+vsdot.s8 d0, d1, d2[1]
+vudot.u8 q0, q1, d4[0]
+vsdot.s8 q0, q1, d4[1]
+
+// CHECK: vudot.u8  d0, d1, d2      @ encoding: [0x12,0x0d,0x21,0xfc]
+// CHECK: vsdot.s8  d0, d1, d2      @ encoding: [0x02,0x0d,0x21,0xfc]
+// CHECK: vudot.u8  q0, q1, q4      @ encoding: [0x58,0x0d,0x22,0xfc]
+// CHECK: vsdot.s8  q0, q1, q4      @ encoding: [0x48,0x0d,0x22,0xfc]
+// CHECK: vudot.u8  d0, d1, d2[0]   @ encoding: [0x12,0x0d,0x21,0xfe]
+// CHECK: vsdot.s8  d0, d1, d2[1]   @ encoding: [0x22,0x0d,0x21,0xfe]
+// CHECK: vudot.u8  q0, q1, d4[0]   @ encoding: [0x54,0x0d,0x22,0xfe]
+// CHECK: vsdot.s8  q0, q1, d4[1]   @ encoding: [0x64,0x0d,0x22,0xfe]
+
+// CHECK-NO-DOTPROD: error: instruction requires: dotprod
+// CHECK-NO-DOTPROD: error: instruction requires: dotprod
+// CHECK-NO-DOTPROD: error: instruction requires: dotprod
+// CHECK-NO-DOTPROD: error: instruction requires: dotprod
+// CHECK-NO-DOTPROD: error: instruction requires: dotprod
+// CHECK-NO-DOTPROD: error: instruction requires: dotprod
+// CHECK-NO-DOTPROD: error: instruction requires: dotprod
+// CHECK-NO-DOTPROD: error: instruction requires: dotprod
Index: test/MC/ARM/armv8.2a-dotprod-error.s
===================================================================
--- /dev/null
+++ test/MC/ARM/armv8.2a-dotprod-error.s
@@ -0,0 +1,14 @@
+// RUN: not llvm-mc -triple arm -mattr=+dotprod -show-encoding < %s 2> %t
+// RUN: FileCheck --check-prefix=CHECK-ERROR < %t %s
+// RUN: not llvm-mc -triple thumb -mattr=+dotprod -show-encoding < %s 2> %t
+// RUN: FileCheck --check-prefix=CHECK-ERROR < %t %s
+
+vudot.u8 d0, d1, d2[2]
+vsdot.s8 d0, d1, d2[2]
+vudot.u8 q0, q1, d4[2]
+vsdot.s8 q0, q1, d4[2]
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: invalid operand for instruction
Index: test/MC/ARM/armv8.2a-dotprod-t32.s
===================================================================
--- /dev/null
+++ test/MC/ARM/armv8.2a-dotprod-t32.s
@@ -0,0 +1,38 @@
+// RUN: llvm-mc -triple thumb -mattr=+dotprod -show-encoding < %s | FileCheck %s  --check-prefix=CHECK
+
+// RUN: not llvm-mc -triple thumb -mattr=-dotprod -show-encoding < %s 2> %t
+// RUN: FileCheck --check-prefix=CHECK-ERROR < %t %s
+// RUN: not llvm-mc -triple thumb -show-encoding < %s 2> %t
+// RUN: FileCheck --check-prefix=CHECK-ERROR < %t %s
+// RUN: not llvm-mc -triple thumb -mattr=+v8.1a -show-encoding < %s 2> %t
+// RUN: FileCheck --check-prefix=CHECK-ERROR < %t %s
+// RUN: not llvm-mc -triple thumb -mattr=+v8.2a -show-encoding < %s 2> %t
+// RUN: FileCheck --check-prefix=CHECK-ERROR < %t %s
+
+  vudot.u8  d0, d1, d2
+  vsdot.s8  d0, d1, d2
+  vudot.u8  q0, q1, q4
+  vsdot.s8  q0, q1, q4
+  vudot.u8  d0, d1, d2[0]
+  vsdot.s8  d0, d1, d2[1]
+  vudot.u8  q0, q1, d4[0]
+  vsdot.s8  q0, q1, d4[1]
+
+//CHECK:  vudot.u8  d0, d1, d2      @ encoding: [0x21,0xfc,0x12,0x0d]
+//CHECK:  vsdot.s8  d0, d1, d2      @ encoding: [0x21,0xfc,0x02,0x0d]
+//CHECK:  vudot.u8  q0, q1, q4      @ encoding: [0x22,0xfc,0x58,0x0d]
+//CHECK:  vsdot.s8  q0, q1, q4      @ encoding: [0x22,0xfc,0x48,0x0d]
+//CHECK:  vudot.u8  d0, d1, d2[0]   @ encoding: [0x21,0xfe,0x12,0x0d]
+//CHECK:  vsdot.s8  d0, d1, d2[1]   @ encoding: [0x21,0xfe,0x22,0x0d]
+//CHECK:  vudot.u8  q0, q1, d4[0]   @ encoding: [0x22,0xfe,0x54,0x0d]
+//CHECK:  vsdot.s8  q0, q1, d4[1]   @ encoding: [0x22,0xfe,0x64,0x0d]
+
+//CHECK-ERROR: error: instruction requires: dotprod
+//CHECK-ERROR: error: instruction requires: dotprod
+//CHECK-ERROR: error: instruction requires: dotprod
+//CHECK-ERROR: error: instruction requires: dotprod
+//CHECK-ERROR: error: instruction requires: dotprod
+//CHECK-ERROR: error: instruction requires: dotprod
+//CHECK-ERROR: error: instruction requires: dotprod
+//CHECK-ERROR: error: instruction requires: dotprod
+
Index: test/MC/Disassembler/ARM/armv8.2a-dotprod-a32.s
===================================================================
--- /dev/null
+++ test/MC/Disassembler/ARM/armv8.2a-dotprod-a32.s
@@ -0,0 +1,33 @@
+# RUN: llvm-mc -triple arm-none-linux-gnu -mattr=+dotprod --disassemble < %s | FileCheck %s
+# RUN: llvm-mc -triple arm-none-linux-gnu -mattr=-dotprod --disassemble < %s 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR
+
+0x12,0x0d,0x21,0xfc
+0x02,0x0d,0x21,0xfc
+0x58,0x0d,0x22,0xfc
+0x48,0x0d,0x22,0xfc
+0x12,0x0d,0x21,0xfe
+0x22,0x0d,0x21,0xfe
+0x54,0x0d,0x22,0xfe
+0x64,0x0d,0x22,0xfe
+
+#CHECK: vudot.u8  d0, d1, d2
+#CHECK: vsdot.s8  d0, d1, d2
+#CHECK: vudot.u8  q0, q1, q4
+#CHECK: vsdot.s8  q0, q1, q4
+#CHECK: vudot.u8  d0, d1, d2[0]
+#CHECK: vsdot.s8  d0, d1, d2[1]
+#CHECK: vudot.u8  q0, q1, d4[0]
+#CHECK: vsdot.s8  q0, q1, d4[1]
+
+# Without dot product support enabled, the same encodings are disassembled as
+# these coprocessor instructions:
+
+#CHECK-ERROR: stc2  p13, c0, [r1], #-72
+#CHECK-ERROR: stc2  p13, c0, [r1], #-8
+#CHECK-ERROR: stc2  p13, c0, [r2], #-352
+#CHECK-ERROR: stc2  p13, c0, [r2], #-288
+#CHECK-ERROR: mcr2  p13, #1, r0, c1, c2, #0
+#CHECK-ERROR: cdp2  p13, #2, c0, c1, c2, #1
+#CHECK-ERROR: mcr2  p13, #1, r0, c2, c4, #2
+#CHECK-ERROR: cdp2  p13, #2, c0, c2, c4, #3
+
Index: test/MC/Disassembler/ARM/armv8.2a-dotprod-t32.s
===================================================================
--- /dev/null
+++ test/MC/Disassembler/ARM/armv8.2a-dotprod-t32.s
@@ -0,0 +1,29 @@
+# RUN: llvm-mc -triple thumbv7a -mattr=+dotprod --disassemble < %s | FileCheck %s
+# RUN: llvm-mc -triple thumbv7a -mattr=-dotprod --disassemble < %s 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR
+
+[0x21,0xfc,0x12,0x0d]
+[0x21,0xfc,0x02,0x0d]
+[0x22,0xfc,0x58,0x0d]
+[0x22,0xfc,0x48,0x0d]
+[0x21,0xfe,0x12,0x0d]
+[0x21,0xfe,0x22,0x0d]
+[0x22,0xfe,0x54,0x0d]
+[0x22,0xfe,0x64,0x0d]
+
+#CHECK: vudot.u8  d0, d1, d2
+#CHECK: vsdot.s8  d0, d1, d2
+#CHECK: vudot.u8  q0, q1, q4
+#CHECK: vsdot.s8  q0, q1, q4
+#CHECK: vudot.u8  d0, d1, d2[0]
+#CHECK: vsdot.s8  d0, d1, d2[1]
+#CHECK: vudot.u8  q0, q1, d4[0]
+#CHECK: vsdot.s8  q0, q1, d4[1]
+
+#CHECK-ERROR:  stc2  p13, c0, [r1], #-72
+#CHECK-ERROR:  stc2  p13, c0, [r1], #-8
+#CHECK-ERROR:  stc2  p13, c0, [r2], #-352
+#CHECK-ERROR:  stc2  p13, c0, [r2], #-288
+#CHECK-ERROR:  mcr2  p13, #1, r0, c1, c2, #0
+#CHECK-ERROR:  cdp2  p13, #2, c0, c1, c2, #1
+#CHECK-ERROR:  mcr2  p13, #1, r0, c2, c4, #2
+#CHECK-ERROR:  cdp2  p13, #2, c0, c2, c4, #3