diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h --- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h @@ -477,6 +477,21 @@ return Opc == ARM::B || Opc == ARM::tB || Opc == ARM::t2B; } +static inline bool isVPTOpcode(int Opc) { + return Opc == ARM::t2VPTv16i8 || Opc == ARM::t2VPTv16u8 || + Opc == ARM::t2VPTv16s8 || Opc == ARM::t2VPTv8i16 || + Opc == ARM::t2VPTv8u16 || Opc == ARM::t2VPTv8s16 || + Opc == ARM::t2VPTv4i32 || Opc == ARM::t2VPTv4u32 || + Opc == ARM::t2VPTv4s32 || Opc == ARM::t2VPTv4f32 || + Opc == ARM::t2VPTv8f16 || Opc == ARM::t2VPTv16i8r || + Opc == ARM::t2VPTv16u8r || Opc == ARM::t2VPTv16s8r || + Opc == ARM::t2VPTv8i16r || Opc == ARM::t2VPTv8u16r || + Opc == ARM::t2VPTv8s16r || Opc == ARM::t2VPTv4i32r || + Opc == ARM::t2VPTv4u32r || Opc == ARM::t2VPTv4s32r || + Opc == ARM::t2VPTv4f32r || Opc == ARM::t2VPTv8f16r || + Opc == ARM::t2VPST; +} + static inline bool isCondBranchOpcode(int Opc) { return Opc == ARM::Bcc || Opc == ARM::tBcc || Opc == ARM::t2Bcc; diff --git a/llvm/lib/Target/ARM/ARMInstrFormats.td b/llvm/lib/Target/ARM/ARMInstrFormats.td --- a/llvm/lib/Target/ARM/ARMInstrFormats.td +++ b/llvm/lib/Target/ARM/ARMInstrFormats.td @@ -185,6 +185,86 @@ let DecoderMethod = "DecodeCCOutOperand"; } +// VPT predicate + +def VPTPredNOperand : AsmOperandClass { + let Name = "VPTPredN"; + let PredicateMethod = "isVPTPred"; +} +def VPTPredROperand : AsmOperandClass { + let Name = "VPTPredR"; + let PredicateMethod = "isVPTPred"; +} +def undef_tied_input; + +// Operand classes for the cluster of MC operands describing a +// VPT-predicated MVE instruction. +// +// There are two of these classes. 
Both of them have the same first +// two options: +// +// $cond (an integer) indicates the instruction's predication status: +// * ARMVCC::None means it's unpredicated +// * ARMVCC::Then means it's in a VPT block and appears with the T suffix +// * ARMVCC::Else means it's in a VPT block and appears with the E suffix. +// During code generation, unpredicated and predicated instructions +// are indicated by setting this parameter to 'None' or to 'Then'; the +// third value 'Else' is only used for assembly and disassembly. +// +// $cond_reg (type VCCR) gives the input predicate register. This is +// always either zero_reg or VPR, but needs to be modelled as an +// explicit operand so that it can be register-allocated and spilled +// when these operands are used in code generation). +// +// For 'vpred_r', there's an extra operand $inactive, which specifies +// the vector register which will supply any lanes of the output +// register that the predication mask prevents from being written by +// this instruction. It's always tied to the actual output register +// (i.e. must be allocated into the same physical reg), but again, +// code generation will need to model it as a separate input value. +// +// 'vpred_n' doesn't have that extra operand: it only has $cond and +// $cond_reg. This variant is used for any instruction that can't, or +// doesn't want to, tie $inactive to the output register. Sometimes +// that's because another input parameter is already tied to it (e.g. +// instructions that both read and write their Qd register even when +// unpredicated, either because they only partially overwrite it like +// a narrowing integer conversion, or simply because the instruction +// encoding doesn't have enough register fields to make the output +// independent of all inputs). 
It can also be because the instruction +// is defined to set disabled output lanes to zero rather than leaving +// them unchanged (vector loads), or because it doesn't output a +// vector register at all (stores, compares). In any of these +// situations it's unnecessary to have an extra operand tied to the +// output, and inconvenient to leave it there unused. + +// Base class for both kinds of vpred. +class vpred_ops : OperandWithDefaultOps { + let PrintMethod = "printVPTPredicateOperand"; + let OperandNamespace = "ARM"; + let MIOperandInfo = !con((ops i32imm:$cond, VCCR:$cond_reg), extra_mi); + + // For convenience, we provide a string value that can be appended + // to the constraints string. It's empty for vpred_n, and for + // vpred_r it ties the $inactive operand to the output q-register + // (which by convention will be called $Qd). + string vpred_constraint; +} + +def vpred_r : vpred_ops<(ops (v4i32 undef_tied_input)), (ops MQPR:$inactive)> { + let ParserMatchClass = VPTPredROperand; + let OperandType = "OPERAND_VPRED_R"; + let DecoderMethod = "DecodeVpredROperand"; + let vpred_constraint = ",$Qd = $vp.inactive"; +} + +def vpred_n : vpred_ops<(ops), (ops)> { + let ParserMatchClass = VPTPredNOperand; + let OperandType = "OPERAND_VPRED_N"; + let vpred_constraint = ""; +} + // ARM special operands for disassembly only. 
// def SetEndAsmOperand : ImmAsmOperand<0,1> { diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td --- a/llvm/lib/Target/ARM/ARMInstrMVE.td +++ b/llvm/lib/Target/ARM/ARMInstrMVE.td @@ -1,3 +1,97 @@ +// VPT condition mask +def vpt_mask : Operand { + let PrintMethod = "printVPTMask"; + let ParserMatchClass = it_mask_asmoperand; + let EncoderMethod = "getVPTMaskOpValue"; + let DecoderMethod = "DecodeVPTMaskOperand"; +} + +// VPT/VCMP restricted predicate for sign invariant types +def pred_restricted_i_asmoperand : AsmOperandClass { + let Name = "CondCodeRestrictedI"; + let RenderMethod = "addITCondCodeOperands"; + let PredicateMethod = "isITCondCodeRestrictedI"; + let ParserMethod = "parseITCondCode"; +} + +// VPT/VCMP restricted predicate for signed types +def pred_restricted_s_asmoperand : AsmOperandClass { + let Name = "CondCodeRestrictedS"; + let RenderMethod = "addITCondCodeOperands"; + let PredicateMethod = "isITCondCodeRestrictedS"; + let ParserMethod = "parseITCondCode"; +} + +// VPT/VCMP restricted predicate for unsigned types +def pred_restricted_u_asmoperand : AsmOperandClass { + let Name = "CondCodeRestrictedU"; + let RenderMethod = "addITCondCodeOperands"; + let PredicateMethod = "isITCondCodeRestrictedU"; + let ParserMethod = "parseITCondCode"; +} + +// VPT/VCMP restricted predicate for floating point +def pred_restricted_fp_asmoperand : AsmOperandClass { + let Name = "CondCodeRestrictedFP"; + let RenderMethod = "addITCondCodeOperands"; + let PredicateMethod = "isITCondCodeRestrictedFP"; + let ParserMethod = "parseITCondCode"; +} + +def pred_basic_i : Operand { + let PrintMethod = "printMandatoryRestrictedPredicateOperand"; + let ParserMatchClass = pred_restricted_i_asmoperand; + let DecoderMethod = "DecodeRestrictedIPredicateOperand"; + let EncoderMethod = "getRestrictedCondCodeOpValue"; +} + +def pred_basic_u : Operand { + let PrintMethod = "printMandatoryRestrictedPredicateOperand"; + let ParserMatchClass = 
pred_restricted_u_asmoperand; + let DecoderMethod = "DecodeRestrictedUPredicateOperand"; + let EncoderMethod = "getRestrictedCondCodeOpValue"; +} + +def pred_basic_s : Operand { + let PrintMethod = "printMandatoryRestrictedPredicateOperand"; + let ParserMatchClass = pred_restricted_s_asmoperand; + let DecoderMethod = "DecodeRestrictedSPredicateOperand"; + let EncoderMethod = "getRestrictedCondCodeOpValue"; +} + +def pred_basic_fp : Operand { + let PrintMethod = "printMandatoryRestrictedPredicateOperand"; + let ParserMatchClass = pred_restricted_fp_asmoperand; + let DecoderMethod = "DecodeRestrictedFPPredicateOperand"; + let EncoderMethod = "getRestrictedCondCodeOpValue"; +} + +class MVE_MI pattern> + : Thumb2XI, + Requires<[HasMVEInt]> { + let D = MVEDomain; + let DecoderNamespace = "MVE"; +} + +// MVE_p is used for most predicated instructions, to add the cluster +// of input operands that provides the VPT suffix (none, T or E) and +// the input predicate register. +class MVE_p pattern=[]> + : MVE_MI { + let Inst{31-29} = 0b111; + let Inst{27-26} = 0b11; +} + class MVE_MI_with_pred pattern> : Thumb2I pattern=[]> +// Always use vpred_n and not vpred_r: with the output register being +// a GPR and not a vector register, there can't be any question of +// what to put in its inactive lanes. 
+ : MVE_p { + + let Inst{25-23} = 0b101; + let Inst{11-9} = 0b111; + let Inst{4} = 0b0; +} + +class t2VABAV size, list pattern=[]> + : MVE_rDest<(outs rGPR:$Rda), (ins rGPR:$Rda_src, MQPR:$Qn, MQPR:$Qm), + NoItinerary, "vabav", suffix, "$Rda, $Qn, $Qm", "$Rda = $Rda_src", + pattern> { + bits<4> Qm; + bits<4> Qn; + bits<4> Rda; + + let Inst{28} = U; + let Inst{22} = 0b0; + let Inst{21-20} = size{1-0}; + let Inst{19-17} = Qn{2-0}; + let Inst{16} = 0b0; + let Inst{15-12} = Rda{3-0}; + let Inst{8} = 0b1; + let Inst{7} = Qn{3}; + let Inst{6} = 0b0; + let Inst{5} = Qm{3}; + let Inst{3-1} = Qm{2-0}; + let Inst{0} = 0b1; +} + +def VABAVs8 : t2VABAV<"s8", 0b0, 0b00>; +def VABAVs16 : t2VABAV<"s16", 0b0, 0b01>; +def VABAVs32 : t2VABAV<"s32", 0b0, 0b10>; +def VABAVu8 : t2VABAV<"u8", 0b1, 0b00>; +def VABAVu16 : t2VABAV<"u16", 0b1, 0b01>; +def VABAVu32 : t2VABAV<"u32", 0b1, 0b10>; + +// end of mve_rDest instructions + +// start of mve_comp instructions + +class MVE_comp pattern=[]> + : MVE_p<(outs MQPR:$Qd), (ins MQPR:$Qn, MQPR:$Qm), itin, iname, suffix, + "$Qd, $Qn, $Qm", vpred_r, cstr, pattern> { + bits<4> Qd; + bits<4> Qn; + bits<4> Qm; + + let Inst{22} = Qd{3}; + let Inst{19-17} = Qn{2-0}; + let Inst{16} = 0b0; + let Inst{15-13} = Qd{2-0}; + let Inst{12} = 0b0; + let Inst{10-9} = 0b11; + let Inst{7} = Qn{3}; + let Inst{5} = Qm{3}; + let Inst{3-1} = Qm{2-0}; + let Inst{0} = 0b0; +} + +class VMINMAXNM pattern=[]> + : MVE_comp { + + let Inst{28} = 0b1; + let Inst{25-24} = 0b11; + let Inst{23} = 0b0; + let Inst{21} = bit_21; + let Inst{20} = sz; + let Inst{11} = 0b1; + let Inst{8} = 0b1; + let Inst{6} = 0b1; + let Inst{4} = 0b1; + + let Predicates = [HasMVEFloat]; +} + +def VMAXNMf32 : VMINMAXNM<"vmaxnm", "f32", 0b0, 0b0>; +def VMAXNMf16 : VMINMAXNM<"vmaxnm", "f16", 0b1, 0b0>; + +def VMINNMf32 : VMINMAXNM<"vminnm", "f32", 0b0, 0b1>; +def VMINNMf16 : VMINMAXNM<"vminnm", "f16", 0b1, 0b1>; + +// end of mve_comp instructions + +class t2VPT size, dag iops, string asm, list 
pattern=[]> + : MVE_MI<(outs ), iops, NoItinerary, !strconcat("vpt", "${Mk}", ".", suffix), asm, "", pattern> { + bits<3> fc; + bits<4> Mk; + bits<3> Qn; + + let Inst{31-23} = 0b111111100; + let Inst{22} = Mk{3}; + let Inst{21-20} = size; + let Inst{19-17} = Qn{2-0}; + let Inst{16} = 0b1; + let Inst{15-13} = Mk{2-0}; + let Inst{12} = fc{2}; + let Inst{11-8} = 0b1111; + let Inst{7} = fc{0}; + let Inst{4} = 0b0; + + let Defs = [VPR, P0]; +} + +class t2VPTt1 size, dag iops> + : t2VPT { + bits<4> Qm; + bits<4> Mk; + + let Inst{6} = 0b0; + let Inst{5} = Qm{3}; + let Inst{3-1} = Qm{2-0}; + let Inst{0} = fc{1}; +} + +class t2VPTt1i size> + : t2VPTt1 { + let Inst{12} = 0b0; + let Inst{0} = 0b0; +} + +def t2VPTv4i32 : t2VPTt1i<"i32", 0b10>; +def t2VPTv8i16 : t2VPTt1i<"i16", 0b01>; +def t2VPTv16i8 : t2VPTt1i<"i8", 0b00>; + +class t2VPTt1u size> + : t2VPTt1 { + let Inst{12} = 0b0; + let Inst{0} = 0b1; +} + +def t2VPTv4u32 : t2VPTt1u<"u32", 0b10>; +def t2VPTv8u16 : t2VPTt1u<"u16", 0b01>; +def t2VPTv16u8 : t2VPTt1u<"u8", 0b00>; + +class t2VPTt1s size> + : t2VPTt1 { + let Inst{12} = 0b1; +} + +def t2VPTv4s32 : t2VPTt1s<"s32", 0b10>; +def t2VPTv8s16 : t2VPTt1s<"s16", 0b01>; +def t2VPTv16s8 : t2VPTt1s<"s8", 0b00>; + +class t2VPTt2 size, dag iops> + : t2VPT { + bits<4> Rm; + bits<3> fc; + bits<4> Mk; + + let Inst{6} = 0b1; + let Inst{5} = fc{1}; + let Inst{3-0} = Rm{3-0}; +} + +class t2VPTt2i size> + : t2VPTt2 { + let Inst{12} = 0b0; + let Inst{5} = 0b0; +} + +def t2VPTv4i32r : t2VPTt2i<"i32", 0b10>; +def t2VPTv8i16r : t2VPTt2i<"i16", 0b01>; +def t2VPTv16i8r : t2VPTt2i<"i8", 0b00>; + +class t2VPTt2u size> + : t2VPTt2 { + let Inst{12} = 0b0; + let Inst{5} = 0b1; +} + +def t2VPTv4u32r : t2VPTt2u<"u32", 0b10>; +def t2VPTv8u16r : t2VPTt2u<"u16", 0b01>; +def t2VPTv16u8r : t2VPTt2u<"u8", 0b00>; + +class t2VPTt2s size> + : t2VPTt2 { + let Inst{12} = 0b1; +} + +def t2VPTv4s32r : t2VPTt2s<"s32", 0b10>; +def t2VPTv8s16r : t2VPTt2s<"s16", 0b01>; +def t2VPTv16s8r : t2VPTt2s<"s8", 0b00>; + + 
+class t2VPTf pattern=[]> + : MVE_MI<(outs ), iops, NoItinerary, !strconcat("vpt", "${Mk}", ".", suffix), asm, + "", pattern> { + bits<3> fc; + bits<4> Mk; + bits<3> Qn; + + let Inst{31-29} = 0b111; + let Inst{28} = size; + let Inst{27-23} = 0b11100; + let Inst{22} = Mk{3}; + let Inst{21-20} = 0b11; + let Inst{19-17} = Qn{2-0}; + let Inst{16} = 0b1; + let Inst{15-13} = Mk{2-0}; + let Inst{12} = fc{2}; + let Inst{11-8} = 0b1111; + let Inst{7} = fc{0}; + let Inst{4} = 0b0; + + let Defs = [P0]; + let Predicates = [HasMVEFloat]; +} + +class t2VPTft1 + : t2VPTf { + bits<3> fc; + bits<4> Qm; + + let Inst{6} = 0b0; + let Inst{5} = Qm{3}; + let Inst{3-1} = Qm{2-0}; + let Inst{0} = fc{1}; +} + +def t2VPTv4f32 : t2VPTft1<"f32", 0b0>; +def t2VPTv8f16 : t2VPTft1<"f16", 0b1>; + +class t2VPTft2 + : t2VPTf { + bits<3> fc; + bits<4> Rm; + + let Inst{6} = 0b1; + let Inst{5} = fc{1}; + let Inst{3-0} = Rm{3-0}; +} + +def t2VPTv4f32r : t2VPTft2<"f32", 0b0>; +def t2VPTv8f16r : t2VPTft2<"f16", 0b1>; + +def t2VPST : MVE_MI<(outs ), (ins vpt_mask:$Mk), NoItinerary, + !strconcat("vpst", "${Mk}"), "", "", []> { + bits<4> Mk; + + let Inst{31-23} = 0b111111100; + let Inst{22} = Mk{3}; + let Inst{21-16} = 0b110001; + let Inst{15-13} = Mk{2-0}; + let Inst{12-0} = 0b0111101001101; + let Unpredictable{12} = 0b1; + let Unpredictable{7} = 0b1; + let Unpredictable{5} = 0b1; + + let Defs = [P0]; +} diff --git a/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp b/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp --- a/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp +++ b/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp @@ -160,6 +160,10 @@ "Subclass not added?"); assert(RBGPR.covers(*TRI.getRegClass(ARM::tGPR_and_tcGPRRegClassID)) && "Subclass not added?"); + assert(RBGPR.covers(*TRI.getRegClass(ARM::tGPREven_and_tGPR_and_tcGPRRegClassID)) && + "Subclass not added?"); + assert(RBGPR.covers(*TRI.getRegClass(ARM::tGPROdd_and_tcGPRRegClassID)) && + "Subclass not added?"); assert(RBGPR.getSize() == 32 && "GPRs should hold up 
to 32-bit"); #ifndef NDEBUG @@ -181,6 +185,10 @@ case tGPR_and_tcGPRRegClassID: case tcGPRRegClassID: case tGPRRegClassID: + case tGPREven_and_tGPR_and_tcGPRRegClassID: + case tGPROdd_and_tcGPRRegClassID: + case tGPR_and_tGPREvenRegClassID: + case tGPR_and_tGPROddRegClassID: return getRegBank(ARM::GPRRegBankID); case HPRRegClassID: case SPR_8RegClassID: diff --git a/llvm/lib/Target/ARM/ARMRegisterInfo.td b/llvm/lib/Target/ARM/ARMRegisterInfo.td --- a/llvm/lib/Target/ARM/ARMRegisterInfo.td +++ b/llvm/lib/Target/ARM/ARMRegisterInfo.td @@ -436,8 +436,10 @@ def QPR : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64, v8f16], 128, (sequence "Q%u", 0, 15)> { // Allocate non-VFP2 aliases Q8-Q15 first. - let AltOrders = [(rotl QPR, 8)]; - let AltOrderSelect = [{ return 1; }]; + let AltOrders = [(rotl QPR, 8), (trunc QPR, 8)]; + let AltOrderSelect = [{ + return 1 + MF.getSubtarget().hasMVEIntegerOps(); + }]; let DiagnosticString = "operand must be a register in range [q0, q15]"; } @@ -453,6 +455,12 @@ let DiagnosticString = "operand must be a register in range [q0, q3]"; } +// MVE 128-bit vector register class. This class is only really needed for +// parsing assembly, since we still have to truncate the register set in the QPR +// class anyway. +def MQPR : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64, v8f16], + 128, (trunc QPR, 8)>; + // Pseudo-registers representing odd-even pairs of D registers. The even-odd // pairs are already represented by the Q registers. // These are needed by NEON instructions requiring two consecutive D registers. 
diff --git a/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp --- a/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -69,6 +69,10 @@ using namespace llvm; +namespace llvm { +extern const MCInstrDesc ARMInsts[]; +} // end namespace llvm + namespace { enum class ImplicitItModeTy { Always, Never, ARMOnly, ThumbOnly }; @@ -364,6 +368,18 @@ ITState.IsExplicit = true; } + struct { + unsigned Mask : 4; + unsigned CurPosition; + } VPTState; + bool inVPTBlock() { return VPTState.CurPosition != ~0U; } + void forwardVPTPosition() { + if (!inVPTBlock()) return; + unsigned TZ = countTrailingZeros(VPTState.Mask); + if (++VPTState.CurPosition == 5 - TZ) + VPTState.CurPosition = ~0U; + } + void Note(SMLoc L, const Twine &Msg, SMRange Range = None) { return getParser().Note(L, Msg, Range); } @@ -422,12 +438,15 @@ bool parseDirectiveAlign(SMLoc L); bool parseDirectiveThumbSet(SMLoc L); - StringRef splitMnemonic(StringRef Mnemonic, unsigned &PredicationCode, - bool &CarrySetting, unsigned &ProcessorIMod, - StringRef &ITMask); - void getMnemonicAcceptInfo(StringRef Mnemonic, StringRef FullInst, - bool &CanAcceptCarrySet, - bool &CanAcceptPredicationCode); + bool isMnemonicVPTPredicable(StringRef Mnemonic, StringRef ExtraToken); + StringRef splitMnemonic(StringRef Mnemonic, StringRef ExtraToken, + unsigned &PredicationCode, + unsigned &VPTPredicationCode, bool &CarrySetting, + unsigned &ProcessorIMod, StringRef &ITMask); + void getMnemonicAcceptInfo(StringRef Mnemonic, StringRef ExtraToken, + StringRef FullInst, bool &CanAcceptCarrySet, + bool &CanAcceptPredicationCode, + bool &CanAcceptVPTPredicationCode); void tryConvertingToTwoOperandForm(StringRef Mnemonic, bool CarrySetting, OperandVector &Operands); @@ -482,6 +501,15 @@ bool hasV8_1MMainline() const { return getSTI().getFeatureBits()[ARM::HasV8_1MMainlineOps]; } + bool hasMVEInt() const { + return 
getSTI().getFeatureBits()[ARM::HasMVEIntegerOps]; + } + bool hasMVEFloat() const { + return getSTI().getFeatureBits()[ARM::HasMVEFloatOps]; + } + bool hasMVE() const { + return hasMVEInt() || hasMVEFloat(); + } bool has8MSecExt() const { return getSTI().getFeatureBits()[ARM::Feature8MSecExt]; } @@ -564,6 +592,7 @@ bool processInstruction(MCInst &Inst, const OperandVector &Ops, MCStreamer &Out); bool shouldOmitCCOutOperand(StringRef Mnemonic, OperandVector &Operands); bool shouldOmitPredicateOperand(StringRef Mnemonic, OperandVector &Operands); + bool shouldOmitVectorPredicateOperand(StringRef Mnemonic, OperandVector &Operands); bool isITBlockTerminator(MCInst &Inst) const; void fixupGNULDRDAlias(StringRef Mnemonic, OperandVector &Operands); bool validateLDRDSTRD(MCInst &Inst, const OperandVector &Operands, @@ -600,6 +629,8 @@ // Not in an ITBlock to start with. ITState.CurPosition = ~0U; + VPTState.CurPosition = ~0U; + NextSymbolIsThumb = false; } @@ -645,6 +676,7 @@ class ARMOperand : public MCParsedAsmOperand { enum KindTy { k_CondCode, + k_VPTPred, k_CCOut, k_ITCondMask, k_CoprocNum, @@ -687,6 +719,10 @@ ARMCC::CondCodes Val; }; + struct VCCOp { + ARMVCC::VPTCodes Val; + }; + struct CopOp { unsigned Val; }; @@ -803,6 +839,7 @@ union { struct CCOp CC; + struct VCCOp VCC; struct CopOp Cop; struct CoprocOptionOp CoprocOption; struct MBOptOp MBOpt; @@ -851,6 +888,11 @@ return CC.Val; } + ARMVCC::VPTCodes getVPTPred() const { + assert(isVPTPred() && "Invalid access!"); + return VCC.Val; + } + unsigned getCoproc() const { assert((Kind == k_CoprocNum || Kind == k_CoprocReg) && "Invalid access!"); return Cop.Val; @@ -924,6 +966,7 @@ bool isCoprocReg() const { return Kind == k_CoprocReg; } bool isCoprocOption() const { return Kind == k_CoprocOption; } bool isCondCode() const { return Kind == k_CondCode; } + bool isVPTPred() const { return Kind == k_VPTPred; } bool isCCOut() const { return Kind == k_CCOut; } bool isITMask() const { return Kind == k_ITCondMask; } bool 
isITCondCode() const { return Kind == k_CondCode; } @@ -1862,6 +1905,8 @@ return VectorList.Count == 4 && VectorList.LaneIndex <= 1; } + bool isVectorIndex() const { return Kind == k_VectorIndex; } + bool isVectorIndex8() const { if (Kind != k_VectorIndex) return false; return VectorIndex.Val < 8; @@ -2063,6 +2108,36 @@ return CC != ARMCC::AL; } + bool isITCondCodeRestrictedI() const { + if (!isITCondCode()) + return false; + ARMCC::CondCodes CC = (ARMCC::CondCodes)getCondCode(); + return CC == ARMCC::EQ || CC == ARMCC::NE; + } + + bool isITCondCodeRestrictedS() const { + if (!isITCondCode()) + return false; + ARMCC::CondCodes CC = (ARMCC::CondCodes)getCondCode(); + return CC == ARMCC::LT || CC == ARMCC::GT || CC == ARMCC::LE || + CC == ARMCC::GE; + } + + bool isITCondCodeRestrictedU() const { + if (!isITCondCode()) + return false; + ARMCC::CondCodes CC = (ARMCC::CondCodes)getCondCode(); + return CC == ARMCC::HS || CC == ARMCC::HI; + } + + bool isITCondCodeRestrictedFP() const { + if (!isITCondCode()) + return false; + ARMCC::CondCodes CC = (ARMCC::CondCodes)getCondCode(); + return CC == ARMCC::EQ || CC == ARMCC::NE || CC == ARMCC::LT || + CC == ARMCC::GT || CC == ARMCC::LE || CC == ARMCC::GE; + } + void addExpr(MCInst &Inst, const MCExpr *Expr) const { // Add as immediates when possible. Null MCExpr = 0. if (!Expr) @@ -2090,6 +2165,30 @@ Inst.addOperand(MCOperand::createReg(RegNum)); } + void addVPTPredNOperands(MCInst &Inst, unsigned N) const { + assert(N == 2 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::createImm(unsigned(getVPTPred()))); + unsigned RegNum = getVPTPred() == ARMVCC::None ? 
0: ARM::P0; + Inst.addOperand(MCOperand::createReg(RegNum)); + } + + void addVPTPredROperands(MCInst &Inst, unsigned N) const { + assert(N == 3 && "Invalid number of operands!"); + addVPTPredNOperands(Inst, N-1); + unsigned RegNum; + if (getVPTPred() == ARMVCC::None) { + RegNum = 0; + } else { + unsigned NextOpIndex = Inst.getNumOperands(); + const MCInstrDesc &MCID = ARMInsts[Inst.getOpcode()]; + int TiedOp = MCID.getOperandConstraint(NextOpIndex, MCOI::TIED_TO); + assert(TiedOp >= 0 && + "Inactive register in vpred_r is not tied to an output!"); + RegNum = Inst.getOperand(TiedOp).getReg(); + } + Inst.addOperand(MCOperand::createReg(RegNum)); + } + void addCoprocNumOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); Inst.addOperand(MCOperand::createImm(getCoproc())); @@ -2820,6 +2919,12 @@ Inst.addOperand(MCOperand::createImm(Imm)); } + void addPowerTwoOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + const MCConstantExpr *CE = dyn_cast(getImm()); + Inst.addOperand(MCOperand::createImm(CE->getValue())); + } + void addMSRMaskOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); Inst.addOperand(MCOperand::createImm(unsigned(getMSRMask()))); @@ -3034,6 +3139,15 @@ return Op; } + static std::unique_ptr CreateVPTPred(ARMVCC::VPTCodes CC, + SMLoc S) { + auto Op = make_unique(k_VPTPred); + Op->VCC.Val = CC; + Op->StartLoc = S; + Op->EndLoc = S; + return Op; + } + static std::unique_ptr CreateCoprocNum(unsigned CopVal, SMLoc S) { auto Op = make_unique(k_CoprocNum); Op->Cop.Val = CopVal; @@ -3350,6 +3464,9 @@ case k_CondCode: OS << ""; break; + case k_VPTPred: + OS << ""; + break; case k_CCOut: OS << ""; break; @@ -5777,11 +5894,14 @@ // FIXME: Would be nice to autogen this. // FIXME: This is a bit of a maze of special cases. 
StringRef ARMAsmParser::splitMnemonic(StringRef Mnemonic, + StringRef ExtraToken, unsigned &PredicationCode, + unsigned &VPTPredicationCode, bool &CarrySetting, unsigned &ProcessorIMod, StringRef &ITMask) { PredicationCode = ARMCC::AL; + VPTPredicationCode = ARMVCC::None; CarrySetting = false; ProcessorIMod = 0; @@ -5859,12 +5979,30 @@ } } + if (isMnemonicVPTPredicable(Mnemonic, ExtraToken)) { + unsigned CC = ARMVectorCondCodeFromString(Mnemonic.substr(Mnemonic.size()-1)); + if (CC != ~0U) { + Mnemonic = Mnemonic.slice(0, Mnemonic.size()-1); + VPTPredicationCode = CC; + } + return Mnemonic; + } + // The "it" instruction has the condition mask on the end of the mnemonic. if (Mnemonic.startswith("it")) { ITMask = Mnemonic.slice(2, Mnemonic.size()); Mnemonic = Mnemonic.slice(0, 2); } + if (Mnemonic.startswith("vpst")) { + ITMask = Mnemonic.slice(4, Mnemonic.size()); + Mnemonic = Mnemonic.slice(0, 4); + } + else if (Mnemonic.startswith("vpt")) { + ITMask = Mnemonic.slice(3, Mnemonic.size()); + Mnemonic = Mnemonic.slice(0, 3); + } + return Mnemonic; } @@ -5872,9 +6010,14 @@ /// inclusion of carry set or predication code operands. // // FIXME: It would be nice to autogen this. 
-void ARMAsmParser::getMnemonicAcceptInfo(StringRef Mnemonic, StringRef FullInst, +void ARMAsmParser::getMnemonicAcceptInfo(StringRef Mnemonic, + StringRef ExtraToken, + StringRef FullInst, bool &CanAcceptCarrySet, - bool &CanAcceptPredicationCode) { + bool &CanAcceptPredicationCode, + bool &CanAcceptVPTPredicationCode) { + CanAcceptVPTPredicationCode = isMnemonicVPTPredicable(Mnemonic, ExtraToken); + CanAcceptCarrySet = Mnemonic == "and" || Mnemonic == "lsl" || Mnemonic == "lsr" || Mnemonic == "rrx" || Mnemonic == "ror" || Mnemonic == "sub" || @@ -5908,7 +6051,8 @@ Mnemonic == "dls" || Mnemonic == "le" || Mnemonic == "csel" || Mnemonic == "csinc" || Mnemonic == "csinv" || Mnemonic == "csneg" || Mnemonic == "cinc" || Mnemonic == "cinv" || Mnemonic == "cneg" || - Mnemonic == "cset" || Mnemonic == "csetm") { + Mnemonic == "cset" || Mnemonic == "csetm" || + Mnemonic.startswith("vpt") || Mnemonic.startswith("vpst")) { // These mnemonics are never predicable CanAcceptPredicationCode = false; } else if (!isThumb()) { @@ -6160,6 +6304,25 @@ return false; } +bool ARMAsmParser::shouldOmitVectorPredicateOperand(StringRef Mnemonic, + OperandVector &Operands) { + if (!hasMVE() || Operands.size() < 3) + return true; + + for (auto &Operand : Operands) { + // We check both QPR and MQPR to more accurately report errors when + // using Q registers outside of the allowed range. + if (static_cast(*Operand).isVectorIndex() || + (Operand->isReg() && + (ARMMCRegisterClasses[ARM::MQPRRegClassID].contains( + Operand->getReg()) || + ARMMCRegisterClasses[ARM::QPRRegClassID].contains( + Operand->getReg())))) + return false; + } + return true; +} + static bool isDataTypeToken(StringRef Tok) { return Tok == ".8" || Tok == ".16" || Tok == ".32" || Tok == ".64" || Tok == ".i8" || Tok == ".i16" || Tok == ".i32" || Tok == ".i64" || @@ -6251,14 +6414,16 @@ // Create the leading tokens for the mnemonic, split by '.' characters. 
size_t Start = 0, Next = Name.find('.'); StringRef Mnemonic = Name.slice(Start, Next); + StringRef ExtraToken = Name.slice(Next, Name.find(' ', Next + 1)); // Split out the predication code and carry setting flag from the mnemonic. unsigned PredicationCode; + unsigned VPTPredicationCode; unsigned ProcessorIMod; bool CarrySetting; StringRef ITMask; - Mnemonic = splitMnemonic(Mnemonic, PredicationCode, CarrySetting, - ProcessorIMod, ITMask); + Mnemonic = splitMnemonic(Mnemonic, ExtraToken, PredicationCode, VPTPredicationCode, + CarrySetting, ProcessorIMod, ITMask); // In Thumb1, only the branch (B) instruction can be predicated. if (isThumbOne() && PredicationCode != ARMCC::AL && Mnemonic != "b") { @@ -6272,10 +6437,15 @@ // encoding has a '1' as it's bit0 (i.e. 't' ==> '1'). In the case // where the conditional bit0 is zero, the instruction post-processing // will adjust the mask accordingly. - if (Mnemonic == "it") { - SMLoc Loc = SMLoc::getFromPointer(NameLoc.getPointer() + 2); + if (Mnemonic == "it" || Mnemonic.startswith("vpt") || + Mnemonic.startswith("vpst")) { + SMLoc Loc = Mnemonic == "it" ? SMLoc::getFromPointer(NameLoc.getPointer() + 2) : + Mnemonic == "vpt" ? SMLoc::getFromPointer(NameLoc.getPointer() + 3) : + SMLoc::getFromPointer(NameLoc.getPointer() + 4); if (ITMask.size() > 3) { - return Error(Loc, "too many conditions on IT instruction"); + if (Mnemonic == "it") + return Error(Loc, "too many conditions on IT instruction"); + return Error(Loc, "too many conditions on VPT instruction"); } unsigned Mask = 8; for (unsigned i = ITMask.size(); i != 0; --i) { @@ -6284,7 +6454,7 @@ return Error(Loc, "illegal IT block condition mask '" + ITMask + "'"); } Mask >>= 1; - if (ITMask[i - 1] == 't') + if (ITMask[i - 1] == (Mnemonic.startswith("vp") ? 
'e' : 't')) Mask |= 8; } Operands.push_back(ARMOperand::CreateITMask(Mask, Loc)); @@ -6300,8 +6470,9 @@ // ConditionCode operands to match the mnemonic "as written" and then we let // the matcher deal with finding the right instruction or generating an // appropriate error. - bool CanAcceptCarrySet, CanAcceptPredicationCode; - getMnemonicAcceptInfo(Mnemonic, Name, CanAcceptCarrySet, CanAcceptPredicationCode); + bool CanAcceptCarrySet, CanAcceptPredicationCode, CanAcceptVPTPredicationCode; + getMnemonicAcceptInfo(Mnemonic, ExtraToken, Name, CanAcceptCarrySet, + CanAcceptPredicationCode, CanAcceptVPTPredicationCode); // If we had a carry-set on an instruction that can't do that, issue an // error. @@ -6316,6 +6487,13 @@ "' is not predicable, but condition code specified"); } + // If we had a VPT predication code on an instruction that can't do that, issue an + // error. + if (!CanAcceptVPTPredicationCode && VPTPredicationCode != ARMVCC::None) { + return Error(NameLoc, "instruction '" + Mnemonic + + "' is not VPT predicable, but VPT code T/E is specified"); + } + // Add the carry setting operand, if necessary. if (CanAcceptCarrySet) { SMLoc Loc = SMLoc::getFromPointer(NameLoc.getPointer() + Mnemonic.size()); @@ -6328,7 +6506,15 @@ SMLoc Loc = SMLoc::getFromPointer(NameLoc.getPointer() + Mnemonic.size() + CarrySetting); Operands.push_back(ARMOperand::CreateCondCode( - ARMCC::CondCodes(PredicationCode), Loc)); + ARMCC::CondCodes(PredicationCode), Loc)); + } + + // Add the VPT predication code operand, if necessary. + if (CanAcceptVPTPredicationCode) { + SMLoc Loc = SMLoc::getFromPointer(NameLoc.getPointer() + Mnemonic.size() + + CarrySetting); + Operands.push_back(ARMOperand::CreateVPTPred( + ARMVCC::VPTCodes(VPTPredicationCode), Loc)); } // Add the processor imod operand, if necessary. 
@@ -6344,7 +6530,7 @@ while (Next != StringRef::npos) { Start = Next; Next = Name.find('.', Start + 1); - StringRef ExtraToken = Name.slice(Start, Next); + ExtraToken = Name.slice(Start, Next); // Some NEON instructions have an optional datatype suffix that is // completely ignored. Check for that. @@ -6400,57 +6586,96 @@ // Some instructions have the same mnemonic, but don't always // have a predicate. Distinguish them here and delete the - // predicate if needed. + // appropriate predicate if needed. This could be either the scalar + // predication code or the vector predication code. if (PredicationCode == ARMCC::AL && shouldOmitPredicateOperand(Mnemonic, Operands)) Operands.erase(Operands.begin() + 1); - // ARM mode 'blx' need special handling, as the register operand version - // is predicable, but the label operand version is not. So, we can't rely - // on the Mnemonic based checking to correctly figure out when to put - // a k_CondCode operand in the list. If we're trying to match the label - // version, remove the k_CondCode operand here. - if (!isThumb() && Mnemonic == "blx" && Operands.size() == 3 && - static_cast(*Operands[2]).isImm()) - Operands.erase(Operands.begin() + 1); - // Adjust operands of ldrexd/strexd to MCK_GPRPair. - // ldrexd/strexd require even/odd GPR pair. To enforce this constraint, - // a single GPRPair reg operand is used in the .td file to replace the two - // GPRs. However, when parsing from asm, the two GRPs cannot be automatically - // expressed as a GPRPair, so we have to manually merge them. - // FIXME: We would really like to be able to tablegen'erate this. - if (!isThumb() && Operands.size() > 4 && - (Mnemonic == "ldrexd" || Mnemonic == "strexd" || Mnemonic == "ldaexd" || - Mnemonic == "stlexd")) { - bool isLoad = (Mnemonic == "ldrexd" || Mnemonic == "ldaexd"); - unsigned Idx = isLoad ? 
2 : 3; - ARMOperand &Op1 = static_cast(*Operands[Idx]); - ARMOperand &Op2 = static_cast(*Operands[Idx + 1]); - - const MCRegisterClass& MRC = MRI->getRegClass(ARM::GPRRegClassID); - // Adjust only if Op1 and Op2 are GPRs. - if (Op1.isReg() && Op2.isReg() && MRC.contains(Op1.getReg()) && - MRC.contains(Op2.getReg())) { - unsigned Reg1 = Op1.getReg(); - unsigned Reg2 = Op2.getReg(); - unsigned Rt = MRI->getEncodingValue(Reg1); - unsigned Rt2 = MRI->getEncodingValue(Reg2); - - // Rt2 must be Rt + 1 and Rt must be even. - if (Rt + 1 != Rt2 || (Rt & 1)) { - return Error(Op2.getStartLoc(), - isLoad ? "destination operands must be sequential" - : "source operands must be sequential"); + if (hasMVE()) { + if (CanAcceptVPTPredicationCode) { + // For all other instructions, make sure only one of the two + // predication operands is left behind, depending on whether we should + // use the vector predication. + if (shouldOmitVectorPredicateOperand(Mnemonic, Operands)) { + if (CanAcceptPredicationCode) + Operands.erase(Operands.begin() + 2); + else + Operands.erase(Operands.begin() + 1); + } else if (CanAcceptPredicationCode && PredicationCode == ARMCC::AL) { + Operands.erase(Operands.begin() + 1); } - unsigned NewReg = MRI->getMatchingSuperReg(Reg1, ARM::gsub_0, - &(MRI->getRegClass(ARM::GPRPairRegClassID))); - Operands[Idx] = - ARMOperand::CreateReg(NewReg, Op1.getStartLoc(), Op2.getEndLoc()); - Operands.erase(Operands.begin() + Idx + 1); } } + if (VPTPredicationCode != ARMVCC::None) { + bool usedVPTPredicationCode = false; + for (unsigned I = 1; I < Operands.size(); ++I) + if (static_cast(*Operands[I]).isVPTPred()) + usedVPTPredicationCode = true; + if (!usedVPTPredicationCode) { + // If we have a VPT predication code and we haven't just turned it + // into an operand, then it was a mistake for splitMnemonic to + // separate it from the rest of the mnemonic in the first place, + // and this may lead to wrong disassembly (e.g. 
scalar floating + // point VCMPE is actually a different instruction from VCMP, so + // we mustn't treat them the same). In that situation, glue it + // back on. + Mnemonic = Name.slice(0, Mnemonic.size() + 1); + Operands.erase(Operands.begin()); + Operands.insert(Operands.begin(), + ARMOperand::CreateToken(Mnemonic, NameLoc)); + } + } + + // ARM mode 'blx' need special handling, as the register operand version + // is predicable, but the label operand version is not. So, we can't rely + // on the Mnemonic based checking to correctly figure out when to put + // a k_CondCode operand in the list. If we're trying to match the label + // version, remove the k_CondCode operand here. + if (!isThumb() && Mnemonic == "blx" && Operands.size() == 3 && + static_cast(*Operands[2]).isImm()) + Operands.erase(Operands.begin() + 1); + + // Adjust operands of ldrexd/strexd to MCK_GPRPair. + // ldrexd/strexd require even/odd GPR pair. To enforce this constraint, + // a single GPRPair reg operand is used in the .td file to replace the two + // GPRs. However, when parsing from asm, the two GRPs cannot be + // automatically + // expressed as a GPRPair, so we have to manually merge them. + // FIXME: We would really like to be able to tablegen'erate this. + if (!isThumb() && Operands.size() > 4 && + (Mnemonic == "ldrexd" || Mnemonic == "strexd" || Mnemonic == "ldaexd" || + Mnemonic == "stlexd")) { + bool isLoad = (Mnemonic == "ldrexd" || Mnemonic == "ldaexd"); + unsigned Idx = isLoad ? 2 : 3; + ARMOperand &Op1 = static_cast(*Operands[Idx]); + ARMOperand &Op2 = static_cast(*Operands[Idx + 1]); + + const MCRegisterClass &MRC = MRI->getRegClass(ARM::GPRRegClassID); + // Adjust only if Op1 and Op2 are GPRs. 
+ if (Op1.isReg() && Op2.isReg() && MRC.contains(Op1.getReg()) && + MRC.contains(Op2.getReg())) { + unsigned Reg1 = Op1.getReg(); + unsigned Reg2 = Op2.getReg(); + unsigned Rt = MRI->getEncodingValue(Reg1); + unsigned Rt2 = MRI->getEncodingValue(Reg2); + + // Rt2 must be Rt + 1 and Rt must be even. + if (Rt + 1 != Rt2 || (Rt & 1)) { + return Error(Op2.getStartLoc(), + isLoad ? "destination operands must be sequential" + : "source operands must be sequential"); + } + unsigned NewReg = MRI->getMatchingSuperReg( + Reg1, ARM::gsub_0, &(MRI->getRegClass(ARM::GPRPairRegClassID))); + Operands[Idx] = + ARMOperand::CreateReg(NewReg, Op1.getStartLoc(), Op2.getEndLoc()); + Operands.erase(Operands.begin() + Idx + 1); + } + } + // GNU Assembler extension (compatibility). fixupGNULDRDAlias(Mnemonic, Operands); @@ -6609,6 +6834,17 @@ return false; } +static int findFirstVectorPredOperandIdx(const MCInstrDesc &MCID) { + for (unsigned i = 0; i < MCID.NumOperands; ++i) { + if (ARM::isVpred(MCID.OpInfo[i].OperandType)) + return i; + } + return -1; +} + +static bool isVectorPredicable(const MCInstrDesc &MCID) { + return findFirstVectorPredOperandIdx(MCID) != -1; +} // FIXME: We would really like to be able to tablegen'erate this. bool ARMAsmParser::validateInstruction(MCInst &Inst, @@ -6667,6 +6903,30 @@ return Error(Loc, "instruction must be outside of IT block or the last instruction in an IT block"); } + if (inVPTBlock() && !instIsBreakpoint(Inst)) { + unsigned Bit = 0; + if (VPTState.CurPosition != 0) + Bit = (VPTState.Mask >> (5 - VPTState.CurPosition)) & 1; + if (!isVectorPredicable(MCID)) + return Error(Loc, "instruction in VPT block must be predicable"); + unsigned Pred = Inst.getOperand(findFirstVectorPredOperandIdx(MCID)).getImm(); + unsigned VPTPred = Bit ? 
ARMVCC::Else : ARMVCC::Then; + if (Pred != VPTPred) { + SMLoc PredLoc; + for (unsigned I = 1; I < Operands.size(); ++I) + if (static_cast(*Operands[I]).isVPTPred()) + PredLoc = Operands[I]->getStartLoc(); + return Error(PredLoc, "incorrect predication in VPT block; got '" + + StringRef(ARMVPTPredToString(ARMVCC::VPTCodes(Pred))) + + "', but expected '" + + ARMVPTPredToString(ARMVCC::VPTCodes(VPTPred)) + "'"); + } + } + else if (hasMVE() && isVectorPredicable(MCID) && + Inst.getOperand(findFirstVectorPredOperandIdx(MCID)).getImm() != + ARMVCC::None) + return Error(Loc, "VPT predicated instructions must be in VPT block"); + const unsigned Opcode = Inst.getOpcode(); switch (Opcode) { case ARM::t2IT: { @@ -9338,6 +9598,35 @@ return true; } return false; + case ARM::t2VPST: + case ARM::t2VPTv16i8: + case ARM::t2VPTv8i16: + case ARM::t2VPTv4i32: + case ARM::t2VPTv16u8: + case ARM::t2VPTv8u16: + case ARM::t2VPTv4u32: + case ARM::t2VPTv16s8: + case ARM::t2VPTv8s16: + case ARM::t2VPTv4s32: + case ARM::t2VPTv4f32: + case ARM::t2VPTv8f16: + case ARM::t2VPTv16i8r: + case ARM::t2VPTv8i16r: + case ARM::t2VPTv4i32r: + case ARM::t2VPTv16u8r: + case ARM::t2VPTv8u16r: + case ARM::t2VPTv4u32r: + case ARM::t2VPTv16s8r: + case ARM::t2VPTv8s16r: + case ARM::t2VPTv4s32r: + case ARM::t2VPTv4f32r: + case ARM::t2VPTv8f16r: { + assert(!inVPTBlock() && "Nested VPT blocks are not allowed"); + MCOperand &MO = Inst.getOperand(0); + VPTState.Mask = MO.getImm(); + VPTState.CurPosition = 0; + break; + } } return false; } @@ -9578,6 +9867,7 @@ // Still progress the IT block, otherwise one wrong condition causes // nasty cascading errors. forwardITPosition(); + forwardVPTPosition(); return true; } @@ -9604,6 +9894,7 @@ // and process gets a consistent answer about whether we're in an IT // block. forwardITPosition(); + forwardVPTPosition(); // ITasm is an ARM mode pseudo-instruction that just sets the ITblock and // doesn't actually encode. 
@@ -10982,3 +11273,76 @@ } return Match_InvalidOperand; } + +bool ARMAsmParser::isMnemonicVPTPredicable(StringRef Mnemonic, + StringRef ExtraToken) { + if (!hasMVE()) + return false; + + return Mnemonic.startswith("vabav") || Mnemonic.startswith("vaddv") || + Mnemonic.startswith("vaddlv") || Mnemonic.startswith("vminnmv") || + Mnemonic.startswith("vminnmav") || Mnemonic.startswith("vminv") || + Mnemonic.startswith("vminav") || Mnemonic.startswith("vmaxnmv") || + Mnemonic.startswith("vmaxnmav") || Mnemonic.startswith("vmaxv") || + Mnemonic.startswith("vmaxav") || Mnemonic.startswith("vmladav") || + Mnemonic.startswith("vrmlaldavh") || Mnemonic.startswith("vrmlalvh") || + Mnemonic.startswith("vmlsdav") || Mnemonic.startswith("vmlav") || + Mnemonic.startswith("vmlaldav") || Mnemonic.startswith("vmlalv") || + Mnemonic.startswith("vmaxnm") || Mnemonic.startswith("vminnm") || + Mnemonic.startswith("vmax") || Mnemonic.startswith("vmin") || + Mnemonic.startswith("vshlc") || Mnemonic.startswith("vmovlt") || + Mnemonic.startswith("vmovlb") || Mnemonic.startswith("vshll") || + Mnemonic.startswith("vrshrn") || Mnemonic.startswith("vshrn") || + Mnemonic.startswith("vqrshrun") || Mnemonic.startswith("vqshrun") || + Mnemonic.startswith("vqrshrn") || Mnemonic.startswith("vqshrn") || + Mnemonic.startswith("vbic") || Mnemonic.startswith("vrev64") || + Mnemonic.startswith("vrev32") || Mnemonic.startswith("vrev16") || + Mnemonic.startswith("vmvn") || Mnemonic.startswith("veor") || + Mnemonic.startswith("vorn") || Mnemonic.startswith("vorr") || + Mnemonic.startswith("vand") || Mnemonic.startswith("vmul") || + Mnemonic.startswith("vqrdmulh") || Mnemonic.startswith("vqdmulh") || + Mnemonic.startswith("vsub") || Mnemonic.startswith("vadd") || + Mnemonic.startswith("vqsub") || Mnemonic.startswith("vqadd") || + Mnemonic.startswith("vabd") || Mnemonic.startswith("vrhadd") || + Mnemonic.startswith("vhsub") || Mnemonic.startswith("vhadd") || + Mnemonic.startswith("vdup") || 
Mnemonic.startswith("vcls") || + Mnemonic.startswith("vclz") || Mnemonic.startswith("vneg") || + Mnemonic.startswith("vabs") || Mnemonic.startswith("vqneg") || + Mnemonic.startswith("vqabs") || + (Mnemonic.startswith("vrint") && Mnemonic != "vrintr") || + Mnemonic.startswith("vcmla") || Mnemonic.startswith("vfma") || + Mnemonic.startswith("vfms") || Mnemonic.startswith("vcadd") || + Mnemonic.startswith("vadd") || Mnemonic.startswith("vsub") || + Mnemonic.startswith("vshl") || Mnemonic.startswith("vqshl") || + Mnemonic.startswith("vqrshl") || Mnemonic.startswith("vrshl") || + Mnemonic.startswith("vsri") || Mnemonic.startswith("vsli") || + Mnemonic.startswith("vrshr") || Mnemonic.startswith("vshr") || + Mnemonic.startswith("vpsel") || Mnemonic.startswith("vcmp") || + Mnemonic.startswith("vqdmladh") || Mnemonic.startswith("vqrdmladh") || + Mnemonic.startswith("vqdmlsdh") || Mnemonic.startswith("vqrdmlsdh") || + Mnemonic.startswith("vcmul") || Mnemonic.startswith("vrmulh") || + Mnemonic.startswith("vqmovn") || Mnemonic.startswith("vqmovun") || + Mnemonic.startswith("vmovnt") || Mnemonic.startswith("vmovnb") || + Mnemonic.startswith("vmaxa") || Mnemonic.startswith("vmaxnma") || + Mnemonic.startswith("vhcadd") || Mnemonic.startswith("vadc") || + Mnemonic.startswith("vsbc") || Mnemonic.startswith("vrshr") || + Mnemonic.startswith("vshr") || Mnemonic.startswith("vstrb") || + Mnemonic.startswith("vldrb") || + (Mnemonic.startswith("vstrh") && Mnemonic != "vstrhi") || + (Mnemonic.startswith("vldrh") && Mnemonic != "vldrhi") || + Mnemonic.startswith("vstrw") || Mnemonic.startswith("vldrw") || + Mnemonic.startswith("vldrd") || Mnemonic.startswith("vstrd") || + Mnemonic.startswith("vqdmull") || Mnemonic.startswith("vbrsr") || + Mnemonic.startswith("vfmas") || Mnemonic.startswith("vmlas") || + Mnemonic.startswith("vmla") || Mnemonic.startswith("vqdmlash") || + Mnemonic.startswith("vqdmlah") || Mnemonic.startswith("vqrdmlash") || + Mnemonic.startswith("vqrdmlah") || 
Mnemonic.startswith("viwdup") || + Mnemonic.startswith("vdwdup") || Mnemonic.startswith("vidup") || + Mnemonic.startswith("vddup") || Mnemonic.startswith("vctp") || + Mnemonic.startswith("vpnot") || Mnemonic.startswith("vbic") || + Mnemonic.startswith("vrmlsldavh") || Mnemonic.startswith("vmlsldav") || + Mnemonic.startswith("vcvt") || + (Mnemonic.startswith("vmov") && + !(ExtraToken == ".f16" || ExtraToken == ".32" || + ExtraToken == ".16" || ExtraToken == ".8")); +} diff --git a/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp --- a/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp +++ b/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp @@ -6,6 +6,7 @@ // //===----------------------------------------------------------------------===// +#include "ARMBaseInstrInfo.h" #include "MCTargetDesc/ARMAddressingModes.h" #include "MCTargetDesc/ARMBaseInfo.h" #include "MCTargetDesc/ARMMCTargetDesc.h" @@ -87,6 +88,47 @@ std::vector ITStates; }; + class VPTStatus + { + public: + unsigned getVPTPred() { + unsigned Pred = ARMVCC::None; + if (instrInVPTBlock()) + Pred = VPTStates.back(); + return Pred; + } + + void advanceVPTState() { + VPTStates.pop_back(); + } + + bool instrInVPTBlock() { + return !VPTStates.empty(); + } + + bool instrLastInVPTBlock() { + return VPTStates.size() == 1; + } + + void setVPTState(char Mask) { + // (3 - the number of trailing zeros) is the number of then / else. + unsigned NumTZ = countTrailingZeros(Mask); + assert(NumTZ <= 3 && "Invalid VPT mask!"); + // push predicates onto the stack the correct order for the pops + for (unsigned Pos = NumTZ+1; Pos <= 3; ++Pos) { + bool T = ((Mask >> Pos) & 1) == 0; + if (T) + VPTStates.push_back(ARMVCC::Then); + else + VPTStates.push_back(ARMVCC::Else); + } + VPTStates.push_back(ARMVCC::Then); + } + + private: + std::vector VPTStates; + }; + /// ARM disassembler for all ARM platforms. 
class ARMDisassembler : public MCDisassembler { public: @@ -118,6 +160,7 @@ private: mutable ITStatus ITBlock; + mutable VPTStatus VPTBlock; DecodeStatus AddThumbPredicate(MCInst&) const; void UpdateThumbVFPPredicate(DecodeStatus &, MCInst&) const; @@ -183,6 +226,8 @@ const void *Decoder); static DecodeStatus DecodeQPRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder); +static DecodeStatus DecodeMQPRRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder); static DecodeStatus DecodeDPairRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder); static DecodeStatus DecodeDPairSpacedRegisterClass(MCInst &Inst, @@ -441,6 +486,23 @@ const void *Decoder); static DecodeStatus DecodeVSCCLRM(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); +static DecodeStatus DecodeVPTMaskOperand(MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeVpredROperand(MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeRestrictedIPredicateOperand(MCInst &Inst, unsigned Val, + uint64_t Address, + const void *Decoder); +static DecodeStatus DecodeRestrictedSPredicateOperand(MCInst &Inst, unsigned Val, + uint64_t Address, + const void *Decoder); +static DecodeStatus DecodeRestrictedUPredicateOperand(MCInst &Inst, unsigned Val, + uint64_t Address, + const void *Decoder); +static DecodeStatus DecodeRestrictedFPPredicateOperand(MCInst &Inst, + unsigned Val, + uint64_t Address, + const void *Decoder); static DecodeStatus DecodeVSTRVLDR_SYSREG_off(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); @@ -624,6 +686,16 @@ MI.insert(I, MCOperand::createReg(InITBlock ? 
0 : ARM::CPSR)); } +static bool isVectorPredicable(unsigned Opcode) { + const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; + unsigned short NumOps = ARMInsts[Opcode].NumOperands; + for (unsigned i = 0; i < NumOps; ++i) { + if (ARM::isVpred(OpInfo[i].OperandType)) + return true; + } + return false; +} + // Most Thumb instructions don't have explicit predicates in the // encoding, but rather get their predicates from IT context. We need // to fix up the predicate operands using this context information as a @@ -675,39 +747,66 @@ break; } - // If we're in an IT block, base the predicate on that. Otherwise, + // Warn on non-VPT predicable instruction in a VPT block and a VPT + // predicable instruction in an IT block + if ((!isVectorPredicable(MI.getOpcode()) && VPTBlock.instrInVPTBlock()) || + (isVectorPredicable(MI.getOpcode()) && ITBlock.instrInITBlock())) + S = SoftFail; + + // If we're in an IT/VPT block, base the predicate on that. Otherwise, // assume a predicate of AL. - unsigned CC; - CC = ITBlock.getITCC(); - if (CC == 0xF) - CC = ARMCC::AL; - if (ITBlock.instrInITBlock()) + unsigned CC = ARMCC::AL; + unsigned VCC = ARMVCC::None; + if (ITBlock.instrInITBlock()) { + CC = ITBlock.getITCC(); ITBlock.advanceITState(); + } else if (VPTBlock.instrInVPTBlock()) { + VCC = VPTBlock.getVPTPred(); + VPTBlock.advanceVPTState(); + } const MCOperandInfo *OpInfo = ARMInsts[MI.getOpcode()].OpInfo; unsigned short NumOps = ARMInsts[MI.getOpcode()].NumOperands; - MCInst::iterator I = MI.begin(); - for (unsigned i = 0; i < NumOps; ++i, ++I) { - if (I == MI.end()) break; - if (OpInfo[i].isPredicate()) { - if (CC != ARMCC::AL && !ARMInsts[MI.getOpcode()].isPredicable()) - Check(S, SoftFail); - I = MI.insert(I, MCOperand::createImm(CC)); - ++I; - if (CC == ARMCC::AL) - MI.insert(I, MCOperand::createReg(0)); - else - MI.insert(I, MCOperand::createReg(ARM::CPSR)); - return S; - } + + MCInst::iterator CCI = MI.begin(); + for (unsigned i = 0; i < NumOps; ++i, ++CCI) { + if 
(OpInfo[i].isPredicate() || CCI == MI.end()) break; } - I = MI.insert(I, MCOperand::createImm(CC)); - ++I; - if (CC == ARMCC::AL) - MI.insert(I, MCOperand::createReg(0)); - else - MI.insert(I, MCOperand::createReg(ARM::CPSR)); + if (ARMInsts[MI.getOpcode()].isPredicable()) { + CCI = MI.insert(CCI, MCOperand::createImm(CC)); + ++CCI; + if (CC == ARMCC::AL) + MI.insert(CCI, MCOperand::createReg(0)); + else + MI.insert(CCI, MCOperand::createReg(ARM::CPSR)); + } else if (CC != ARMCC::AL) { + Check(S, SoftFail); + } + + MCInst::iterator VCCI = MI.begin(); + unsigned VCCPos; + for (VCCPos = 0; VCCPos < NumOps; ++VCCPos, ++VCCI) { + if (ARM::isVpred(OpInfo[VCCPos].OperandType) || VCCI == MI.end()) break; + } + + if (isVectorPredicable(MI.getOpcode())) { + VCCI = MI.insert(VCCI, MCOperand::createImm(VCC)); + ++VCCI; + if (VCC == ARMVCC::None) + MI.insert(VCCI, MCOperand::createReg(0)); + else + MI.insert(VCCI, MCOperand::createReg(ARM::P0)); + if (OpInfo[VCCPos].OperandType == ARM::OPERAND_VPRED_R) { + int TiedOp = ARMInsts[MI.getOpcode()].getOperandConstraint( + VCCPos + 2, MCOI::TIED_TO); + assert(TiedOp >= 0 && + "Inactive register in vpred_r is not tied to an output!"); + MI.insert(VCCI, MI.getOperand(TiedOp)); + } + } else if (VCC != ARMVCC::None) { + Check(S, SoftFail); + } return S; } @@ -725,6 +824,10 @@ CC = ARMCC::AL; if (ITBlock.instrInITBlock()) ITBlock.advanceITState(); + else if (VPTBlock.instrInVPTBlock()) { + CC = VPTBlock.getVPTPred(); + VPTBlock.advanceVPTState(); + } const MCOperandInfo *OpInfo = ARMInsts[MI.getOpcode()].OpInfo; MCInst::iterator I = MI.begin(); @@ -820,7 +923,19 @@ decodeInstruction(DecoderTableMVE32, MI, Insn32, Address, this, STI); if (Result != MCDisassembler::Fail) { Size = 4; + + // Nested VPT blocks are UNPREDICTABLE. Must be checked before we add + // the VPT predicate. 
+ if (isVPTOpcode(MI.getOpcode()) && VPTBlock.instrInVPTBlock()) + Result = MCDisassembler::SoftFail; + Check(Result, AddThumbPredicate(MI)); + + if (isVPTOpcode(MI.getOpcode())) { + unsigned Mask = MI.getOperand(0).getImm(); + VPTBlock.setVPTState(Mask); + } + return Result; } @@ -5724,6 +5839,130 @@ return S; } +static DecodeStatus DecodeMQPRRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Address, + const void *Decoder) { + if (RegNo > 7) + return MCDisassembler::Fail; + + unsigned Register = QPRDecoderTable[RegNo]; + Inst.addOperand(MCOperand::createReg(Register)); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeVPTMaskOperand(MCInst &Inst, unsigned Val, + uint64_t Address, + const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + + // Parse VPT mask and encode it in the MCInst as an immediate with the same + // format as the it_mask. That is, from the second 'e|t' encode 'e' as 1 and + // 't' as 0 and finish with a 1. + unsigned Imm = 0; + // We always start with a 't'. + unsigned CurBit = 0; + for (int i = 3; i >= 0; --i) { + // If the bit we are looking at is not the same as last one, invert the + // CurBit, if it is the same leave it as is. + CurBit ^= (Val >> i) & 1U; + + // Encode the CurBit at the right place in the immediate. + Imm |= (CurBit << i); + + // If we are done, finish the encoding with a 1. + if ((Val & ~(~0U << i)) == 0) { + Imm |= 1U << i; + break; + } + } + + Inst.addOperand(MCOperand::createImm(Imm)); + + return S; +} + +static DecodeStatus DecodeVpredROperand(MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder) { + // The vpred_r operand type includes an MQPR register field derived + // from the encoding. But we don't actually want to add an operand + // to the MCInst at this stage, because AddThumbPredicate will do it + // later, and will infer the register number from the TIED_TO + // constraint. 
So this is a deliberately empty decoder method that + // will inhibit the auto-generated disassembly code from adding an + // operand at all. + return MCDisassembler::Success; +} + +static DecodeStatus DecodeRestrictedIPredicateOperand(MCInst &Inst, + unsigned Val, + uint64_t Address, + const void *Decoder) { + Inst.addOperand(MCOperand::createImm((Val & 0x1) == 0 ? ARMCC::EQ : ARMCC::NE)); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeRestrictedSPredicateOperand(MCInst &Inst, + unsigned Val, + uint64_t Address, + const void *Decoder) { + unsigned Code; + switch (Val & 0x3) { + case 0: + Code = ARMCC::GE; + break; + case 1: + Code = ARMCC::LT; + break; + case 2: + Code = ARMCC::GT; + break; + case 3: + Code = ARMCC::LE; + break; + } + Inst.addOperand(MCOperand::createImm(Code)); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeRestrictedUPredicateOperand(MCInst &Inst, + unsigned Val, + uint64_t Address, + const void *Decoder) { + Inst.addOperand(MCOperand::createImm((Val & 0x1) == 0 ? 
ARMCC::HS : ARMCC::HI)); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeRestrictedFPPredicateOperand(MCInst &Inst, unsigned Val, + uint64_t Address, + const void *Decoder) { + unsigned Code; + switch (Val) { + default: + return MCDisassembler::Fail; + case 0: + Code = ARMCC::EQ; + break; + case 1: + Code = ARMCC::NE; + break; + case 4: + Code = ARMCC::GE; + break; + case 5: + Code = ARMCC::LT; + break; + case 6: + Code = ARMCC::GT; + break; + case 7: + Code = ARMCC::LE; + break; + } + + Inst.addOperand(MCOperand::createImm(Code)); + return MCDisassembler::Success; +} + static unsigned FixedRegForVSTRVLDR_SYSREG(unsigned Opcode) { switch (Opcode) { case ARM::VSTR_P0_off: diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.h b/llvm/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.h --- a/llvm/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.h +++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.h @@ -246,6 +246,12 @@ template void printComplexRotationOp(const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI, raw_ostream &O); + // MVE + void printVPTPredicateOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, + raw_ostream &O); + void printVPTMask(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); private: unsigned DefaultAltIdx = ARM::NoRegAltName; diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.cpp --- a/llvm/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.cpp +++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.cpp @@ -1599,3 +1599,27 @@ O << "#" << (Val * Angle) + Remainder; } +void ARMInstPrinter::printVPTPredicateOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, + raw_ostream &O) { + ARMVCC::VPTCodes CC = (ARMVCC::VPTCodes)MI->getOperand(OpNum).getImm(); + if (CC != ARMVCC::None) + O << ARMVPTPredToString(CC); +} + +void ARMInstPrinter::printVPTMask(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo 
&STI, + raw_ostream &O) { + // (3 - the number of trailing zeroes) is the number of then / else. + unsigned Mask = MI->getOperand(OpNum).getImm(); + unsigned NumTZ = countTrailingZeros(Mask); + assert(NumTZ <= 3 && "Invalid VPT mask!"); + for (unsigned Pos = 3, e = NumTZ; Pos > e; --Pos) { + bool T = ((Mask >> Pos) & 1) == 0; + if (T) + O << 't'; + else + O << 'e'; + } +} + diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp --- a/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp +++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp @@ -412,6 +412,9 @@ unsigned EncodedValue, const MCSubtargetInfo &STI) const; + uint32_t getPowerTwoOpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const; void EmitByte(unsigned char C, raw_ostream &OS) const { OS << (char)C; } @@ -436,6 +439,13 @@ uint32_t getBFAfterTargetOpValue(const MCInst &MI, unsigned OpIdx, SmallVectorImpl &Fixups, const MCSubtargetInfo &STI) const; + + uint32_t getVPTMaskOpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const; + uint32_t getRestrictedCondCodeOpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const; }; } // end anonymous namespace @@ -521,6 +531,9 @@ if (MO.isReg()) { unsigned Reg = MO.getReg(); unsigned RegNo = CTX.getRegisterInfo()->getEncodingValue(Reg); + if (STI.getFeatureBits()[ARM::HasMVEIntegerOps] || + STI.getFeatureBits()[ARM::HasMVEFloatOps]) + return RegNo; // Q registers are encoded as 2x their register number. 
switch (Reg) { @@ -1798,6 +1811,85 @@ return Diff == 4; } + +uint32_t ARMMCCodeEmitter::getVPTMaskOpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI)const { + const MCOperand MO = MI.getOperand(OpIdx); + assert(MO.isImm() && "Unexpected operand type!"); + + int Value = MO.getImm(); + int Imm = 0; + + // VPT Masks are actually encoded as a series of invert/don't invert bits, + // rather than true/false bits. + unsigned PrevBit = 0; + for (int i = 3; i >= 0; --i) { + unsigned Bit = (Value >> i) & 1; + + // Check if we are at the end of the mask. + if ((Value & ~(~0U << i)) == 0) { + Imm |= (1 << i); + break; + } + + // Convert the bit in the mask based on the previous bit. + if (Bit != PrevBit) + Imm |= (1 << i); + + PrevBit = Bit; + } + + return Imm; +} + +uint32_t ARMMCCodeEmitter::getRestrictedCondCodeOpValue( + const MCInst &MI, unsigned OpIdx, SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { + + const MCOperand MO = MI.getOperand(OpIdx); + assert(MO.isImm() && "Unexpected operand type!"); + + switch (MO.getImm()) { + default: + assert(0 && "Unexpected Condition!"); + return 0; + case ARMCC::HS: + case ARMCC::EQ: + return 0; + case ARMCC::HI: + case ARMCC::NE: + return 1; + case ARMCC::GE: + return 4; + case ARMCC::LT: + return 5; + case ARMCC::GT: + return 6; + case ARMCC::LE: + return 7; + } +} + +uint32_t ARMMCCodeEmitter:: +getPowerTwoOpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { + const MCOperand &MO = MI.getOperand(OpIdx); + assert(MO.isImm() && "Unexpected operand type!"); + + int Value = MO.getImm(); + assert(Value == 1 || Value == 2 || Value == 4 || Value == 8); + + if (Value == 1) + return 0; + if (Value == 2) + return 1; + if (Value == 4) + return 2; + return 3; +} + #include "ARMGenMCCodeEmitter.inc" MCCodeEmitter *llvm::createARMLEMCCodeEmitter(const MCInstrInfo &MCII, diff --git 
a/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h b/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h --- a/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h +++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h @@ -14,6 +14,7 @@ #define LLVM_LIB_TARGET_ARM_MCTARGETDESC_ARMMCTARGETDESC_H #include "llvm/Support/DataTypes.h" +#include "llvm/MC/MCInstrDesc.h" #include #include @@ -94,6 +95,20 @@ /// Construct ARM Mach-O relocation info. MCRelocationInfo *createARMMachORelocationInfo(MCContext &Ctx); + +namespace ARM { +enum OperandType { + OPERAND_VPRED_R = MCOI::OPERAND_FIRST_TARGET, + OPERAND_VPRED_N, +}; +inline bool isVpred(OperandType op) { + return op == OPERAND_VPRED_R || op == OPERAND_VPRED_N; +} +inline bool isVpred(uint8_t op) { + return isVpred(static_cast(op)); +} +} // end namespace ARM + } // End llvm namespace // Defines symbolic names for ARM registers. This defines a mapping from diff --git a/llvm/lib/Target/ARM/Utils/ARMBaseInfo.h b/llvm/lib/Target/ARM/Utils/ARMBaseInfo.h --- a/llvm/lib/Target/ARM/Utils/ARMBaseInfo.h +++ b/llvm/lib/Target/ARM/Utils/ARMBaseInfo.h @@ -66,6 +66,30 @@ } } // end namespace ARMCC +namespace ARMVCC { + enum VPTCodes { + None = 0, + Then, + Else + }; +} + +inline static const char *ARMVPTPredToString(ARMVCC::VPTCodes CC) { + switch (CC) { + case ARMVCC::None: return "none"; + case ARMVCC::Then: return "t"; + case ARMVCC::Else: return "e"; + } + llvm_unreachable("Unknown VPT code"); +} + +inline static unsigned ARMVectorCondCodeFromString(StringRef CC) { + return StringSwitch(CC.lower()) + .Case("t", ARMVCC::Then) + .Case("e", ARMVCC::Else) + .Default(~0U); +} + inline static const char *ARMCondCodeToString(ARMCC::CondCodes CC) { switch (CC) { case ARMCC::EQ: return "eq"; diff --git a/llvm/test/MC/ARM/mve-vpt.s b/llvm/test/MC/ARM/mve-vpt.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/ARM/mve-vpt.s @@ -0,0 +1,89 @@ +# RUN: not llvm-mc -triple=thumbv8.1m.main-none-eabi -mattr=+mve -show-encoding < %s \ +# RUN: 
| FileCheck --check-prefix=CHECK-NOFP %s +# RUN: not llvm-mc -triple=thumbv8.1m.main-none-eabi -mattr=+mve.fp,+fp64 -show-encoding < %s 2>%t \ +# RUN: | FileCheck --check-prefix=CHECK %s +# RUN: FileCheck --check-prefix=ERROR < %t %s + +# CHECK: vabav.s8 r0, q1, q3 @ encoding: [0x82,0xee,0x07,0x0f] +# CHECK-NOFP: vabav.s8 r0, q1, q3 @ encoding: [0x82,0xee,0x07,0x0f] +vabav.s8 r0, q1, q3 + +# CHECK: vabav.s16 r0, q1, q3 @ encoding: [0x92,0xee,0x07,0x0f] +# CHECK-NOFP: vabav.s16 r0, q1, q3 @ encoding: [0x92,0xee,0x07,0x0f] +vabav.s16 r0, q1, q3 + +# CHECK: vabav.s32 r0, q1, q3 @ encoding: [0xa2,0xee,0x07,0x0f] +# CHECK-NOFP: vabav.s32 r0, q1, q3 @ encoding: [0xa2,0xee,0x07,0x0f] +vabav.s32 r0, q1, q3 + +# CHECK: vabav.u8 r0, q1, q3 @ encoding: [0x82,0xfe,0x07,0x0f] +# CHECK-NOFP: vabav.u8 r0, q1, q3 @ encoding: [0x82,0xfe,0x07,0x0f] +vabav.u8 r0, q1, q3 + +# CHECK: vabav.u16 r0, q1, q3 @ encoding: [0x92,0xfe,0x07,0x0f] +# CHECK-NOFP: vabav.u16 r0, q1, q3 @ encoding: [0x92,0xfe,0x07,0x0f] +vabav.u16 r0, q1, q3 + +# CHECK: vabav.u32 r0, q1, q3 @ encoding: [0xa2,0xfe,0x07,0x0f] +# CHECK-NOFP: vabav.u32 r0, q1, q3 @ encoding: [0xa2,0xfe,0x07,0x0f] +vabav.u32 r0, q1, q3 + +# ERROR: [[@LINE+1]]:{{[0-9]+}}: {{error|note}}: VPT predicated instructions must be in VPT block +vabavt.s32 lr, q1, q3 + +# ERROR: [[@LINE+1]]:{{[0-9]+}}: {{error|note}}: VPT predicated instructions must be in VPT block +vabave.u8 r12, q1, q3 + +# ERROR: [[@LINE+2]]:{{[0-9]+}}: {{error|note}}: instructions in IT block must be predicable +it eq +vabav.s16 lr, q1, q3 + +# CHECK: vpteee.i8 eq, q0, q1 @ encoding: [0x41,0xfe,0x02,0x2f] +# CHECK-NOFP: vpteee.i8 eq, q0, q1 @ encoding: [0x41,0xfe,0x02,0x2f] +vpteee.i8 eq, q0, q1 +vabavt.s32 lr, q1, q3 +vabave.s32 lr, q1, q3 +vabave.s32 lr, q1, q3 +vabave.s32 lr, q1, q3 + +# CHECK: vptttt.s32 gt, q0, q1 @ encoding: [0x21,0xfe,0x03,0x3f] +# CHECK-NOFP: vptttt.s32 gt, q0, q1 @ encoding: [0x21,0xfe,0x03,0x3f] +vptttt.s32 gt, q0, q1 +vabavt.u32 lr, q1, q3 
+vabavt.s32 lr, q1, q3
+vabavt.s16 lr, q1, q3
+vabavt.s8 lr, q1, q3
+
+# ERROR: [[@LINE+2]]:{{[0-9]+}}: {{error|note}}: instruction in VPT block must be predicable
+vpt.s8 le, q0, q1
+cinc lr, r2, lo
+
+# ----------------------------------------------------------------------
+# The following tests have to go last because of the NOFP-NOT checks inside the
+# VPT block.
+
+# CHECK: vptete.f16 ne, q0, q1 @ encoding: [0x71,0xfe,0x82,0xef]
+# CHECK-NOFP-NOT: vptete.f16 ne, q0, q1 @ encoding: [0x71,0xfe,0x82,0xef]
+vptete.f16 ne, q0, q1
+vabavt.s32 lr, q1, q3
+vabave.u32 lr, q1, q3
+vabavt.s32 lr, q1, q3
+vabave.s16 lr, q1, q3
+# ERROR: [[@LINE+1]]:{{[0-9]+}}: {{error|note}}: VPT predicated instructions must be in VPT block
+vabavt.s32 lr, q1, q3
+
+# CHECK: vmaxnm.f32 q0, q1, q4 @ encoding: [0x02,0xff,0x58,0x0f]
+# CHECK-NOFP-NOT: vmaxnm.f32 q0, q1, q4 @ encoding: [0x02,0xff,0x58,0x0f]
+vmaxnm.f32 q0, q1, q4
+
+# CHECK: vminnm.f16 q3, q0, q1 @ encoding: [0x30,0xff,0x52,0x6f]
+# CHECK-NOFP-NOT: vminnm.f16 q3, q0, q1 @ encoding: [0x30,0xff,0x52,0x6f]
+vminnm.f16 q3, q0, q1
+
+# CHECK: vpte.i8 eq, q0, q0
+# CHECK: vmaxnmt.f16 q1, q6, q2 @ encoding: [0x1c,0xff,0x54,0x2f]
+# CHECK-NOFP-NOT: vmaxnmt.f16 q1, q6, q2 @ encoding: [0x1c,0xff,0x54,0x2f]
+# CHECK-NOFP-NOT: vmaxnme.f16 q1, q6, q2 @ encoding: [0x1c,0xff,0x54,0x2f]
+vpte.i8 eq, q0, q0
+vmaxnmt.f16 q1, q6, q2
+vmaxnme.f16 q1, q6, q2
diff --git a/llvm/test/MC/Disassembler/ARM/mve-vpt.txt b/llvm/test/MC/Disassembler/ARM/mve-vpt.txt
new file mode 100644
--- /dev/null
+++ b/llvm/test/MC/Disassembler/ARM/mve-vpt.txt
@@ -0,0 +1,45 @@
+# RUN: llvm-mc -disassemble -triple=thumbv8.1m.main-none-eabi -mattr=+mve.fp,+fp64 -show-encoding %s | FileCheck %s
+# RUN: not llvm-mc -disassemble -triple=thumbv8.1m.main-none-eabi -show-encoding %s &> %t
+# RUN: FileCheck --check-prefix=CHECK-NOMVE < %t %s
+
+[0x82 0xee 0x07 0x0f]
+# CHECK: vabav.s8 r0, q1, q3
+# CHECK-NOMVE: [[@LINE-2]]:2: warning: invalid instruction encoding
+
+[0x92 0xee 0x07 0x0f]
+# CHECK: vabav.s16 r0, q1, q3
+# CHECK-NOMVE: [[@LINE-2]]:2: warning: invalid instruction encoding
+
+[0xa2 0xee 0x07 0x0f]
+# CHECK: vabav.s32 r0, q1, q3
+# CHECK-NOMVE: [[@LINE-2]]:2: warning: invalid instruction encoding
+
+[0x82 0xfe 0x07 0x0f]
+# CHECK: vabav.u8 r0, q1, q3
+# CHECK-NOMVE: [[@LINE-2]]:2: warning: invalid instruction encoding
+
+[0x92 0xfe 0x07 0x0f]
+# CHECK: vabav.u16 r0, q1, q3
+# CHECK-NOMVE: [[@LINE-2]]:2: warning: invalid instruction encoding
+
+[0xa2 0xfe 0x07 0x0f]
+# CHECK: vabav.u32 r0, q1, q3
+# CHECK-NOMVE: [[@LINE-2]]:2: warning: invalid instruction encoding
+
+# CHECK: vpte.i8 eq, q0, q0 @ encoding: [0x41,0xfe,0x00,0x8f]
+# CHECK-NOMVE: [[@LINE+5]]:2: warning: invalid instruction encoding
+# CHECK: vabavt.s16 lr, q3, q4 @ encoding: [0x96,0xee,0x09,0xef]
+# CHECK-NOMVE: [[@LINE+4]]:2: warning: invalid instruction encoding
+# CHECK: vabave.s16 lr, q3, q4 @ encoding: [0x96,0xee,0x09,0xef]
+# CHECK-NOMVE: [[@LINE+3]]:2: warning: invalid instruction encoding
+[0x41,0xfe,0x00,0x8f]
+[0x96,0xee,0x09,0xef]
+[0x96,0xee,0x09,0xef]
+
+# CHECK: vmaxnm.f32 q0, q1, q4 @ encoding: [0x02,0xff,0x58,0x0f]
+# CHECK-NOMVE: [[@LINE+1]]:2: warning: invalid instruction encoding
+[0x02,0xff,0x58,0x0f]
+
+# CHECK: vminnm.f16 q3, q0, q1 @ encoding: [0x30,0xff,0x52,0x6f]
+# CHECK-NOMVE: [[@LINE+1]]:2: warning: invalid instruction encoding
+[0x30,0xff,0x52,0x6f]