Index: llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp
===================================================================
--- llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp
+++ llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp
@@ -1294,9 +1294,28 @@
                                          SMLoc IDLoc) {
   MCContext &Context = getParser().getContext();
   const MCRegisterInfo *RI = getContext().getRegisterInfo();
-  std::string r = "r";
-  std::string v = "v";
-  std::string Colon = ":";
+  const std::string r = "r";
+  const std::string v = "v";
+  const std::string Colon = ":";
+  using RegPairVals = std::pair<unsigned, unsigned>;
+  auto GetRegPair = [this, r](RegPairVals RegPair) {
+    const std::string R1 = r + utostr(RegPair.first);
+    const std::string R2 = r + utostr(RegPair.second);
+
+    return std::make_pair(matchRegister(R1), matchRegister(R2));
+  };
+  auto GetScalarRegs = [RI, GetRegPair](unsigned RegPair) {
+    const unsigned Lower = RI->getEncodingValue(RegPair);
+    const RegPairVals RegPair_ = std::make_pair(Lower + 1, Lower);
+
+    return GetRegPair(RegPair_);
+  };
+  auto GetVecRegs = [GetRegPair](unsigned VecRegPair) {
+    const RegPairVals RegPair =
+        HexagonMCInstrInfo::GetVecRegPairIndices(VecRegPair);
+
+    return GetRegPair(RegPair);
+  };
 
   bool is32bit = false; // used to distinguish between CONST32 and CONST64
   switch (Inst.getOpcode()) {
@@ -1388,14 +1407,9 @@
   // Translate a "$Rdd = $Rss" to "$Rdd = combine($Rs, $Rt)"
   case Hexagon::A2_tfrp: {
     MCOperand &MO = Inst.getOperand(1);
-    unsigned int RegPairNum = RI->getEncodingValue(MO.getReg());
-    std::string R1 = r + utostr(RegPairNum + 1);
-    StringRef Reg1(R1);
-    MO.setReg(matchRegister(Reg1));
-    // Add a new operand for the second register in the pair.
-    std::string R2 = r + utostr(RegPairNum);
-    StringRef Reg2(R2);
-    Inst.addOperand(MCOperand::createReg(matchRegister(Reg2)));
+    const std::pair<unsigned, unsigned> RegPair = GetScalarRegs(MO.getReg());
+    MO.setReg(RegPair.first);
+    Inst.addOperand(MCOperand::createReg(RegPair.second));
     Inst.setOpcode(Hexagon::A2_combinew);
     break;
   }
@@ -1403,14 +1417,9 @@
   case Hexagon::A2_tfrpt:
   case Hexagon::A2_tfrpf: {
     MCOperand &MO = Inst.getOperand(2);
-    unsigned int RegPairNum = RI->getEncodingValue(MO.getReg());
-    std::string R1 = r + utostr(RegPairNum + 1);
-    StringRef Reg1(R1);
-    MO.setReg(matchRegister(Reg1));
-    // Add a new operand for the second register in the pair.
-    std::string R2 = r + utostr(RegPairNum);
-    StringRef Reg2(R2);
-    Inst.addOperand(MCOperand::createReg(matchRegister(Reg2)));
+    const std::pair<unsigned, unsigned> RegPair = GetScalarRegs(MO.getReg());
+    MO.setReg(RegPair.first);
+    Inst.addOperand(MCOperand::createReg(RegPair.second));
     Inst.setOpcode((Inst.getOpcode() == Hexagon::A2_tfrpt)
                        ? Hexagon::C2_ccombinewt
                        : Hexagon::C2_ccombinewf);
@@ -1419,14 +1428,9 @@
   case Hexagon::A2_tfrptnew:
   case Hexagon::A2_tfrpfnew: {
     MCOperand &MO = Inst.getOperand(2);
-    unsigned int RegPairNum = RI->getEncodingValue(MO.getReg());
-    std::string R1 = r + utostr(RegPairNum + 1);
-    StringRef Reg1(R1);
-    MO.setReg(matchRegister(Reg1));
-    // Add a new operand for the second register in the pair.
-    std::string R2 = r + utostr(RegPairNum);
-    StringRef Reg2(R2);
-    Inst.addOperand(MCOperand::createReg(matchRegister(Reg2)));
+    const std::pair<unsigned, unsigned> RegPair = GetScalarRegs(MO.getReg());
+    MO.setReg(RegPair.first);
+    Inst.addOperand(MCOperand::createReg(RegPair.second));
     Inst.setOpcode((Inst.getOpcode() == Hexagon::A2_tfrptnew)
                        ? Hexagon::C2_ccombinewnewt
                        : Hexagon::C2_ccombinewnewf);
@@ -1436,12 +1440,9 @@
   // Translate a "$Vdd = $Vss" to "$Vdd = vcombine($Vs, $Vt)"
   case Hexagon::V6_vassignp: {
     MCOperand &MO = Inst.getOperand(1);
-    unsigned int RegPairNum = RI->getEncodingValue(MO.getReg());
-    std::string R1 = v + utostr(RegPairNum + 1);
-    MO.setReg(MatchRegisterName(R1));
-    // Add a new operand for the second register in the pair.
-    std::string R2 = v + utostr(RegPairNum);
-    Inst.addOperand(MCOperand::createReg(MatchRegisterName(R2)));
+    const std::pair<unsigned, unsigned> RegPair = GetVecRegs(MO.getReg());
+    MO.setReg(RegPair.first);
+    Inst.addOperand(MCOperand::createReg(RegPair.second));
     Inst.setOpcode(Hexagon::V6_vcombine);
     break;
   }
Index: llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
===================================================================
--- llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
+++ llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
@@ -498,9 +498,13 @@
     } else if (HexagonMCInstrInfo::hasNewValue(*MCII, Inst)) {
       unsigned Producer =
           HexagonMCInstrInfo::getNewValueOperand(*MCII, Inst).getReg();
-      if (Producer >= Hexagon::W0 && Producer <= Hexagon::W15)
-        Producer = ((Producer - Hexagon::W0) << 1) + SubregBit + Hexagon::V0;
-      else if (SubregBit)
+
+      if (HexagonMCInstrInfo::IsVecRegPair(Producer)) {
+        const bool Rev = HexagonMCInstrInfo::IsReverseVecRegPair(Producer);
+        const unsigned ProdPairIndex =
+            Rev ? Producer - Hexagon::WR0 : Producer - Hexagon::W0;
+        Producer = (ProdPairIndex << 1) + SubregBit + Hexagon::V0;
+      } else if (SubregBit)
         // Hexagon PRM 10.11 New-value operands
         // Nt[0] is reserved and should always be encoded as zero.
         return MCDisassembler::Fail;
@@ -606,12 +610,16 @@
                                              uint64_t /*Address*/,
                                              const void *Decoder) {
   static const MCPhysReg HvxWRDecoderTable[] = {
-      Hexagon::W0,  Hexagon::W1,  Hexagon::W2,  Hexagon::W3,
-      Hexagon::W4,  Hexagon::W5,  Hexagon::W6,  Hexagon::W7,
-      Hexagon::W8,  Hexagon::W9,  Hexagon::W10, Hexagon::W11,
-      Hexagon::W12, Hexagon::W13, Hexagon::W14, Hexagon::W15};
+      Hexagon::W0,   Hexagon::WR0,  Hexagon::W1,   Hexagon::WR1,  Hexagon::W2,
+      Hexagon::WR2,  Hexagon::W3,   Hexagon::WR3,  Hexagon::W4,   Hexagon::WR4,
+      Hexagon::W5,   Hexagon::WR5,  Hexagon::W6,   Hexagon::WR6,  Hexagon::W7,
+      Hexagon::WR7,  Hexagon::W8,   Hexagon::WR8,  Hexagon::W9,   Hexagon::WR9,
+      Hexagon::W10,  Hexagon::WR10, Hexagon::W11,  Hexagon::WR11, Hexagon::W12,
+      Hexagon::WR12, Hexagon::W13,  Hexagon::WR13, Hexagon::W14,  Hexagon::WR14,
+      Hexagon::W15,  Hexagon::WR15,
+  };
 
-  return (DecodeRegisterClass(Inst, RegNo >> 1, HvxWRDecoderTable));
+  return DecodeRegisterClass(Inst, RegNo, HvxWRDecoderTable);
 }
 
 LLVM_ATTRIBUTE_UNUSED  // Suppress warning temporarily.
Index: llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp
===================================================================
--- llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp
+++ llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp
@@ -172,6 +172,13 @@
   Reserved.set(Hexagon::C8);
   Reserved.set(Hexagon::USR_OVF);
 
+  // Reserve the reverse vector register pairs: leveraging them will require
+  // more work to recognize their new semantics, Hi/LoVec patterns, etc.
+  // Note well: if enabled, they should be restricted to only
+  // where `HST.useHVXOps() && HST.hasV67Ops()` is true.
+  for (auto Reg : Hexagon_MC::GetVectRegRev())
+    Reserved.set(Reg);
+
   if (MF.getSubtarget<HexagonSubtarget>().hasReservedR19())
     Reserved.set(Hexagon::R19);
 
Index: llvm/lib/Target/Hexagon/HexagonRegisterInfo.td
===================================================================
--- llvm/lib/Target/Hexagon/HexagonRegisterInfo.td
+++ llvm/lib/Target/Hexagon/HexagonRegisterInfo.td
@@ -18,6 +18,12 @@
     let HWEncoding{4-0} = num;
   }
 
+  // These registers are used to preserve a distinction between
+  // vector register pairs of differing order.
+  class HexagonFakeReg<string n> : Register<n> {
+    let isArtificial = 1;
+  }
+
   class HexagonDoubleReg<bits<5> num, string n, list<Register> subregs,
                          list<string> alt = []> :
         RegisterWithSubRegs<n, subregs> {
@@ -30,6 +36,13 @@
   class Ri<bits<5> num, string n, list<string> alt = []> :
         HexagonReg<num, n, alt>;
 
+  // R_fake - false/pseudo registers.  These registers are used
+  // to provide a distinct set of aliases for both styles of vector
+  // register pairs without encountering subregister indexing constraints.
+  class R_fake<string n> :
+        HexagonFakeReg<n>;
+
+
   // Rf - 32-bit floating-point registers.
   class Rf<bits<5> num, string n> : HexagonReg<num, n>;
 
@@ -81,6 +94,7 @@
   def isub_hi  : SubRegIndex<32, 32>;
   def vsub_lo  : SubRegIndex<512>;
   def vsub_hi  : SubRegIndex<512, 512>;
+  def vsub_fake: SubRegIndex<512>;
   def wsub_lo  : SubRegIndex<1024>;
   def wsub_hi  : SubRegIndex<1024, 1024>;
   def subreg_overflow : SubRegIndex<1, 0>;
@@ -183,27 +197,49 @@
 
   foreach i = 0-31 in {
     def V#i  : Ri<i, "v"#i>,  DwarfRegNum<[!add(i, 99)]>;
+    def VF#i : R_fake<"__"#!add(i,999999)>,  DwarfRegNum<[!add(i, 999999)]>;
+    def VFR#i : R_fake<"__"#!add(i,9999999)>,  DwarfRegNum<[!add(i, 9999999)]>;
   }
   def VTMP : Ri<0, "vtmp">, DwarfRegNum<[131]>;
 
   // Aliases of the V* registers used to hold double vec values.
-  let SubRegIndices = [vsub_lo, vsub_hi], CoveredBySubRegs = 1 in {
-  def W0  : Rd< 0,  "v1:0",  [V0,  V1]>,  DwarfRegNum<[99]>;
-  def W1  : Rd< 2,  "v3:2",  [V2,  V3]>,  DwarfRegNum<[101]>;
-  def W2  : Rd< 4,  "v5:4",  [V4,  V5]>,  DwarfRegNum<[103]>;
-  def W3  : Rd< 6,  "v7:6",  [V6,  V7]>,  DwarfRegNum<[105]>;
-  def W4  : Rd< 8,  "v9:8",  [V8,  V9]>,  DwarfRegNum<[107]>;
-  def W5  : Rd<10, "v11:10", [V10, V11]>, DwarfRegNum<[109]>;
-  def W6  : Rd<12, "v13:12", [V12, V13]>, DwarfRegNum<[111]>;
-  def W7  : Rd<14, "v15:14", [V14, V15]>, DwarfRegNum<[113]>;
-  def W8  : Rd<16, "v17:16", [V16, V17]>, DwarfRegNum<[115]>;
-  def W9  : Rd<18, "v19:18", [V18, V19]>, DwarfRegNum<[117]>;
-  def W10 : Rd<20, "v21:20", [V20, V21]>, DwarfRegNum<[119]>;
-  def W11 : Rd<22, "v23:22", [V22, V23]>, DwarfRegNum<[121]>;
-  def W12 : Rd<24, "v25:24", [V24, V25]>, DwarfRegNum<[123]>;
-  def W13 : Rd<26, "v27:26", [V26, V27]>, DwarfRegNum<[125]>;
-  def W14 : Rd<28, "v29:28", [V28, V29]>, DwarfRegNum<[127]>;
-  def W15 : Rd<30, "v31:30", [V30, V31]>, DwarfRegNum<[129]>;
+  let SubRegIndices = [vsub_lo, vsub_hi, vsub_fake], CoveredBySubRegs = 1 in {
+  def W0  : Rd< 0,  "v1:0",  [V0,  V1, VF0]>,  DwarfRegNum<[99]>;
+  def W1  : Rd< 2,  "v3:2",  [V2,  V3, VF1]>,  DwarfRegNum<[101]>;
+  def W2  : Rd< 4,  "v5:4",  [V4,  V5, VF2]>,  DwarfRegNum<[103]>;
+  def W3  : Rd< 6,  "v7:6",  [V6,  V7, VF3]>,  DwarfRegNum<[105]>;
+  def W4  : Rd< 8,  "v9:8",  [V8,  V9, VF4]>,  DwarfRegNum<[107]>;
+  def W5  : Rd<10, "v11:10", [V10, V11, VF5]>, DwarfRegNum<[109]>;
+  def W6  : Rd<12, "v13:12", [V12, V13, VF6]>, DwarfRegNum<[111]>;
+  def W7  : Rd<14, "v15:14", [V14, V15, VF7]>, DwarfRegNum<[113]>;
+  def W8  : Rd<16, "v17:16", [V16, V17, VF8]>, DwarfRegNum<[115]>;
+  def W9  : Rd<18, "v19:18", [V18, V19, VF9]>, DwarfRegNum<[117]>;
+  def W10 : Rd<20, "v21:20", [V20, V21, VF10]>, DwarfRegNum<[119]>;
+  def W11 : Rd<22, "v23:22", [V22, V23, VF11]>, DwarfRegNum<[121]>;
+  def W12 : Rd<24, "v25:24", [V24, V25, VF12]>, DwarfRegNum<[123]>;
+  def W13 : Rd<26, "v27:26", [V26, V27, VF13]>, DwarfRegNum<[125]>;
+  def W14 : Rd<28, "v29:28", [V28, V29, VF14]>, DwarfRegNum<[127]>;
+  def W15 : Rd<30, "v31:30", [V30, V31, VF15]>, DwarfRegNum<[129]>;
+  }
+
+  // Reverse Aliases of the V* registers used to hold double vec values.
+  let SubRegIndices = [vsub_lo, vsub_hi, vsub_fake], CoveredBySubRegs = 1 in {
+  def WR0 : Rd< 1,  "v0:1",  [V0, V1, VFR0]>,  DwarfRegNum<[161]>;
+  def WR1 : Rd< 3,  "v2:3",  [V2, V3, VFR1]>,  DwarfRegNum<[162]>;
+  def WR2 : Rd< 5,  "v4:5",  [V4, V5, VFR2]>,  DwarfRegNum<[163]>;
+  def WR3 : Rd< 7,  "v6:7",  [V6, V7, VFR3]>,  DwarfRegNum<[164]>;
+  def WR4 : Rd< 9,  "v8:9",  [V8, V9, VFR4]>,  DwarfRegNum<[165]>;
+  def WR5 : Rd<11, "v10:11", [V10, V11, VFR5]>,  DwarfRegNum<[166]>;
+  def WR6 : Rd<13, "v12:13", [V12, V13, VFR6]>,  DwarfRegNum<[167]>;
+  def WR7 : Rd<15, "v14:15", [V14, V15, VFR7]>,  DwarfRegNum<[168]>;
+  def WR8 : Rd<17, "v16:17", [V16, V17, VFR8]>,  DwarfRegNum<[169]>;
+  def WR9 : Rd<19, "v18:19", [V18, V19, VFR9]>,  DwarfRegNum<[170]>;
+  def WR10: Rd<21, "v20:21", [V20, V21, VFR10]>,  DwarfRegNum<[171]>;
+  def WR11: Rd<23, "v22:23", [V22, V23, VFR11]>,  DwarfRegNum<[172]>;
+  def WR12: Rd<25, "v24:25", [V24, V25, VFR12]>,  DwarfRegNum<[173]>;
+  def WR13: Rd<27, "v26:27", [V26, V27, VFR13]>,  DwarfRegNum<[174]>;
+  def WR14: Rd<29, "v28:29", [V28, V29, VFR14]>,  DwarfRegNum<[175]>;
+  def WR15: Rd<31, "v30:31", [V30, V31, VFR15]>,  DwarfRegNum<[176]>;
   }
 
   // Aliases of the V* registers used to hold quad vec values.
@@ -314,7 +350,7 @@
 }
 
 def HvxWR : RegisterClass<"Hexagon", [VecPI8, VecPI16, VecPI32], 1024,
-  (add (sequence "W%u", 0, 15))> {
+  (add (sequence "W%u", 0, 15), (sequence "WR%u", 0, 15))> {
   let RegInfos = RegInfoByHwMode<[Hvx64, Hvx128, DefaultMode],
     [RegInfo<1024,1024,1024>, RegInfo<2048,2048,2048>, RegInfo<1024,1024,1024>]>;
 }
@@ -365,6 +401,10 @@
        FRAMELIMIT, FRAMEKEY, PKTCOUNTLO, PKTCOUNTHI, UTIMERLO, UTIMERHI,
        M0, M1, USR)>;
 
+let Size = 64 in
+def VectRegRev : RegisterClass<"Hexagon", [i64], 64,
+  (add (sequence "WR%u", 0, 15))>;
+
 let isAllocatable = 0 in
 def UsrBits : RegisterClass<"Hexagon", [i1], 0, (add USR_OVF)>;
 
Index: llvm/lib/Target/Hexagon/HexagonVectorPrint.cpp
===================================================================
--- llvm/lib/Target/Hexagon/HexagonVectorPrint.cpp
+++ llvm/lib/Target/Hexagon/HexagonVectorPrint.cpp
@@ -71,9 +71,10 @@
 char HexagonVectorPrint::ID = 0;
 
 static bool isVecReg(unsigned Reg) {
-  return (Reg >= Hexagon::V0 && Reg <= Hexagon::V31)
-      || (Reg >= Hexagon::W0 && Reg <= Hexagon::W15)
-      || (Reg >= Hexagon::Q0 && Reg <= Hexagon::Q3);
+  return (Reg >= Hexagon::V0 && Reg <= Hexagon::V31) ||
+         (Reg >= Hexagon::W0 && Reg <= Hexagon::W15) ||
+         (Reg >= Hexagon::WR0 && Reg <= Hexagon::WR15) ||
+         (Reg >= Hexagon::Q0 && Reg <= Hexagon::Q3);
 }
 
 static std::string getStringReg(unsigned R) {
Index: llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.h
===================================================================
--- llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.h
+++ llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.h
@@ -72,6 +72,10 @@
   using ReadOnlyIterator = std::set<unsigned>::iterator;
   std::set<unsigned> ReadOnly;
 
+  // Contains the vector-pair-registers with the even number
+  // first ("v0:1", e.g.) used/def'd in this packet.
+  std::set<unsigned> ReversePairs;
+
   void init();
   void init(MCInst const &);
   void initReg(MCInst const &, unsigned, unsigned &PredReg, bool &isTrue);
@@ -94,6 +98,7 @@
   bool checkAXOK();
   bool checkHWLoop();
   bool checkCOFMax1();
+  bool checkLegalVecRegPair();
 
   static void compoundRegisterMap(unsigned &);
 
Index: llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp
===================================================================
--- llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp
+++ llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp
@@ -81,6 +81,9 @@
       if (!MCSubRegIterator(*SRI, &RI).isValid())
         // Skip super-registers used indirectly.
         Uses.insert(*SRI);
+
+  if (HexagonMCInstrInfo::IsReverseVecRegPair(R))
+    ReversePairs.insert(R);
 }
 
 void HexagonMCChecker::init(MCInst const &MCI) {
@@ -133,6 +136,9 @@
     if (R == Hexagon::C8)
       R = Hexagon::USR;
 
+    if (HexagonMCInstrInfo::IsReverseVecRegPair(R))
+      ReversePairs.insert(R);
+
     // Note register definitions, direct ones as well as indirect side-effects.
     // Super-registers are not tracked directly, but their components.
     for (MCRegAliasIterator SRI(R, &RI, !MCSubRegIterator(R, &RI).isValid());
@@ -192,7 +198,7 @@
                                    MCSubtargetInfo const &STI, MCInst &mcb,
                                    MCRegisterInfo const &ri, bool ReportErrors)
     : Context(Context), MCB(mcb), RI(ri), MCII(MCII), STI(STI),
-      ReportErrors(ReportErrors) {
+      ReportErrors(ReportErrors), ReversePairs() {
   init();
 }
 
@@ -200,7 +206,10 @@
                                    MCSubtargetInfo const &STI,
                                    bool CopyReportErrors)
     : Context(Other.Context), MCB(Other.MCB), RI(Other.RI), MCII(Other.MCII),
-      STI(STI), ReportErrors(CopyReportErrors ? Other.ReportErrors : false) {}
+      STI(STI), ReportErrors(CopyReportErrors ? Other.ReportErrors : false),
+      ReversePairs() {
+  init();
+}
 
 bool HexagonMCChecker::check(bool FullCheck) {
   bool chkP = checkPredicates();
@@ -218,8 +227,9 @@
   bool chkAXOK = checkAXOK();
   bool chkCofMax1 = checkCOFMax1();
   bool chkHWLoop = checkHWLoop();
+  bool chkLegalVecRegPair = checkLegalVecRegPair();
   bool chk = chkP && chkNV && chkR && chkRRO && chkS && chkSh && chkSl &&
-             chkAXOK && chkCofMax1 && chkHWLoop;
+             chkAXOK && chkCofMax1 && chkHWLoop && chkLegalVecRegPair;
 
   return chk;
 }
@@ -729,3 +739,16 @@
   if (ReportErrors)
     Context.reportWarning(MCB.getLoc(), Msg);
 }
+
+/// Reports an error for every register recorded in ReversePairs when the
+/// subtarget's feature bits lack Hexagon::ArchV67; returns false in that
+/// case and true otherwise.
+bool HexagonMCChecker::checkLegalVecRegPair() {
+  if (ReversePairs.empty() || STI.getFeatureBits()[Hexagon::ArchV67])
+    return true;
+
+  for (auto R : ReversePairs)
+    reportError("register pair `" + Twine(RI.getName(R)) +
+                "' is not permitted for this architecture");
+  return false;
+}
Index: llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp
===================================================================
--- llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp
+++ llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp
@@ -391,15 +391,9 @@
 
 static bool RegisterMatches(unsigned Consumer, unsigned Producer,
                             unsigned Producer2) {
-  if (Consumer == Producer)
-    return true;
-  if (Consumer == Producer2)
-    return true;
-  // Calculate if we're a single vector consumer referencing a double producer
-  if (Producer >= Hexagon::W0 && Producer <= Hexagon::W15)
-    if (Consumer >= Hexagon::V0 && Consumer <= Hexagon::V31)
-      return ((Consumer - Hexagon::V0) >> 1) == (Producer - Hexagon::W0);
-  return false;
+  return (Consumer == Producer) || (Consumer == Producer2) ||
+         HexagonMCInstrInfo::IsSingleConsumerRefPairProducer(Producer,
+                                                             Consumer);
 }
 
 /// EncodeSingleInstruction - Emit a single
@@ -735,7 +729,8 @@
     unsigned SOffset = 0;
     unsigned VOffset = 0;
     unsigned UseReg = MO.getReg();
-    unsigned DefReg1, DefReg2;
+    unsigned DefReg1 = Hexagon::NoRegister;
+    unsigned DefReg2 = Hexagon::NoRegister;
 
     auto Instrs = HexagonMCInstrInfo::bundleInstructions(*State.Bundle);
     const MCOperand *I = Instrs.begin() + State.Index - 1;
@@ -746,7 +741,8 @@
       if (HexagonMCInstrInfo::isImmext(Inst))
         continue;
 
-      DefReg1 = DefReg2 = 0;
+      DefReg1 = Hexagon::NoRegister;
+      DefReg2 = Hexagon::NoRegister;
       ++SOffset;
       if (HexagonMCInstrInfo::isVector(MCII, Inst)) {
         // Vector instructions don't count scalars.
Index: llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h
===================================================================
--- llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h
+++ llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h
@@ -351,6 +351,16 @@
 unsigned SubregisterBit(unsigned Consumer, unsigned Producer,
                         unsigned Producer2);
 
+bool IsVecRegSingle(unsigned VecReg);
+bool IsVecRegPair(unsigned VecReg);
+bool IsReverseVecRegPair(unsigned VecReg);
+bool IsSingleConsumerRefPairProducer(unsigned Producer, unsigned Consumer);
+
+/// Returns an ordered pair of the constituent register ordinals for
+/// each of the elements of \a VecRegPair.  For example, Hexagon::WR0 ("v0:1")
+/// returns { 0, 1 } and Hexagon::W1 ("v3:2") returns { 3, 2 }.
+std::pair<unsigned, unsigned> GetVecRegPairIndices(unsigned VecRegPair);
+
 // Attempt to find and replace compound pairs
 void tryCompound(MCInstrInfo const &MCII, MCSubtargetInfo const &STI,
                  MCContext &Context, MCInst &MCI);
Index: llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp
===================================================================
--- llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp
+++ llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp
@@ -676,6 +676,45 @@
   return (Flags & outerLoopMask) != 0;
 }
 
+/// Returns true if \p VecReg is a vector register pair (W0..W15 or WR0..WR15).
+bool HexagonMCInstrInfo::IsVecRegPair(unsigned VecReg) {
+  return IsReverseVecRegPair(VecReg) ||
+         !(VecReg < Hexagon::W0 || VecReg > Hexagon::W15);
+}
+
+/// Returns true if \p VecReg lies in the reverse-pair range WR0..WR15.
+bool HexagonMCInstrInfo::IsReverseVecRegPair(unsigned VecReg) {
+  return !(VecReg < Hexagon::WR0 || VecReg > Hexagon::WR15);
+}
+
+/// Returns true if \p VecReg lies in the single-vector range V0..V31.
+bool HexagonMCInstrInfo::IsVecRegSingle(unsigned VecReg) {
+  return !(VecReg < Hexagon::V0 || VecReg > Hexagon::V31);
+}
+
+/// Returns {2n, 2n+1} for reverse pair WR0+n and {2n+1, 2n} for pair W0+n.
+std::pair<unsigned, unsigned>
+HexagonMCInstrInfo::GetVecRegPairIndices(unsigned VecRegPair) {
+  assert(IsVecRegPair(VecRegPair) &&
+         "VecRegPair must be a vector register pair");
+  if (IsReverseVecRegPair(VecRegPair)) {
+    const unsigned Even = 2 * (VecRegPair - Hexagon::WR0);
+    return std::make_pair(Even, Even + 1);
+  }
+  const unsigned Even = 2 * (VecRegPair - Hexagon::W0);
+  return std::make_pair(Even + 1, Even);
+}
+
+/// True if \p Producer is a pair whose index equals (\p Consumer - V0) / 2.
+bool HexagonMCInstrInfo::IsSingleConsumerRefPairProducer(unsigned Producer,
+                                                         unsigned Consumer) {
+  if (!IsVecRegPair(Producer) || !IsVecRegSingle(Consumer))
+    return false;
+  const bool Rev = IsReverseVecRegPair(Producer);
+  const unsigned PairIdx = Producer - (Rev ? Hexagon::WR0 : Hexagon::W0);
+  return PairIdx == ((Consumer - Hexagon::V0) >> 1);
+}
+
 bool HexagonMCInstrInfo::isPredicated(MCInstrInfo const &MCII,
                                       MCInst const &MCI) {
   const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
@@ -971,9 +1010,8 @@
                                             unsigned Producer2) {
   // If we're a single vector consumer of a double producer, set subreg bit
   // based on if we're accessing the lower or upper register component
-  if (Producer >= Hexagon::W0 && Producer <= Hexagon::W15)
-    if (Consumer >= Hexagon::V0 && Consumer <= Hexagon::V31)
-      return (Consumer - Hexagon::V0) & 0x1;
+  if (IsVecRegPair(Producer) && IsVecRegSingle(Consumer))
+    return (Consumer - Hexagon::V0) & 0x1;
   if (Producer2 != Hexagon::NoRegister)
     return Consumer == Producer;
   return 0;
Index: llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h
===================================================================
--- llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h
+++ llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h
@@ -13,6 +13,7 @@
 #ifndef LLVM_LIB_TARGET_HEXAGON_MCTARGETDESC_HEXAGONMCTARGETDESC_H
 #define LLVM_LIB_TARGET_HEXAGON_MCTARGETDESC_HEXAGONMCTARGETDESC_H
 
+#include "llvm/MC/MCRegisterInfo.h"
 #include "llvm/Support/CommandLine.h"
 #include <cstdint>
 #include <string>
@@ -82,6 +83,8 @@
   void addArchSubtarget(MCSubtargetInfo const *STI,
                         StringRef FS);
   unsigned GetELFFlags(const MCSubtargetInfo &STI);
+
+  llvm::ArrayRef<MCPhysReg> GetVectRegRev();
 }
 
 MCCodeEmitter *createHexagonMCCodeEmitter(const MCInstrInfo &MCII,
Index: llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
===================================================================
--- llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
+++ llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
@@ -532,6 +532,8 @@
   return F->second;
 }
 
+llvm::ArrayRef<MCPhysReg> Hexagon_MC::GetVectRegRev() { return VectRegRev; }
+
 namespace {
 class HexagonMCInstrAnalysis : public MCInstrAnalysis {
 public:
Index: llvm/test/CodeGen/Hexagon/swp-sigma.ll
===================================================================
--- llvm/test/CodeGen/Hexagon/swp-sigma.ll
+++ llvm/test/CodeGen/Hexagon/swp-sigma.ll
@@ -2,28 +2,11 @@
 
 ; We do not pipeline sigma yet, but the non-pipelined version
 ; with good scheduling is pretty fast. The compiler generates
-; 19 packets, and the assembly version is 16.
+; 18 packets, and the assembly version is 16.
 
 ; CHECK:  loop0(.LBB0_[[LOOP:.]],
 ; CHECK: .LBB0_[[LOOP]]:
-; CHECK: }
-; CHECK: }
-; CHECK: }
-; CHECK: }
-; CHECK: }
-; CHECK: }
-; CHECK: }
-; CHECK: }
-; CHECK: }
-; CHECK: }
-; CHECK: }
-; CHECK: }
-; CHECK: }
-; CHECK: }
-; CHECK: }
-; CHECK: }
-; CHECK: }
-; CHECK: }
+; CHECK-COUNT-17: }
 ; CHECK: }{{[ \t]*}}:endloop
 
 @g0 = external constant [10 x i16], align 128
Index: llvm/test/CodeGen/Hexagon/vect-regpairs.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/Hexagon/vect-regpairs.ll
@@ -0,0 +1,135 @@
+;RUN: llc -march=hexagon -mcpu=hexagonv66 -mhvx -filetype=obj < %s -o - | llvm-objdump -mv66 -mhvx -d - | FileCheck --check-prefix=CHECK-V66 %s
+;RUN: llc -march=hexagon -mcpu=hexagonv67 -mhvx -filetype=obj < %s -o - | llvm-objdump -mv67 -mhvx -d - | FileCheck --check-prefix=CHECK-V67 %s
+
+; Should not attempt to use v<even>:<odd> 'reverse' vector regpairs
+; on old or new arches (should not crash).
+
+; CHECK-V66: vcombine
+; CHECK-V67: vcombine
+declare <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32>, <16 x i32>)
+declare <16 x i32> @llvm.hexagon.V6.vd0()
+declare <32 x i32> @llvm.hexagon.V6.vmpybus(<16 x i32>, i32)
+declare <32 x i32> @llvm.hexagon.V6.vmpabus.acc(<32 x i32>, <32 x i32>, i32)
+declare <16 x i32> @llvm.hexagon.V6.hi(<32 x i32>)
+declare <16 x i32> @llvm.hexagon.V6.vlalignbi(<16 x i32>, <16 x i32>, i32 )
+declare <16 x i32> @llvm.hexagon.V6.lo(<32 x i32>)
+declare <16 x i32> @llvm.hexagon.V6.valignbi(<16 x i32>, <16 x i32>, i32 )
+declare <16 x i32> @llvm.hexagon.V6.vaddh(<16 x i32>, <16 x i32>)
+declare <16 x i32> @llvm.hexagon.V6.vmpyihb.acc(<16 x i32>, <16 x i32>, i32)
+declare <16 x i32> @llvm.hexagon.V6.vasrhubrndsat(<16 x i32>, <16 x i32>, i32)
+
+declare <32 x i32> @llvm.hexagon.V6.vaddubh(<16 x i32>, <16 x i32>)
+declare <32 x i32> @llvm.hexagon.V6.vmpybus.acc(<32 x i32>, <16 x i32>, i32)
+declare <16 x i32> @llvm.hexagon.V6.vmpyiwb.acc(<16 x i32>, <16 x i32>, i32)
+declare <16 x i32> @llvm.hexagon.V6.vshuffob(<16 x i32>, <16 x i32>)
+
+
+define void @Gaussian7x7u8PerRow(i8* %src, i32 %stride, i32 %width, i8* %dst) #0 {
+entry:
+  %mul = mul i32 %stride, 3
+  %idx.neg = sub i32 0, %mul
+  %add.ptr = getelementptr i8, i8* %src, i32 %idx.neg
+  bitcast i8* %add.ptr to <16 x i32>*
+  %mul1 = shl i32 %stride, 1
+  %idx.neg2 = sub i32 0, %mul1
+  %add.ptr3 = getelementptr i8, i8* %src, i32 %idx.neg2
+  bitcast i8* %add.ptr3 to <16 x i32>*
+  %idx.neg5 = sub i32 0, %stride
+  %add.ptr6 = getelementptr i8, i8* %src, i32 %idx.neg5
+  bitcast i8* %add.ptr6 to <16 x i32>*
+  bitcast i8* %src to <16 x i32>*
+  %add.ptr10 = getelementptr i8, i8* %src, i32 %stride
+  bitcast i8* %add.ptr10 to <16 x i32>*
+  %add.ptr12 = getelementptr i8, i8* %src, i32 %mul1
+  bitcast i8* %add.ptr12 to <16 x i32>*
+  %add.ptr14 = getelementptr i8, i8* %src, i32 %mul
+  bitcast i8* %add.ptr14 to <16 x i32>*
+  bitcast i8* %dst to <16 x i32>*
+  load <16 x i32>, <16 x i32>* %0load <16 x i32>, <16 x i32>* %1load <16 x i32>, <16 x i32>* %2load <16 x i32>, <16 x i32>* %3load <16 x i32>, <16 x i32>* %4load <16 x i32>, <16 x i32>* %5load <16 x i32>, <16 x i32>* %6call <16 x i32> @llvm.hexagon.V6.vd0()
+  call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %15, <16 x i32> %15)
+  call <32 x i32> @llvm.hexagon.V6.vaddubh(<16 x i32> %14, <16 x i32> %8)
+  call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %13, <16 x i32> %9)
+  call <32 x i32> @llvm.hexagon.V6.vmpabus.acc(<32 x i32> %17, <32 x i32> %18, i32 101058054)
+  call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %12, <16 x i32> %10)
+  call <32 x i32> @llvm.hexagon.V6.vmpabus.acc(<32 x i32> %19, <32 x i32> %20, i32 252645135)
+  call <32 x i32> @llvm.hexagon.V6.vmpybus.acc(<32 x i32> %21, <16 x i32> %11, i32 336860180)
+  %cmp155 = icmp sgt i32 %width, 64
+  br i1 %cmp155, label %for.body.preheader, label %for.end
+for.body.preheader:                               %incdec.ptr20 = getelementptr i8, i8* %add.ptr14%23 = bitcast i8* %incdec.ptr20 to <16 x i32>*
+  %incdec.ptr19 = getelementptr i8, i8* %add.ptr12%24 = bitcast i8* %incdec.ptr19 to <16 x i32>*
+  %incdec.ptr18 = getelementptr i8, i8* %add.ptr10%25 = bitcast i8* %incdec.ptr18 to <16 x i32>*
+  %incdec.ptr17 = getelementptr i8, i8* %src%26 = bitcast i8* %incdec.ptr17 to <16 x i32>*
+  %incdec.ptr16 = getelementptr i8, i8* %add.ptr6%27 = bitcast i8* %incdec.ptr16 to <16 x i32>*
+  %incdec.ptr15 = getelementptr i8, i8* %add.ptr3%28 = bitcast i8* %incdec.ptr15 to <16 x i32>*
+  %incdec.ptr = getelementptr i8, i8* %add.ptr%29 = bitcast i8* %incdec.ptr to <16 x i32>*
+  br label %for.body
+for.body:                                         %optr.0166 = phi <16 x i32>* [ %incdec.ptr28, %for.body ], [ %7, %for.body.preheader ]
+  %iptr6.0165 = phi <16 x i32>* [ %incdec.ptr27, %for.body ], [ %23, %for.body.preheader ]
+  %iptr5.0164 = phi <16 x i32>* [ %incdec.ptr26, %for.body ], [ %24, %for.body.preheader ]
+  %iptr4.0163 = phi <16 x i32>* [ %incdec.ptr25, %for.body ], [ %25, %for.body.preheader ]
+  %iptr3.0162 = phi <16 x i32>* [ %incdec.ptr24, %for.body ], [ %26, %for.body.preheader ]
+  %iptr2.0161 = phi <16 x i32>* [ %incdec.ptr23, %for.body ], [ %27, %for.body.preheader ]
+  %iptr1.0160 = phi <16 x i32>* [ %incdec.ptr22, %for.body ], [ %28, %for.body.preheader ]
+  %iptr0.0159 = phi <16 x i32>* [ %incdec.ptr21, %for.body ], [ %29, %for.body.preheader ]
+  %dXV1.0158 = phi <32 x i32> [ %49, %for.body ], [ %22, %for.body.preheader ]
+  %dXV0.0157 = phi <32 x i32> [ %dXV1.0158, %for.body ], [ %16, %for.body.preheader ]
+  %i.0156 = phi i32 [ %sub, %for.body ], [ %width, %for.body.preheader ]
+  %incdec.ptr21 = getelementptr <16 x i32>, <16 x i32>* %iptr0.0159%30 = load <16 x i32>, <16 x i32>* %iptr0.0159%incdec.ptr22 = getelementptr <16 x i32>, <16 x i32>* %iptr1.0160%31 = load <16 x i32>, <16 x i32>* %iptr1.0160%incdec.ptr23 = getelementptr <16 x i32>, <16 x i32>* %iptr2.0161%32 = load <16 x i32>, <16 x i32>* %iptr2.0161%incdec.ptr24 = getelementptr <16 x i32>, <16 x i32>* %iptr3.0162%33 = load <16 x i32>, <16 x i32>* %iptr3.0162%incdec.ptr25 = getelementptr <16 x i32>, <16 x i32>* %iptr4.0163%34 = load <16 x i32>, <16 x i32>* %iptr4.0163%incdec.ptr26 = getelementptr <16 x i32>, <16 x i32>* %iptr5.0164%35 = load <16 x i32>, <16 x i32>* %iptr5.0164%incdec.ptr27 = getelementptr <16 x i32>, <16 x i32>* %iptr6.0165%36 = load <16 x i32>, <16 x i32>* %iptr6.0165, !tbaa !8
+  call <16 x i32> @llvm.hexagon.V6.hi(<32 x i32> %dXV1.0158)
+  call <16 x i32> @llvm.hexagon.V6.hi(<32 x i32> %dXV0.0157)
+  call <16 x i32> @llvm.hexagon.V6.vlalignbi(<16 x i32> %37, <16 x i32> %38, i32 2)
+  call <16 x i32> @llvm.hexagon.V6.lo(<32 x i32> %dXV1.0158)
+  call <16 x i32> @llvm.hexagon.V6.lo(<32 x i32> %dXV0.0157)
+  call <16 x i32> @llvm.hexagon.V6.vlalignbi(<16 x i32> %40, <16 x i32> %41, i32 2)
+  call <16 x i32> @llvm.hexagon.V6.vlalignbi(<16 x i32> %37, <16 x i32> %38, i32 4)
+  call <32 x i32> @llvm.hexagon.V6.vaddubh(<16 x i32> %36, <16 x i32> %30)
+  call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %35, <16 x i32> %31)
+  call <32 x i32> @llvm.hexagon.V6.vmpabus.acc(<32 x i32> %44, <32 x i32> %45, i32 101058054)
+  call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %34, <16 x i32> %32)
+  call <32 x i32> @llvm.hexagon.V6.vmpabus.acc(<32 x i32> %46, <32 x i32> %47, i32 252645135)
+  call <32 x i32> @llvm.hexagon.V6.vmpybus.acc(<32 x i32> %48, <16 x i32> %33, i32 336860180)
+  call <16 x i32> @llvm.hexagon.V6.lo(<32 x i32> %49)
+  call <16 x i32> @llvm.hexagon.V6.valignbi(<16 x i32> %50, <16 x i32> %40, i32 2)
+  call <16 x i32> @llvm.hexagon.V6.hi(<32 x i32> %49)
+  call <16 x i32> @llvm.hexagon.V6.valignbi(<16 x i32> %52, <16 x i32> %37, i32 2)
+  call <16 x i32> @llvm.hexagon.V6.valignbi(<16 x i32> %50, <16 x i32> %40, i32 4)
+  call <16 x i32> @llvm.hexagon.V6.vaddh(<16 x i32> %37, <16 x i32> %39)
+  call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %55, <16 x i32> %40)
+  call <32 x i32> @llvm.hexagon.V6.vmpahb(<32 x i32> %56, i32 252972820)
+  call <16 x i32> @llvm.hexagon.V6.vaddh(<16 x i32> %51, <16 x i32> %40)
+  call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %58, <16 x i32> %37)
+  call <32 x i32> @llvm.hexagon.V6.vmpahb(<32 x i32> %59, i32 252972820)
+  call <16 x i32> @llvm.hexagon.V6.vaddh(<16 x i32> %53, <16 x i32> %43)
+  call <16 x i32> @llvm.hexagon.V6.vaddh(<16 x i32> %51, <16 x i32> %42)
+  call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %61, <16 x i32> %62)
+  call <32 x i32> @llvm.hexagon.V6.vmpahb.acc(<32 x i32> %57, <32 x i32> %63, i32 17170694)
+  call <16 x i32> @llvm.hexagon.V6.vaddh(<16 x i32> %54, <16 x i32> %42)
+  call <16 x i32> @llvm.hexagon.V6.vaddh(<16 x i32> %53, <16 x i32> %39)
+  call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %65, <16 x i32> %66)
+  call <32 x i32> @llvm.hexagon.V6.vmpahb.acc(<32 x i32> %60, <32 x i32> %67, i32 17170694)
+  call <16 x i32> @llvm.hexagon.V6.hi(<32 x i32> %64)
+  call <16 x i32> @llvm.hexagon.V6.lo(<32 x i32> %64)
+  call <16 x i32> @llvm.hexagon.V6.vasrwh(<16 x i32> %69, <16 x i32> %70, i32 12)
+  call <16 x i32> @llvm.hexagon.V6.hi(<32 x i32> %68)
+  call <16 x i32> @llvm.hexagon.V6.lo(<32 x i32> %68)
+  call <16 x i32> @llvm.hexagon.V6.vasrwh(<16 x i32> %72, <16 x i32> %73, i32 12)
+  call <16 x i32> @llvm.hexagon.V6.vshuffeb(<16 x i32> %74, <16 x i32> %71)
+  %incdec.ptr28 = getelementptr <16 x i32>, <16 x i32>* %1
+  store <16 x i32> %75, <16 x i32>* %optr.0166%sub = add i32 %i.0156, -64
+  %cmp = icmp sgt i32 %sub, 64
+  br i1 %cmp, label %for.body, label %for.end
+for.end:                                          ret void
+}
+declare <32 x i32> @llvm.hexagon.V6.vmpahb(<32 x i32>, i32)
+declare <32 x i32> @llvm.hexagon.V6.vmpahb.acc(<32 x i32>, <32 x i32>, i32)
+declare <16 x i32> @llvm.hexagon.V6.vasrwh(<16 x i32>, <16 x i32>, i32)
+declare <16 x i32> @llvm.hexagon.V6.vshuffeb(<16 x i32>, <16 x i32>)
+
+attributes #0 = { "correctly-rounded-divide-sqrt-fp-math""target-cpu"="hexagonv65" "target-features"="+hvx-length64b,+hvxv65,+v65,-long-calls" "unsafe-fp-math"}
+!8 = !{!9, !9, i64 0}
+!9 = !{!"omnipotent char", !10}
+!10 = !{}
+!14 = !{}
+!19 = !{}
+!24 = !{}
+
Index: llvm/test/MC/Hexagon/hvx-swapped-regpairs-alias-neg.s
===================================================================
--- /dev/null
+++ llvm/test/MC/Hexagon/hvx-swapped-regpairs-alias-neg.s
@@ -0,0 +1,16 @@
+# RUN: not llvm-mc -arch=hexagon -mcpu=hexagonv67 -mhvx -filetype=asm %s 2>%t; FileCheck  --implicit-check-not="error:" %s <%t
+## Normal (v1:0) and swapped (v0:1) pair defs in one packet: a double-definition error is expected.
+{
+  v1:0 = #0
+  v0:1 = #0
+}
+# CHECK: error: register `V1' modified more than once
+
+## Unused .tmp: v1.tmp is loaded, but the only other instruction writes v0:1 and reads v17:16.
+{
+  v1.tmp = vmem(r0 + #3)
+  v0:1 = vaddw(v17:16, v17:16)
+}
+
+# CHECK: warning: register `V1' used with `.tmp' but not used in the same packet
+
Index: llvm/test/MC/Hexagon/hvx-swapped-regpairs.s
===================================================================
--- /dev/null
+++ llvm/test/MC/Hexagon/hvx-swapped-regpairs.s
@@ -0,0 +1,44 @@
+# RUN: llvm-mc -filetype=obj -arch=hexagon -mcpu=hexagonv67 -mhvx %s | llvm-objdump -d -mcpu=hexagonv67 -mhvx - | FileCheck %s
+# RUN: not llvm-mc -arch=hexagon -mcpu=hexagonv65 -mhvx -filetype=asm %s 2>%t; FileCheck --check-prefix=CHECK-V65 --implicit-check-not="error:" %s <%t
+## Swapped vector register pairs: the v67 run checks the disassembly, the v65 run checks the rejection errors.
+v1:0.w = vadd(v0.h, v1.h) // Normal pair order (odd:even)
+# CHECK: 1ca1c080
+
+v0:1.w = vadd(v0.h, v1.h) // Swapped pair order (even:odd)
+# CHECK-NEXT: 1ca1c081
+# CHECK-V65: error: register pair `WR0' is not permitted for this architecture
+
+## Swapped use:
+v1:0.w = vtmpy(v0:1.h,r0.b)
+# CHECK-NEXT: 19a0c180
+# CHECK-V65: error: register pair `WR0' is not permitted for this architecture
+
+## Swapped def (the pair copy is printed as a vcombine):
+v0:1 = v3:2
+# CHECK-NEXT: 1f42c3e1 { v0:1 = vcombine(v3,v2) }
+# CHECK-V65: error: register pair `WR0' is not permitted for this architecture
+
+## Mapped instruction's swapped use:
+v1:0 = v2:3
+# CHECK-NEXT: v1:0 = vcombine(v2,v3)
+## No v65 error is expected here: the output is vcombine(v2,v3), with no swapped pair operand left.
+
+## .new producer from pair (v0.new comes from the swapped v0:1 def):
+{
+   v0:1 = vaddw(v0:1, v0:1)
+   if (!p0) vmem(r0+#0)=v0.new
+}
+# CHECK-NEXT: v0:1.w = vadd(v0:1.w,v0:1.w)
+# CHECK-NEXT: if (!p0) vmem(r0+#0) = v0.new
+# CHECK-V65: error: register pair `WR0' is not permitted for this architecture
+
+## Used .tmp (v0.tmp is read through the v0:1 uses), swapped use & def:
+{
+  v0.tmp = vmem(r0 + #3)
+  v2:3 = vaddw(v0:1, v0:1)
+}
+# CHECK-NEXT: 1c6141c3 { v2:3.w = vadd(v0:1.w,v0:1.w)
+# CHECK-NEXT:            v0.tmp = vmem(r0+#3) }
+# CHECK-V65: error: register pair `WR0' is not permitted for this architecture
+# CHECK-V65: error: register pair `WR1' is not permitted for this architecture
+