diff --git a/llvm/docs/RISCVUsage.rst b/llvm/docs/RISCVUsage.rst
--- a/llvm/docs/RISCVUsage.rst
+++ b/llvm/docs/RISCVUsage.rst
@@ -181,6 +181,9 @@
 ``XTheadMac``
   LLVM implements `the XTheadMac (multiply-accumulate instructions) vendor-defined instructions specified in <https://github.com/T-head-Semi/thead-extension-spec/releases/download/2.2.2/xthead-2023-01-30-2.2.2.pdf>`_  by T-HEAD of Alibaba.  Instructions are prefixed with `th.` as described in the specification.
 
+``XTHeadMemPair``
+  LLVM implements `the XTHeadMemPair (two-GPR memory operations) vendor-defined instructions specified in <https://github.com/T-head-Semi/thead-extension-spec/releases/download/2.2.2/xthead-2023-01-30-2.2.2.pdf>`_  by T-HEAD of Alibaba.  Instructions are prefixed with `th.` as described in the specification.
+
 ``XTHeadVdot``
   LLVM implements `version 1.0.0 of the THeadV-family custom instructions specification <https://github.com/T-head-Semi/thead-extension-spec/releases/download/2.2.0/xthead-2022-12-04-2.2.0.pdf>`_ by T-HEAD of Alibaba.  All instructions are prefixed with `th.` as described in the specification, and the riscv-toolchain-convention document linked above.
 
diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst
--- a/llvm/docs/ReleaseNotes.rst
+++ b/llvm/docs/ReleaseNotes.rst
@@ -112,6 +112,8 @@
 * Adds support for the vendor-defined XTHeadBb (basic bit-manipulation) extension.
 * Adds support for the vendor-defined XTHeadBs (single-bit) extension.
 * Adds support for the vendor-defined XTHeadMac (multiply-accumulate instructions) extension.
+* Adds support for the vendor-defined XTHeadMemPair (two-GPR memory operations)
+  extension.
 
 Changes to the WebAssembly Backend
 ----------------------------------
diff --git a/llvm/lib/Support/RISCVISAInfo.cpp b/llvm/lib/Support/RISCVISAInfo.cpp
--- a/llvm/lib/Support/RISCVISAInfo.cpp
+++ b/llvm/lib/Support/RISCVISAInfo.cpp
@@ -113,6 +113,7 @@
     {"xtheadbb", RISCVExtensionVersion{1, 0}},
     {"xtheadbs", RISCVExtensionVersion{1, 0}},
     {"xtheadmac", RISCVExtensionVersion{1, 0}},
+    {"xtheadmempair", RISCVExtensionVersion{1, 0}},
     {"xtheadvdot", RISCVExtensionVersion{1, 0}},
     {"xventanacondops", RISCVExtensionVersion{1, 0}},
 };
diff --git a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
--- a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
+++ b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
@@ -2651,6 +2651,34 @@
     }
   }
 
+  unsigned Opcode = Inst.getOpcode();
+
+  if (Opcode == RISCV::TH_LDD || Opcode == RISCV::TH_LWUD ||
+      Opcode == RISCV::TH_LWD) {
+    unsigned Rd1 = Inst.getOperand(0).getReg();
+    unsigned Rd2 = Inst.getOperand(1).getReg();
+    unsigned Rs1 = Inst.getOperand(2).getReg();
+    // The encoding with rd1 == rd2 == rs1 is reserved for XTHead load pair.
+    if (Rs1 == Rd1 && Rs1 == Rd2) {
+      SMLoc Loc = Operands[1]->getStartLoc();
+      return Error(Loc, "The source register and destination registers "
+                        "cannot be equal.");
+    }
+  }
+
+  bool IsTHeadMemPair32 = (Opcode == RISCV::TH_LWD ||
+                           Opcode == RISCV::TH_LWUD || Opcode == RISCV::TH_SWD);
+  bool IsTHeadMemPair64 = (Opcode == RISCV::TH_LDD || Opcode == RISCV::TH_SDD);
+  // The last operand of XTHeadMemPair instructions must be constant 3 or 4
+  // depending on the data width.
+  if (IsTHeadMemPair32 && Inst.getOperand(4).getImm() != 3) {
+    SMLoc Loc = Operands.back()->getStartLoc();
+    return Error(Loc, "Operand must be constant 3.");
+  } else if (IsTHeadMemPair64 && Inst.getOperand(4).getImm() != 4) {
+    SMLoc Loc = Operands.back()->getStartLoc();
+    return Error(Loc, "Operand must be constant 4.");
+  }
+
   const MCInstrDesc &MCID = MII.get(Inst.getOpcode());
   RISCVII::VConstraintType Constraints = RISCVII::getConstraint(MCID.TSFlags);
   if (Constraints == RISCVII::NoConstraint)
@@ -2674,7 +2702,6 @@
   if ((Constraints & RISCVII::VMConstraint) && (DestReg == RISCV::V0)) {
     // vadc, vsbc are special cases. These instructions have no mask register.
     // The destination register could not be V0.
-    unsigned Opcode = Inst.getOpcode();
     if (Opcode == RISCV::VADC_VVM || Opcode == RISCV::VADC_VXM ||
         Opcode == RISCV::VADC_VIM || Opcode == RISCV::VSBC_VVM ||
         Opcode == RISCV::VSBC_VXM || Opcode == RISCV::VFMERGE_VFM ||
diff --git a/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp b/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
--- a/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
+++ b/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
@@ -369,6 +369,10 @@
                                            uint64_t Address,
                                            const MCDisassembler *Decoder);
 
+static DecodeStatus decodeXTHeadMemPair(MCInst &Inst, uint32_t Insn,
+                                        uint64_t Address,
+                                        const MCDisassembler *Decoder);
+
 #include "RISCVGenDisassemblerTables.inc"
 
 static DecodeStatus decodeRVCInstrRdRs1ImmZero(MCInst &Inst, uint32_t Insn,
@@ -429,6 +433,28 @@
   return MCDisassembler::Success;
 }
 
+static DecodeStatus decodeXTHeadMemPair(MCInst &Inst, uint32_t Insn,
+                                        uint64_t Address,
+                                        const MCDisassembler *Decoder) {
+  // Register operands are appended as rd1 (bit 7), rd2 (bit 20), rs1 (bit 15).
+  DecodeGPRRegisterClass(Inst, fieldFromInstruction(Insn, 7, 5), Address,
+                         Decoder);
+  DecodeGPRRegisterClass(Inst, fieldFromInstruction(Insn, 20, 5), Address,
+                         Decoder);
+  DecodeGPRRegisterClass(Inst, fieldFromInstruction(Insn, 15, 5), Address,
+                         Decoder);
+  DecodeStatus ImmStatus = decodeUImmOperand<2>(
+      Inst, fieldFromInstruction(Insn, 25, 2), Address, Decoder);
+  (void)ImmStatus;
+  assert(ImmStatus == MCDisassembler::Success && "Invalid immediate");
+  // The final operand is implicit: 3 for the word forms, 4 for all others.
+  const unsigned Opc = Inst.getOpcode();
+  const bool IsDoubleOp =
+      Opc != RISCV::TH_LWD && Opc != RISCV::TH_LWUD && Opc != RISCV::TH_SWD;
+  Inst.addOperand(MCOperand::createImm(IsDoubleOp ? 4 : 3));
+  return MCDisassembler::Success;
+}
+
 DecodeStatus RISCVDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
                                                ArrayRef<uint8_t> Bytes,
                                                uint64_t Address,
@@ -499,6 +525,13 @@
       if (Result != MCDisassembler::Fail)
         return Result;
     }
+    if (STI.getFeatureBits()[RISCV::FeatureVendorXTHeadMemPair]) {
+      LLVM_DEBUG(dbgs() << "Trying XTHeadMemPair custom opcode table:\n");
+      Result = decodeInstruction(DecoderTableTHeadMemPair32, MI, Insn, Address,
+                                 this, STI);
+      if (Result != MCDisassembler::Fail)
+        return Result;
+    }
     if (STI.getFeatureBits()[RISCV::FeatureVendorXTHeadVdot]) {
       LLVM_DEBUG(dbgs() << "Trying XTHeadVdot custom opcode table:\n");
       Result =
diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td
--- a/llvm/lib/Target/RISCV/RISCVFeatures.td
+++ b/llvm/lib/Target/RISCV/RISCVFeatures.td
@@ -491,6 +491,13 @@
                                   AssemblerPredicate<(all_of FeatureVendorXTHeadMac),
                                   "'xtheadmac' (T-Head Multiply-Accumulate Instructions)">;
 
+def FeatureVendorXTHeadMemPair
+    : SubtargetFeature<"xtheadmempair", "HasVendorXTHeadMemPair", "true",
+                       "'xtheadmempair' (T-Head two-GPR Memory Operations)">;
+def HasVendorXTHeadMemPair : Predicate<"Subtarget->hasVendorXTHeadMemPair()">,
+                                    AssemblerPredicate<(all_of FeatureVendorXTHeadMemPair),
+                                    "'xtheadmempair' (T-Head two-GPR Memory Operations)">;
+
 def FeatureVendorXTHeadVdot
     : SubtargetFeature<"xtheadvdot", "HasVendorXTHeadVdot", "true",
                        "'xtheadvdot' (T-Head Vector Extensions for Dot)",
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -336,6 +336,12 @@
   // Load address.
   LA = ISD::FIRST_TARGET_MEMORY_OPCODE,
   LA_TLS_IE,
+
+  TH_LWD,
+  TH_LWUD,
+  TH_LDD,
+  TH_SWD,
+  TH_SDD,
 };
 } // namespace RISCVISD
 
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1046,6 +1046,8 @@
     setTargetDAGCombine({ISD::FCOPYSIGN, ISD::MGATHER, ISD::MSCATTER,
                          ISD::VP_GATHER, ISD::VP_SCATTER, ISD::SRA, ISD::SRL,
                          ISD::SHL, ISD::STORE, ISD::SPLAT_VECTOR});
+  if (Subtarget.hasVendorXTHeadMemPair())
+    setTargetDAGCombine({ISD::LOAD, ISD::STORE});
   if (Subtarget.useRVVForFixedLengthVectors())
     setTargetDAGCombine(ISD::BITCAST);
 
@@ -9717,6 +9719,143 @@
   return InputRootReplacement;
 }
 
+// Helper for performMemPairCombine: try to replace the two loads/stores
+// LSNode1 and LSNode2 with a single memory pair node that addresses memory
+// through BasePtr and the offset Imm. Returns an empty SDValue on failure.
+static SDValue tryMemPairCombine(SelectionDAG &DAG, LSBaseSDNode *LSNode1,
+                                 LSBaseSDNode *LSNode2, SDValue BasePtr,
+                                 uint64_t Imm) {
+  SmallPtrSet<const SDNode *, 32> Seen;
+  SmallVector<const SDNode *, 8> Pending = {LSNode1, LSNode2};
+  // Give up if either access is reported as a predecessor of the pending set.
+  if (SDNode::hasPredecessorHelper(LSNode1, Seen, Pending))
+    return SDValue();
+  if (SDNode::hasPredecessorHelper(LSNode2, Seen, Pending))
+    return SDValue();
+
+  MachineFunction &MF = DAG.getMachineFunction();
+  const MVT XLenVT = MF.getSubtarget<RISCVSubtarget>().getXLenVT();
+  const bool IsWord = LSNode1->getMemoryVT() == MVT::i32;
+  const SDLoc DL(LSNode1);
+
+  // The new operation has twice the width of a single access.
+  const EVT PairMemVT = IsWord ? MVT::i64 : MVT::i128;
+  MachineMemOperand *MMO1 = LSNode1->getMemOperand();
+  MachineMemOperand *PairMMO =
+      MF.getMachineMemOperand(MMO1, MMO1->getPointerInfo(), IsWord ? 8 : 16);
+
+  if (LSNode1->getOpcode() != ISD::LOAD) {
+    // Pair store: chain, both stored values, base pointer and offset.
+    const unsigned PairOpc = IsWord ? RISCVISD::TH_SWD : RISCVISD::TH_SDD;
+    SDValue PairStore = DAG.getMemIntrinsicNode(
+        PairOpc, DL, DAG.getVTList(MVT::Other),
+        {LSNode1->getChain(), LSNode1->getOperand(1), LSNode2->getOperand(1),
+         BasePtr, DAG.getConstant(Imm, DL, XLenVT)},
+        PairMemVT, PairMMO);
+    DAG.ReplaceAllUsesWith(LSNode2, PairStore.getNode());
+    return PairStore;
+  }
+
+  // Pair load: zero-extending i32 loads use TH_LWUD, the remaining i32 loads
+  // use TH_LWD, and every other memory type uses TH_LDD.
+  unsigned PairOpc = RISCVISD::TH_LDD;
+  if (IsWord)
+    PairOpc = cast<LoadSDNode>(LSNode1)->getExtensionType() == ISD::ZEXTLOAD
+                  ? RISCVISD::TH_LWUD
+                  : RISCVISD::TH_LWD;
+  SDValue PairLoad = DAG.getMemIntrinsicNode(
+      PairOpc, DL, DAG.getVTList({XLenVT, XLenVT, MVT::Other}),
+      {LSNode1->getChain(), BasePtr, DAG.getConstant(Imm, DL, XLenVT)},
+      PairMemVT, PairMMO);
+
+  // Results 0 and 1 are the loaded values and result 2 is the chain; uses of
+  // LSNode2 are redirected to the second merge and the first is returned.
+  SDValue First =
+      DAG.getMergeValues({PairLoad.getValue(0), PairLoad.getValue(2)}, DL);
+  SDValue Second = DAG.getMergeValues(
+      {PairLoad.getValue(1), PairLoad.getValue(2)}, SDLoc(LSNode2));
+  DAG.ReplaceAllUsesWith(LSNode2, Second.getNode());
+  return First;
+}
+
+// Try to fuse N with an adjacent i32/i64 load/store on the same chain into one
+// XTHeadMemPair node. Returns an empty SDValue when no partner is found.
+static SDValue performMemPairCombine(SDNode *N,
+                                     TargetLowering::DAGCombinerInfo &DCI) {
+  SelectionDAG &DAG = DCI.DAG;
+  MachineFunction &MF = DAG.getMachineFunction();
+  const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>();
+
+  // Target does not support load/store pair.
+  if (!Subtarget.hasVendorXTHeadMemPair())
+    return SDValue();
+  // OpNum is the address operand index: 1 for loads, 2 for stores.
+  LSBaseSDNode *LSNode1 = cast<LSBaseSDNode>(N);
+  EVT MemVT = LSNode1->getMemoryVT();
+  unsigned OpNum = LSNode1->getOpcode() == ISD::LOAD ? 1 : 2;
+
+  // No volatile, indexed or atomic loads/stores.
+  if (!LSNode1->isSimple() || LSNode1->isIndexed())
+    return SDValue();
+
+  // Function to get a base + constant representation from a memory value.
+  auto ExtractBaseAndOffset = [](SDValue Ptr) -> std::pair<SDValue, uint64_t> {
+    if (Ptr->getOpcode() == ISD::ADD)
+      if (auto *C1 = dyn_cast<ConstantSDNode>(Ptr->getOperand(1)))
+        return {Ptr->getOperand(0), C1->getZExtValue()};
+    return {Ptr, 0};
+  };
+
+  auto [Base1, Offset1] = ExtractBaseAndOffset(LSNode1->getOperand(OpNum));
+  // Look for a partner among the other users of N's chain operand.
+  SDValue Chain = N->getOperand(0);
+  for (SDNode::use_iterator UI = Chain->use_begin(), UE = Chain->use_end();
+       UI != UE; ++UI) {
+    SDUse &Use = UI.getUse();
+    if (Use.getUser() != N && Use.getResNo() == 0 &&
+        Use.getUser()->getOpcode() == N->getOpcode()) {
+      LSBaseSDNode *LSNode2 = cast<LSBaseSDNode>(Use.getUser());
+
+      // No volatile, indexed or atomic loads/stores.
+      if (!LSNode2->isSimple() || LSNode2->isIndexed())
+        continue;
+
+      // Check if LSNode1 and LSNode2 have the same type and extension.
+      if (LSNode1->getOpcode() == ISD::LOAD)
+        if (cast<LoadSDNode>(LSNode2)->getExtensionType() !=
+            cast<LoadSDNode>(LSNode1)->getExtensionType())
+          continue;
+
+      if (LSNode1->getMemoryVT() != LSNode2->getMemoryVT())
+        continue;
+
+      auto [Base2, Offset2] = ExtractBaseAndOffset(LSNode2->getOperand(OpNum));
+
+      // Check if the base pointer is the same for both instructions.
+      if (Base1 != Base2)
+        continue;
+
+      // Check if the offsets match the XTHeadMemPair encoding constraints:
+      // adjacent i32/i64 values and a 2-bit index scaled by 8/16. Any other
+      // memory type has no pair instruction and must not be combined.
+      bool Valid = false;
+      if (MemVT == MVT::i32)
+        Valid = (Offset1 + 4 == Offset2) && isShiftedUInt<2, 3>(Offset1);
+      else if (MemVT == MVT::i64)
+        Valid = (Offset1 + 8 == Offset2) && isShiftedUInt<2, 4>(Offset1);
+      if (!Valid)
+        continue;
+
+      // Try to combine.
+      if (SDValue Res =
+              tryMemPairCombine(DAG, LSNode1, LSNode2, Base1, Offset1))
+        return Res;
+    }
+  }
+
+  return SDValue();
+}
+
 // Fold
 //   (fp_to_int (froundeven X)) -> fcvt X, rne
 //   (fp_to_int (ftrunc X))     -> fcvt X, rtz
@@ -10688,7 +10827,15 @@
     return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), A, B, C, Mask,
                        VL);
   }
+  case ISD::LOAD:
   case ISD::STORE: {
+    if (DCI.isAfterLegalizeDAG())
+      if (SDValue V = performMemPairCombine(N, DCI))
+        return V;
+
+    if (N->getOpcode() != ISD::STORE)
+      break;
+
     auto *Store = cast<StoreSDNode>(N);
     SDValue Val = Store->getValue();
     // Combine store of vmv.x.s/vfmv.f.s to vse with VL of 1.
@@ -13515,6 +13662,11 @@
   NODE_NAME_CASE(ORC_B)
   NODE_NAME_CASE(ZIP)
   NODE_NAME_CASE(UNZIP)
+  NODE_NAME_CASE(TH_LWD)
+  NODE_NAME_CASE(TH_LWUD)
+  NODE_NAME_CASE(TH_LDD)
+  NODE_NAME_CASE(TH_SWD)
+  NODE_NAME_CASE(TH_SDD)
   NODE_NAME_CASE(VMV_V_X_VL)
   NODE_NAME_CASE(VFMV_V_F_VL)
   NODE_NAME_CASE(VMV_X_S)
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXTHead.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXTHead.td
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoXTHead.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXTHead.td
@@ -10,6 +10,26 @@
 //
 //===----------------------------------------------------------------------===//
 
+//===----------------------------------------------------------------------===//
+// T-HEAD specific DAG Nodes.
+//===----------------------------------------------------------------------===//
+
+def SDT_TDBLD : SDTypeProfile<2, 2,
+  [SDTCisSameAs<0, 1>, SDTCisSameAs<1, 3>, SDTCisPtrTy<2>, SDTCisVT<3, XLenVT>]>;
+def SDT_TDBST : SDTypeProfile<0, 4,
+  [SDTCisSameAs<0, 1>, SDTCisSameAs<1, 3>, SDTCisPtrTy<2>, SDTCisVT<3, XLenVT>]>;
+
+def TH_TLWUD : SDNode<"RISCVISD::TH_LWUD", SDT_TDBLD,
+  [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
+def TH_TLWD : SDNode<"RISCVISD::TH_LWD", SDT_TDBLD,
+  [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
+def TH_TLDD : SDNode<"RISCVISD::TH_LDD", SDT_TDBLD,
+  [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
+def TH_TSWD : SDNode<"RISCVISD::TH_SWD", SDT_TDBST,
+  [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def TH_TSDD : SDNode<"RISCVISD::TH_SDD", SDT_TDBST,
+  [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+
 //===----------------------------------------------------------------------===//
 // Instruction class templates
 //===----------------------------------------------------------------------===//
@@ -96,6 +116,29 @@
   let Constraints = "$rd_up = $rd";
 }
 
+let Predicates = [HasVendorXTHeadMemPair], DecoderNamespace = "THeadMemPair",
+  hasSideEffects = 0, mayLoad = 1, mayStore = 0 in
+class THLoadPair<bits<5> funct5, string opcodestr>
+  : RVInstR<!shl(funct5, 2), 0b100, OPC_CUSTOM_0,
+  (outs GPR:$rd, GPR:$rs2), (ins GPR:$rs1, uimm2:$uimm2, uimm7:$const3or4),
+  opcodestr, "$rd, $rs2, (${rs1}), $uimm2, $const3or4"> {
+  bits<2> uimm2;
+  let Inst{26-25} = uimm2;
+  let DecoderMethod = "decodeXTHeadMemPair";
+  let Constraints = "@earlyclobber $rd,@earlyclobber $rs2";
+}
+
+let Predicates = [HasVendorXTHeadMemPair], DecoderNamespace = "THeadMemPair",
+  hasSideEffects = 0, mayLoad = 0, mayStore = 1 in
+class THStorePair<bits<5> funct5, string opcodestr>
+  : RVInstR<!shl(funct5, 2), 0b101, OPC_CUSTOM_0,
+  (outs), (ins GPR:$rd, GPR:$rs2, GPR:$rs1, uimm2:$uimm2, uimm7:$const3or4),
+  opcodestr, "$rd, $rs2, (${rs1}), $uimm2, $const3or4"> {
+  bits<2> uimm2;
+  let Inst{26-25} = uimm2;
+  let DecoderMethod = "decodeXTHeadMemPair";
+}
+
 
 //===----------------------------------------------------------------------===//
 // Combination of instruction classes.
@@ -151,6 +194,23 @@
 def TH_MULSW : THMulAccumulate_rr<0b0010011, "th.mulsw">;
 } // Predicates = [HasVendorXTHeadMac, IsRV64]
 
+let Predicates = [HasVendorXTHeadMemPair] in {
+def TH_LWUD : THLoadPair<0b11110, "th.lwud">,
+              Sched<[WriteLDW, WriteLDW, ReadMemBase]>;
+def TH_SWD  : THStorePair<0b11100, "th.swd">,
+              Sched<[WriteSTW, WriteSTW, ReadStoreData, ReadMemBase]>;
+let IsSignExtendingOpW = 1 in
+def TH_LWD  : THLoadPair<0b11100, "th.lwd">,
+              Sched<[WriteLDW, WriteLDW, ReadMemBase]>;
+} // Predicates = [HasVendorXTHeadMemPair]
+
+let Predicates = [HasVendorXTHeadMemPair, IsRV64] in {
+def TH_LDD  : THLoadPair<0b11111, "th.ldd">,
+              Sched<[WriteLDD, WriteLDD, ReadMemBase]>;
+def TH_SDD  : THStorePair<0b11111, "th.sdd">,
+              Sched<[WriteSTD, WriteSTD, ReadStoreData, ReadMemBase]>;
+} // Predicates = [HasVendorXTHeadMemPair, IsRV64]
+
 let Predicates = [HasVendorXTHeadVdot],
     Constraints = "@earlyclobber $vd",
     RVVConstraint = WidenV in {
@@ -337,3 +397,35 @@
 defm : VPatTernaryVMAQA_VV_VX<"int_riscv_th_vmaqasu","PseudoTHVdotVMAQASU",AllQuadWidenableInt8NoVLMulVectors>;
 defm : VPatTernaryVMAQA_VX<"int_riscv_th_vmaqaus",   "PseudoTHVdotVMAQAUS",AllQuadWidenableInt8NoVLMulVectors>;
 }
+
+def uimm2_3_XFORM : SDNodeXForm<imm, [{
+  return CurDAG->getTargetConstant((N->getZExtValue() >> 3) & 0x3,
+                                   SDLoc(N), Subtarget->getXLenVT());
+}]>;
+
+def uimm2_3 : Operand<XLenVT>, ImmLeaf<XLenVT, [{
+  return isShiftedUInt<2, 3>(Imm);
+}], uimm2_3_XFORM>;
+
+def uimm2_4_XFORM : SDNodeXForm<imm, [{
+  return CurDAG->getTargetConstant((N->getZExtValue() >> 4) & 0x3,
+                                   SDLoc(N), Subtarget->getXLenVT());
+}]>;
+
+def uimm2_4 : Operand<XLenVT>, ImmLeaf<XLenVT, [{
+  return isShiftedUInt<2, 4>(Imm);
+}], uimm2_4_XFORM>;
+
+let Predicates = [HasVendorXTHeadMemPair, IsRV64] in {
+def : Pat<(TH_TLWUD i64:$rs1, uimm2_3:$uimm2_3), (TH_LWUD i64:$rs1, uimm2_3:$uimm2_3, 3)>;
+def : Pat<(TH_TLDD i64:$rs1, uimm2_4:$uimm2_4), (TH_LDD i64:$rs1, uimm2_4:$uimm2_4, 4)>;
+
+def : Pat<(TH_TSDD i64:$rd1, i64:$rd2, i64:$rs1, uimm2_4:$uimm2_4),
+          (TH_SDD i64:$rd1, i64:$rd2, i64:$rs1, uimm2_4:$uimm2_4, 4)>;
+} // Predicates = [HasVendorXTHeadMemPair, IsRV64]
+
+let Predicates = [HasVendorXTHeadMemPair] in {
+  def : Pat<(TH_TLWD GPR:$rs1, uimm2_3:$uimm2_3), (TH_LWD GPR:$rs1, uimm2_3:$uimm2_3, 3)>;
+  def : Pat<(TH_TSWD GPR:$rd1, GPR:$rd2, GPR:$rs1, uimm2_3:$uimm2_3),
+            (TH_SWD GPR:$rd1, GPR:$rd2, GPR:$rs1, uimm2_3:$uimm2_3, 3)>;
+}
diff --git a/llvm/test/CodeGen/RISCV/attributes.ll b/llvm/test/CodeGen/RISCV/attributes.ll
--- a/llvm/test/CodeGen/RISCV/attributes.ll
+++ b/llvm/test/CodeGen/RISCV/attributes.ll
@@ -42,6 +42,7 @@
 ; RUN: llc -mtriple=riscv32 -mattr=+svpbmt %s -o - | FileCheck --check-prefixes=CHECK,RV32SVPBMT %s
 ; RUN: llc -mtriple=riscv32 -mattr=+svinval %s -o - | FileCheck --check-prefixes=CHECK,RV32SVINVAL %s
 ; RUN: llc -mtriple=riscv32 -mattr=+xtheadmac %s -o - | FileCheck --check-prefixes=CHECK,RV32XTHEADMAC %s
+; RUN: llc -mtriple=riscv32 -mattr=+xtheadmempair %s -o - | FileCheck --check-prefixes=CHECK,RV32XTHEADMEMPAIR %s
 ; RUN: llc -mtriple=riscv32 -mattr=+experimental-zca %s -o - | FileCheck --check-prefixes=CHECK,RV32ZCA %s
 ; RUN: llc -mtriple=riscv32 -mattr=+experimental-zcb %s -o - | FileCheck --check-prefixes=CHECK,RV32ZCB %s
 ; RUN: llc -mtriple=riscv32 -mattr=+experimental-zcd %s -o - | FileCheck --check-prefixes=CHECK,RV32ZCD %s
@@ -93,6 +94,7 @@
 ; RUN: llc -mtriple=riscv64 -mattr=+xtheadbb %s -o - | FileCheck --check-prefixes=CHECK,RV64XTHEADBB %s
 ; RUN: llc -mtriple=riscv64 -mattr=+xtheadbs %s -o - | FileCheck --check-prefixes=CHECK,RV64XTHEADBS %s
 ; RUN: llc -mtriple=riscv64 -mattr=+xtheadMAC %s -o - | FileCheck --check-prefixes=CHECK,RV64XTHEADMAC %s
+; RUN: llc -mtriple=riscv64 -mattr=+xtheadmempair %s -o - | FileCheck --check-prefixes=CHECK,RV64XTHEADMEMPAIR %s
 ; RUN: llc -mtriple=riscv64 -mattr=+xtheadvdot %s -o - | FileCheck --check-prefixes=CHECK,RV64XTHEADVDOT %s
 ; RUN: llc -mtriple=riscv64 -mattr=+experimental-zawrs %s -o - | FileCheck --check-prefixes=CHECK,RV64ZAWRS %s
 ; RUN: llc -mtriple=riscv64 -mattr=+experimental-ztso %s -o - | FileCheck --check-prefixes=CHECK,RV64ZTSO %s
@@ -143,6 +145,7 @@
 ; RV32SVPBMT: .attribute 5, "rv32i2p0_svpbmt1p0"
 ; RV32SVINVAL: .attribute 5, "rv32i2p0_svinval1p0"
 ; RV32XTHEADMAC: .attribute 5, "rv32i2p0_xtheadmac1p0"
+; RV32XTHEADMEMPAIR: .attribute 5, "rv32i2p0_xtheadmempair1p0"
 ; RV32ZCA: .attribute 5, "rv32i2p0_zca1p0"
 ; RV32ZCB: .attribute 5, "rv32i2p0_zca1p0_zcb1p0"
 ; RV32ZCD: .attribute 5, "rv32i2p0_zcd1p0"
@@ -194,6 +197,7 @@
 ; RV64XTHEADBB: .attribute 5, "rv64i2p0_xtheadbb1p0"
 ; RV64XTHEADBS: .attribute 5, "rv64i2p0_xtheadbs1p0"
 ; RV64XTHEADMAC: .attribute 5, "rv64i2p0_xtheadmac1p0"
+; RV64XTHEADMEMPAIR: .attribute 5, "rv64i2p0_xtheadmempair1p0"
 ; RV64XTHEADVDOT: .attribute 5, "rv64i2p0_f2p0_d2p0_v1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0_xtheadvdot1p0"
 ; RV64ZTSO: .attribute 5, "rv64i2p0_ztso0p1"
 ; RV64ZCA: .attribute 5, "rv64i2p0_zca1p0"
diff --git a/llvm/test/CodeGen/RISCV/xtheadmempair.ll b/llvm/test/CodeGen/RISCV/xtheadmempair.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/xtheadmempair.ll
@@ -0,0 +1,295 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+xtheadmempair -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV32XTHEADMEMPAIR
+; RUN: llc -mtriple=riscv64 -mattr=+xtheadmempair -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV64XTHEADMEMPAIR
+
+define i64 @lwd(i32* %a) {
+; RV32XTHEADMEMPAIR-LABEL: lwd:
+; RV32XTHEADMEMPAIR:       # %bb.0:
+; RV32XTHEADMEMPAIR-NEXT:    th.lwd a1, a2, (a0), 2, 3
+; RV32XTHEADMEMPAIR-NEXT:    srai a3, a1, 31
+; RV32XTHEADMEMPAIR-NEXT:    srai a4, a2, 31
+; RV32XTHEADMEMPAIR-NEXT:    add a0, a1, a2
+; RV32XTHEADMEMPAIR-NEXT:    sltu a1, a0, a1
+; RV32XTHEADMEMPAIR-NEXT:    add a3, a3, a4
+; RV32XTHEADMEMPAIR-NEXT:    add a1, a3, a1
+; RV32XTHEADMEMPAIR-NEXT:    ret
+;
+; RV64XTHEADMEMPAIR-LABEL: lwd:
+; RV64XTHEADMEMPAIR:       # %bb.0:
+; RV64XTHEADMEMPAIR-NEXT:    th.lwd a1, a2, (a0), 2, 3
+; RV64XTHEADMEMPAIR-NEXT:    add a0, a1, a2
+; RV64XTHEADMEMPAIR-NEXT:    ret
+  %1 = getelementptr i32, i32* %a, i64 4
+  %2 = load i32, i32* %1, align 4
+  %3 = getelementptr i32, i32* %a, i64 5
+  %4 = load i32, i32* %3, align 4
+  %5 = sext i32 %2 to i64
+  %6 = sext i32 %4 to i64
+  %7 = add i64 %5, %6
+  ret i64 %7
+}
+
+define i64 @lwud(i32* %a) {
+; RV32XTHEADMEMPAIR-LABEL: lwud:
+; RV32XTHEADMEMPAIR:       # %bb.0:
+; RV32XTHEADMEMPAIR-NEXT:    th.lwd a1, a2, (a0), 2, 3
+; RV32XTHEADMEMPAIR-NEXT:    add a0, a1, a2
+; RV32XTHEADMEMPAIR-NEXT:    sltu a1, a0, a1
+; RV32XTHEADMEMPAIR-NEXT:    ret
+;
+; RV64XTHEADMEMPAIR-LABEL: lwud:
+; RV64XTHEADMEMPAIR:       # %bb.0:
+; RV64XTHEADMEMPAIR-NEXT:    th.lwud a1, a2, (a0), 2, 3
+; RV64XTHEADMEMPAIR-NEXT:    add a0, a1, a2
+; RV64XTHEADMEMPAIR-NEXT:    ret
+  %1 = getelementptr i32, i32* %a, i64 4
+  %2 = load i32, i32* %1, align 4
+  %3 = getelementptr i32, i32* %a, i64 5
+  %4 = load i32, i32* %3, align 4
+  %5 = zext i32 %2 to i64
+  %6 = zext i32 %4 to i64
+  %7 = add i64 %5, %6
+  ret i64 %7
+}
+
+define i64 @ldd(i64* %a) {
+; RV32XTHEADMEMPAIR-LABEL: ldd:
+; RV32XTHEADMEMPAIR:       # %bb.0:
+; RV32XTHEADMEMPAIR-NEXT:    lw a1, 32(a0)
+; RV32XTHEADMEMPAIR-NEXT:    lw a2, 36(a0)
+; RV32XTHEADMEMPAIR-NEXT:    lw a3, 44(a0)
+; RV32XTHEADMEMPAIR-NEXT:    lw a0, 40(a0)
+; RV32XTHEADMEMPAIR-NEXT:    add a2, a2, a3
+; RV32XTHEADMEMPAIR-NEXT:    add a0, a1, a0
+; RV32XTHEADMEMPAIR-NEXT:    sltu a1, a0, a1
+; RV32XTHEADMEMPAIR-NEXT:    add a1, a2, a1
+; RV32XTHEADMEMPAIR-NEXT:    ret
+;
+; RV64XTHEADMEMPAIR-LABEL: ldd:
+; RV64XTHEADMEMPAIR:       # %bb.0:
+; RV64XTHEADMEMPAIR-NEXT:    th.ldd a1, a2, (a0), 2, 4
+; RV64XTHEADMEMPAIR-NEXT:    add a0, a1, a2
+; RV64XTHEADMEMPAIR-NEXT:    ret
+  %1 = getelementptr i64, i64* %a, i64 4
+  %2 = load i64, i64* %1, align 8
+  %3 = getelementptr i64, i64* %a, i64 5
+  %4 = load i64, i64* %3, align 8
+  %5 = add i64 %2, %4
+  ret i64 %5
+}
+
+define i64 @lwd_0(i32* %a) {
+; RV32XTHEADMEMPAIR-LABEL: lwd_0:
+; RV32XTHEADMEMPAIR:       # %bb.0:
+; RV32XTHEADMEMPAIR-NEXT:    th.lwd a1, a2, (a0), 0, 3
+; RV32XTHEADMEMPAIR-NEXT:    srai a3, a1, 31
+; RV32XTHEADMEMPAIR-NEXT:    srai a4, a2, 31
+; RV32XTHEADMEMPAIR-NEXT:    add a0, a1, a2
+; RV32XTHEADMEMPAIR-NEXT:    sltu a1, a0, a1
+; RV32XTHEADMEMPAIR-NEXT:    add a3, a3, a4
+; RV32XTHEADMEMPAIR-NEXT:    add a1, a3, a1
+; RV32XTHEADMEMPAIR-NEXT:    ret
+;
+; RV64XTHEADMEMPAIR-LABEL: lwd_0:
+; RV64XTHEADMEMPAIR:       # %bb.0:
+; RV64XTHEADMEMPAIR-NEXT:    th.lwd a1, a2, (a0), 0, 3
+; RV64XTHEADMEMPAIR-NEXT:    add a0, a1, a2
+; RV64XTHEADMEMPAIR-NEXT:    ret
+  %1 = getelementptr i32, i32* %a, i64 0
+  %2 = load i32, i32* %1, align 4
+  %3 = getelementptr i32, i32* %a, i64 1
+  %4 = load i32, i32* %3, align 4
+  %5 = sext i32 %2 to i64
+  %6 = sext i32 %4 to i64
+  %7 = add i64 %5, %6
+  ret i64 %7
+}
+
+define i64 @lwud_0(i32* %a) {
+; RV32XTHEADMEMPAIR-LABEL: lwud_0:
+; RV32XTHEADMEMPAIR:       # %bb.0:
+; RV32XTHEADMEMPAIR-NEXT:    th.lwd a1, a2, (a0), 0, 3
+; RV32XTHEADMEMPAIR-NEXT:    add a0, a1, a2
+; RV32XTHEADMEMPAIR-NEXT:    sltu a1, a0, a1
+; RV32XTHEADMEMPAIR-NEXT:    ret
+;
+; RV64XTHEADMEMPAIR-LABEL: lwud_0:
+; RV64XTHEADMEMPAIR:       # %bb.0:
+; RV64XTHEADMEMPAIR-NEXT:    th.lwud a1, a2, (a0), 0, 3
+; RV64XTHEADMEMPAIR-NEXT:    add a0, a1, a2
+; RV64XTHEADMEMPAIR-NEXT:    ret
+  %1 = getelementptr i32, i32* %a, i64 0
+  %2 = load i32, i32* %1, align 4
+  %3 = getelementptr i32, i32* %a, i64 1
+  %4 = load i32, i32* %3, align 4
+  %5 = zext i32 %2 to i64
+  %6 = zext i32 %4 to i64
+  %7 = add i64 %5, %6
+  ret i64 %7
+}
+
+define i64 @ldd_0(i64* %a) {
+; RV32XTHEADMEMPAIR-LABEL: ldd_0:
+; RV32XTHEADMEMPAIR:       # %bb.0:
+; RV32XTHEADMEMPAIR-NEXT:    th.lwd a1, a2, (a0), 0, 3
+; RV32XTHEADMEMPAIR-NEXT:    th.lwd a3, a4, (a0), 1, 3
+; RV32XTHEADMEMPAIR-NEXT:    add a2, a2, a4
+; RV32XTHEADMEMPAIR-NEXT:    add a0, a1, a3
+; RV32XTHEADMEMPAIR-NEXT:    sltu a1, a0, a1
+; RV32XTHEADMEMPAIR-NEXT:    add a1, a2, a1
+; RV32XTHEADMEMPAIR-NEXT:    ret
+;
+; RV64XTHEADMEMPAIR-LABEL: ldd_0:
+; RV64XTHEADMEMPAIR:       # %bb.0:
+; RV64XTHEADMEMPAIR-NEXT:    th.ldd a1, a2, (a0), 0, 4
+; RV64XTHEADMEMPAIR-NEXT:    add a0, a1, a2
+; RV64XTHEADMEMPAIR-NEXT:    ret
+  %1 = getelementptr i64, i64* %a, i64 0
+  %2 = load i64, i64* %1, align 8
+  %3 = getelementptr i64, i64* %a, i64 1
+  %4 = load i64, i64* %3, align 8
+  %5 = add i64 %2, %4
+  ret i64 %5
+}
+
+define void @swd(i32* %a, i32 %b, i32%c) {
+; RV32XTHEADMEMPAIR-LABEL: swd:
+; RV32XTHEADMEMPAIR:       # %bb.0:
+; RV32XTHEADMEMPAIR-NEXT:    th.swd a1, a2, (a0), 2, 3
+; RV32XTHEADMEMPAIR-NEXT:    ret
+;
+; RV64XTHEADMEMPAIR-LABEL: swd:
+; RV64XTHEADMEMPAIR:       # %bb.0:
+; RV64XTHEADMEMPAIR-NEXT:    th.swd a1, a2, (a0), 2, 3
+; RV64XTHEADMEMPAIR-NEXT:    ret
+  %1 = getelementptr i32, i32* %a, i64 4
+  store i32 %b, i32* %1, align 4
+  %2 = getelementptr i32, i32* %a, i64 5
+  store i32 %c, i32* %2, align 4
+  ret void
+}
+
+define void @sdd(i64* %a, i64 %b, i64%c) {
+; RV32XTHEADMEMPAIR-LABEL: sdd:
+; RV32XTHEADMEMPAIR:       # %bb.0:
+; RV32XTHEADMEMPAIR-NEXT:    sw a2, 36(a0)
+; RV32XTHEADMEMPAIR-NEXT:    sw a1, 32(a0)
+; RV32XTHEADMEMPAIR-NEXT:    sw a4, 44(a0)
+; RV32XTHEADMEMPAIR-NEXT:    sw a3, 40(a0)
+; RV32XTHEADMEMPAIR-NEXT:    ret
+;
+; RV64XTHEADMEMPAIR-LABEL: sdd:
+; RV64XTHEADMEMPAIR:       # %bb.0:
+; RV64XTHEADMEMPAIR-NEXT:    th.sdd a1, a2, (a0), 2, 4
+; RV64XTHEADMEMPAIR-NEXT:    ret
+  %1 = getelementptr i64, i64* %a, i64 4
+  store i64 %b, i64* %1, align 8
+  %2 = getelementptr i64, i64* %a, i64 5
+  store i64 %c, i64* %2, align 8
+  ret void
+}
+
+define void @swd_0(i32* %a, i32 %b, i32%c) {
+; RV32XTHEADMEMPAIR-LABEL: swd_0:
+; RV32XTHEADMEMPAIR:       # %bb.0:
+; RV32XTHEADMEMPAIR-NEXT:    th.swd a1, a2, (a0), 0, 3
+; RV32XTHEADMEMPAIR-NEXT:    ret
+;
+; RV64XTHEADMEMPAIR-LABEL: swd_0:
+; RV64XTHEADMEMPAIR:       # %bb.0:
+; RV64XTHEADMEMPAIR-NEXT:    th.swd a1, a2, (a0), 0, 3
+; RV64XTHEADMEMPAIR-NEXT:    ret
+  %1 = getelementptr i32, i32* %a, i64 0
+  store i32 %b, i32* %1, align 4
+  %2 = getelementptr i32, i32* %a, i64 1
+  store i32 %c, i32* %2, align 4
+  ret void
+}
+
+define void @sdd_0(i64* %a, i64 %b, i64%c) {
+; RV32XTHEADMEMPAIR-LABEL: sdd_0:
+; RV32XTHEADMEMPAIR:       # %bb.0:
+; RV32XTHEADMEMPAIR-NEXT:    th.swd a1, a2, (a0), 0, 3
+; RV32XTHEADMEMPAIR-NEXT:    th.swd a3, a4, (a0), 1, 3
+; RV32XTHEADMEMPAIR-NEXT:    ret
+;
+; RV64XTHEADMEMPAIR-LABEL: sdd_0:
+; RV64XTHEADMEMPAIR:       # %bb.0:
+; RV64XTHEADMEMPAIR-NEXT:    th.sdd a1, a2, (a0), 0, 4
+; RV64XTHEADMEMPAIR-NEXT:    ret
+  %1 = getelementptr i64, i64* %a, i64 0
+  store i64 %b, i64* %1, align 8
+  %2 = getelementptr i64, i64* %a, i64 1
+  store i64 %c, i64* %2, align 8
+  ret void
+}
+
+define i64 @ld64(i64* %a) {
+; RV32XTHEADMEMPAIR-LABEL: ld64:
+; RV32XTHEADMEMPAIR:       # %bb.0:
+; RV32XTHEADMEMPAIR-NEXT:    th.lwd a2, a1, (a0), 0, 3
+; RV32XTHEADMEMPAIR-NEXT:    mv a0, a2
+; RV32XTHEADMEMPAIR-NEXT:    ret
+;
+; RV64XTHEADMEMPAIR-LABEL: ld64:
+; RV64XTHEADMEMPAIR:       # %bb.0:
+; RV64XTHEADMEMPAIR-NEXT:    ld a0, 0(a0)
+; RV64XTHEADMEMPAIR-NEXT:    ret
+  %1 = getelementptr i64, i64* %a, i64 0
+  %2 = load i64, i64* %1, align 8
+  ret i64 %2
+}
+
+define i128 @ld128(i128* %a) {
+; RV32XTHEADMEMPAIR-LABEL: ld128:
+; RV32XTHEADMEMPAIR:       # %bb.0:
+; RV32XTHEADMEMPAIR-NEXT:    th.lwd a2, a3, (a1), 1, 3
+; RV32XTHEADMEMPAIR-NEXT:    th.lwd a4, a5, (a1), 0, 3
+; RV32XTHEADMEMPAIR-NEXT:    th.swd a2, a3, (a0), 1, 3
+; RV32XTHEADMEMPAIR-NEXT:    th.swd a4, a5, (a0), 0, 3
+; RV32XTHEADMEMPAIR-NEXT:    ret
+;
+; RV64XTHEADMEMPAIR-LABEL: ld128:
+; RV64XTHEADMEMPAIR:       # %bb.0:
+; RV64XTHEADMEMPAIR-NEXT:    th.ldd a2, a1, (a0), 0, 4
+; RV64XTHEADMEMPAIR-NEXT:    mv a0, a2
+; RV64XTHEADMEMPAIR-NEXT:    ret
+  %1 = getelementptr i128, i128* %a, i64 0
+  %2 = load i128, i128* %1, align 8
+  ret i128 %2
+}
+
+define void @sd64(i64* %a, i64 %b) {
+; RV32XTHEADMEMPAIR-LABEL: sd64:
+; RV32XTHEADMEMPAIR:       # %bb.0:
+; RV32XTHEADMEMPAIR-NEXT:    th.swd a1, a2, (a0), 0, 3
+; RV32XTHEADMEMPAIR-NEXT:    ret
+;
+; RV64XTHEADMEMPAIR-LABEL: sd64:
+; RV64XTHEADMEMPAIR:       # %bb.0:
+; RV64XTHEADMEMPAIR-NEXT:    sd a1, 0(a0)
+; RV64XTHEADMEMPAIR-NEXT:    ret
+  %1 = getelementptr i64, i64* %a, i64 0
+  store i64 %b, i64* %1, align 8
+  ret void
+}
+
+define void @sd128(i128* %a, i128 %b) {
+; RV32XTHEADMEMPAIR-LABEL: sd128:
+; RV32XTHEADMEMPAIR:       # %bb.0:
+; RV32XTHEADMEMPAIR-NEXT:    th.lwd a2, a3, (a1), 1, 3
+; RV32XTHEADMEMPAIR-NEXT:    th.lwd a4, a5, (a1), 0, 3
+; RV32XTHEADMEMPAIR-NEXT:    th.swd a2, a3, (a0), 1, 3
+; RV32XTHEADMEMPAIR-NEXT:    th.swd a4, a5, (a0), 0, 3
+; RV32XTHEADMEMPAIR-NEXT:    ret
+;
+; RV64XTHEADMEMPAIR-LABEL: sd128:
+; RV64XTHEADMEMPAIR:       # %bb.0:
+; RV64XTHEADMEMPAIR-NEXT:    th.sdd a1, a2, (a0), 0, 4
+; RV64XTHEADMEMPAIR-NEXT:    ret
+  %1 = getelementptr i128, i128* %a, i64 0
+  store i128 %b, i128* %1, align 8
+  ret void
+}
diff --git a/llvm/test/MC/RISCV/rv32xtheadmempair-invalid.s b/llvm/test/MC/RISCV/rv32xtheadmempair-invalid.s
new file mode 100644
--- /dev/null
+++ b/llvm/test/MC/RISCV/rv32xtheadmempair-invalid.s
@@ -0,0 +1,20 @@
+# RUN: not llvm-mc -triple riscv32 -mattr=+xtheadmempair < %s 2>&1 | FileCheck %s
+
+th.ldd t0, t1, (t2), 5, 4   # CHECK: [[@LINE]]:22: error: invalid operand for instruction
+th.ldd t0, t1, (t2)         # CHECK: [[@LINE]]:1: error: too few operands for instruction
+th.ldd t0, t1, (t2), 3, 5   # CHECK: [[@LINE]]:1: error: instruction requires the following: RV64I Base Instruction Set{{$}}
+th.sdd a0, a1, (a2), 5, 4   # CHECK: [[@LINE]]:22: error: invalid operand for instruction
+th.sdd a0, a1, (a2)         # CHECK: [[@LINE]]:1: error: too few operands for instruction
+th.sdd a0, a1, (a2), 3, 5   # CHECK: [[@LINE]]:1: error: instruction requires the following: RV64I Base Instruction Set{{$}}
+th.lwud t0, t1, (t2), 5, 4  # CHECK: [[@LINE]]:23: error: immediate must be an integer in the range [0, 3]
+th.lwud t0, t1, (t2)        # CHECK: [[@LINE]]:1: error: too few operands for instruction
+th.lwud t0, t1, (t2), 3, 5  # CHECK: [[@LINE]]:26: error: Operand must be constant 3.
+th.lwd a3, a4, (a5), 5, 4   # CHECK: [[@LINE]]:22: error: immediate must be an integer in the range [0, 3]
+th.lwd a3, a4, (a5)         # CHECK: [[@LINE]]:1: error: too few operands for instruction
+th.lwd a3, a4, (a5), 3, 5   # CHECK: [[@LINE]]:25: error: Operand must be constant 3.
+th.swd t3, t4, (t5), 5, 4   # CHECK: [[@LINE]]:22: error: immediate must be an integer in the range [0, 3]
+th.swd t3, t4, (t5)         # CHECK: [[@LINE]]:1: error: too few operands for instruction
+th.swd t3, t4, (t5), 3, 5   # CHECK: [[@LINE]]:25: error: Operand must be constant 3.
+th.lwud x6, x6, (x6), 2, 3  # CHECK: [[@LINE]]:9: error: The source register and destination registers cannot be equal.
+th.ldd t0, t1, (t2), 2, 4   # CHECK: [[@LINE]]:1: error: instruction requires the following: RV64I Base Instruction Set{{$}}
+th.sdd t0, t1, (t2), 2, 4   # CHECK: [[@LINE]]:1: error: instruction requires the following: RV64I Base Instruction Set{{$}}
diff --git a/llvm/test/MC/RISCV/rv32xtheadmempair-valid.s b/llvm/test/MC/RISCV/rv32xtheadmempair-valid.s
new file mode 100644
--- /dev/null
+++ b/llvm/test/MC/RISCV/rv32xtheadmempair-valid.s
@@ -0,0 +1,18 @@
+# With the XTHeadMemPair vendor extension:
+# RUN: llvm-mc %s -triple=riscv32 -mattr=+xtheadmempair -show-encoding \
+# RUN:     | FileCheck -check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s
+# RUN: llvm-mc -filetype=obj -triple=riscv32 -mattr=+xtheadmempair < %s \
+# RUN:     | llvm-objdump --mattr=+xtheadmempair -d -r - \
+# RUN:     | FileCheck --check-prefix=CHECK-ASM-AND-OBJ %s
+
+# CHECK-ASM-AND-OBJ: th.lwd
+# CHECK-ASM: encoding: [0x0b,0x45,0xb6,0xe2]
+th.lwd a0, a1, (a2), 1, 3
+
+# CHECK-ASM-AND-OBJ: th.lwud
+# CHECK-ASM: encoding: [0x0b,0x45,0xb6,0xf4]
+th.lwud a0, a1, (a2), 2, 3
+
+# CHECK-ASM-AND-OBJ: th.swd
+# CHECK-ASM: encoding: [0x0b,0x55,0xb6,0xe0]
+th.swd a0, a1, (a2), 0, 3
diff --git a/llvm/test/MC/RISCV/rv64xtheadmempair-invalid.s b/llvm/test/MC/RISCV/rv64xtheadmempair-invalid.s
new file mode 100644
--- /dev/null
+++ b/llvm/test/MC/RISCV/rv64xtheadmempair-invalid.s
@@ -0,0 +1,18 @@
+# RUN: not llvm-mc -triple riscv64 -mattr=+xtheadmempair < %s 2>&1 | FileCheck %s
+
+th.ldd t0, t1, (t2), 5, 4   # CHECK: [[@LINE]]:22: error: immediate must be an integer in the range [0, 3]
+th.ldd t0, t1, (t2)         # CHECK: [[@LINE]]:1: error: too few operands for instruction
+th.ldd t0, t1, (t2), 3, 5   # CHECK: [[@LINE]]:25: error: Operand must be constant 4.
+th.sdd a0, a1, (a2), 5, 4   # CHECK: [[@LINE]]:22: error: immediate must be an integer in the range [0, 3]
+th.sdd a0, a1, (a2)         # CHECK: [[@LINE]]:1: error: too few operands for instruction
+th.sdd a0, a1, (a2), 3, 5   # CHECK: [[@LINE]]:25: error: Operand must be constant 4.
+th.lwud t0, t1, (t2), 5, 4  # CHECK: [[@LINE]]:23: error: immediate must be an integer in the range [0, 3]
+th.lwud t0, t1, (t2)        # CHECK: [[@LINE]]:1: error: too few operands for instruction
+th.lwud t0, t1, (t2), 3, 5  # CHECK: [[@LINE]]:26: error: Operand must be constant 3.
+th.lwd a3, a4, (a5), 5, 4   # CHECK: [[@LINE]]:22: error: immediate must be an integer in the range [0, 3]
+th.lwd a3, a4, (a5)         # CHECK: [[@LINE]]:1: error: too few operands for instruction
+th.lwd a3, a4, (a5), 3, 5   # CHECK: [[@LINE]]:25: error: Operand must be constant 3.
+th.swd t3, t4, (t5), 5, 4   # CHECK: [[@LINE]]:22: error: immediate must be an integer in the range [0, 3]
+th.swd t3, t4, (t5)         # CHECK: [[@LINE]]:1: error: too few operands for instruction
+th.swd t3, t4, (t5), 3, 5   # CHECK: [[@LINE]]:25: error: Operand must be constant 3.
+th.lwud x6, x6, (x6), 2, 3  # CHECK: [[@LINE]]:9: error: The source register and destination registers cannot be equal.
diff --git a/llvm/test/MC/RISCV/rv64xtheadmempair-valid.s b/llvm/test/MC/RISCV/rv64xtheadmempair-valid.s
new file mode 100644
--- /dev/null
+++ b/llvm/test/MC/RISCV/rv64xtheadmempair-valid.s
@@ -0,0 +1,26 @@
+# With the XTHeadMemPair vendor extension:
+# RUN: llvm-mc %s -triple=riscv64 -mattr=+xtheadmempair -show-encoding \
+# RUN:     | FileCheck -check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s
+# RUN: llvm-mc -filetype=obj -triple=riscv64 -mattr=+xtheadmempair < %s \
+# RUN:     | llvm-objdump --mattr=+xtheadmempair -d -r - \
+# RUN:     | FileCheck --check-prefix=CHECK-ASM-AND-OBJ %s
+
+# CHECK-ASM-AND-OBJ: th.lwd
+# CHECK-ASM: encoding: [0x0b,0x45,0xb6,0xe2]
+th.lwd a0, a1, (a2), 1, 3
+
+# CHECK-ASM-AND-OBJ: th.lwud
+# CHECK-ASM: encoding: [0x0b,0x45,0xb6,0xf4]
+th.lwud a0, a1, (a2), 2, 3
+
+# CHECK-ASM-AND-OBJ: th.swd
+# CHECK-ASM: encoding: [0x0b,0x55,0xb6,0xe0]
+th.swd a0, a1, (a2), 0, 3
+
+# CHECK-ASM-AND-OBJ: th.ldd
+# CHECK-ASM: encoding: [0x0b,0x45,0xb6,0xf8]
+th.ldd a0, a1, (a2), 0, 4
+
+# CHECK-ASM-AND-OBJ: th.sdd
+# CHECK-ASM: encoding: [0x0b,0x55,0xb6,0xfe]
+th.sdd a0, a1, (a2), 3, 4