Index: llvm/include/llvm/CodeGen/TargetLowering.h
===================================================================
--- llvm/include/llvm/CodeGen/TargetLowering.h
+++ llvm/include/llvm/CodeGen/TargetLowering.h
@@ -1465,7 +1465,13 @@
   /// like i140, which are first promoted then expanded, it is the number of
   /// registers needed to hold all the bits of the original type.  For an i140
   /// on a 32 bit machine this means 5 registers.
-  unsigned getNumRegisters(LLVMContext &Context, EVT VT) const {
+  ///
+  /// RC is typically not needed, but a target overriding this hook may use it
+  /// to return a different count than the default, for instance for i128
+  /// inline assembly operands on SystemZ.
+  virtual unsigned
+  getNumRegisters(LLVMContext &Context, EVT VT,
+                  const TargetRegisterClass *RC = nullptr) const {
     if (VT.isSimple()) {
       assert((unsigned)VT.getSimpleVT().SimpleTy <
                 array_lengthof(NumRegistersForVT));
Index: llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -942,6 +942,7 @@
                                         SelectionDAG &DAG,
                                         std::vector<SDValue> &Ops) const {
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  const TargetRegisterClass *RC = nullptr;
 
   unsigned Flag = InlineAsm::getFlagWord(Code, Regs.size());
   if (HasMatching)
@@ -953,7 +954,7 @@
     // Don't do this for tied operands that can use the regclass information
     // from the def.
     const MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
-    const TargetRegisterClass *RC = MRI.getRegClass(Regs.front());
+    RC = MRI.getRegClass(Regs.front());
     Flag = InlineAsm::getFlagWordForRegClass(Flag, RC->getID());
   }
 
@@ -979,8 +980,13 @@
   }
 
   for (unsigned Value = 0, Reg = 0, e = ValueVTs.size(); Value != e; ++Value) {
-    unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), ValueVTs[Value]);
+    // Pass RC to getNumRegisters() since inline assembly operands may not
+    // follow the ordinary pattern of splitting.
     MVT RegisterVT = RegVTs[Value];
+    if (RC == nullptr)
+      RC = TLI.getRegClassFor(RegisterVT);
+    unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), ValueVTs[Value],
+                                           RC);
     for (unsigned i = 0; i != NumRegs; ++i) {
       assert(Reg < Regs.size() && "Mismatch in # registers expected");
       unsigned TheReg = Regs[Reg++];
@@ -8177,7 +8183,7 @@
   // remember that AX is actually i16 to get the right extension.
   const MVT RegVT = *TRI.legalclasstypes_begin(*RC);
 
-  if (OpInfo.ConstraintVT != MVT::Other) {
+  if (OpInfo.ConstraintVT != MVT::Other && RegVT != MVT::Untyped) {
     // If this is an FP operand in an integer register (or visa versa), or more
     // generally if the operand value disagrees with the register class we plan
     // to stick it in, fix the operand type.
@@ -8224,7 +8230,7 @@
   // Initialize NumRegs.
   unsigned NumRegs = 1;
   if (OpInfo.ConstraintVT != MVT::Other)
-    NumRegs = TLI.getNumRegisters(Context, OpInfo.ConstraintVT);
+    NumRegs = TLI.getNumRegisters(Context, OpInfo.ConstraintVT, RC);
 
   // If this is a constraint for a specific physical register, like {r17},
   // assign it now.
Index: llvm/lib/Target/SystemZ/SystemZISelLowering.h
===================================================================
--- llvm/lib/Target/SystemZ/SystemZISelLowering.h
+++ llvm/lib/Target/SystemZ/SystemZISelLowering.h
@@ -422,6 +422,19 @@
       return TypeWidenVector;
     return TargetLoweringBase::getPreferredVectorAction(VT);
   }
+  const TargetRegisterClass *
+  getRegClassFor(MVT VT, bool isDivergent = false) const override {
+    if (VT == MVT::Untyped) // Inline asm phys regs: Untyped maps to GR128.
+      return &SystemZ::GR128BitRegClass;
+    return TargetLowering::getRegClassFor(VT, isDivergent);
+  }
+  unsigned
+  getNumRegisters(LLVMContext &Context, EVT VT,
+                  const TargetRegisterClass *RC = nullptr) const override {
+    if (VT == MVT::i128 && RC == &SystemZ::GR128BitRegClass)
+      return 1; // An i128 operand in GR128 needs just one register.
+    return TargetLowering::getNumRegisters(Context, VT, RC);
+  }
   bool isCheapToSpeculateCtlz() const override { return true; }
   EVT getSetCCResultType(const DataLayout &DL, LLVMContext &,
                          EVT) const override;
@@ -517,6 +530,15 @@
   const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;
   bool allowTruncateForTailCall(Type *, Type *) const override;
   bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
+  bool splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL,
+                                   SDValue Val, SDValue *Parts,
+                                   unsigned NumParts, MVT PartVT,
+                                   Optional<CallingConv::ID> CC) const override;
+  SDValue
+  joinRegisterPartsIntoValue(SelectionDAG &DAG, const SDLoc &DL,
+                             const SDValue *Parts, unsigned NumParts,
+                             MVT PartVT, EVT ValueVT,
+                             Optional<CallingConv::ID> CC) const override;
   SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
                                bool isVarArg,
                                const SmallVectorImpl<ISD::InputArg> &Ins,
Index: llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
===================================================================
--- llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -1368,6 +1368,55 @@
   }
 }
 
+// Pack the two i64 halves of the i128 value In into an untyped PAIR128 node.
+static SDValue lowerI128ToGR128(SelectionDAG &DAG, SDValue In) {
+  SDLoc DL(In);
+  SDValue LowHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, In,
+                                DAG.getIntPtrConstant(0, DL));
+  SDValue HighHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, In,
+                                 DAG.getIntPtrConstant(1, DL));
+  return SDValue(DAG.getMachineNode(SystemZ::PAIR128, DL, MVT::Untyped,
+                                    HighHalf, LowHalf), 0);
+}
+
+// Extract both 64-bit subregisters of In and pair them up as an i128.
+static SDValue lowerGR128ToI128(SelectionDAG &DAG, SDValue In) {
+  SDLoc DL(In);
+  const MVT HalfVT = MVT::i64;
+  SDValue Hi = DAG.getTargetExtractSubreg(SystemZ::subreg_h64, DL, HalfVT, In);
+  SDValue Lo = DAG.getTargetExtractSubreg(SystemZ::subreg_l64, DL, HalfVT, In);
+  return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128, Lo, Hi);
+}
+
+bool SystemZTargetLowering::splitValueIntoRegisterParts(
+    SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
+    unsigned NumParts, MVT PartVT, Optional<CallingConv::ID> CC) const {
+  // Only i128 values are of interest here. They are expected either as a
+  // single untyped part or as two i64 parts.
+  const bool IsI128 = Val.getValueType() == MVT::i128;
+  assert((!IsI128 || (NumParts == 1 && PartVT == MVT::Untyped) ||
+          (NumParts == 2 && PartVT == MVT::i64)) &&
+         "Unknown handling of i128 value.");
+  if (!IsI128 || NumParts != 1)
+    return false; // Nothing is written to Parts.
+  // Inline assembly operand: the whole value goes into one GR128 part.
+  Parts[0] = lowerI128ToGR128(DAG, Val);
+  return true;
+}
+
+SDValue SystemZTargetLowering::joinRegisterPartsIntoValue(
+    SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
+    MVT PartVT, EVT ValueVT, Optional<CallingConv::ID> CC) const {
+  const bool IsI128 = ValueVT == MVT::i128;
+  assert((!IsI128 || (NumParts == 1 && PartVT == MVT::Untyped) ||
+          (NumParts == 2 && PartVT == MVT::i64)) &&
+         "Unknown handling of i128 value.");
+  if (!IsI128 || NumParts != 1)
+    return SDValue(); // Not handled here: hand back an empty SDValue.
+  // Inline assembly operand: a single untyped part rebuilt as an i128.
+  return lowerGR128ToI128(DAG, Parts[0]);
+}
+
 SDValue SystemZTargetLowering::LowerFormalArguments(
     SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
@@ -5489,27 +5538,6 @@
 
 // Lower operations with invalid operand or result types (currently used
 // only for 128-bit integer types).
-
-static SDValue lowerI128ToGR128(SelectionDAG &DAG, SDValue In) {
-  SDLoc DL(In);
-  SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, In,
-                           DAG.getIntPtrConstant(0, DL));
-  SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, In,
-                           DAG.getIntPtrConstant(1, DL));
-  SDNode *Pair = DAG.getMachineNode(SystemZ::PAIR128, DL,
-                                    MVT::Untyped, Hi, Lo);
-  return SDValue(Pair, 0);
-}
-
-static SDValue lowerGR128ToI128(SelectionDAG &DAG, SDValue In) {
-  SDLoc DL(In);
-  SDValue Hi = DAG.getTargetExtractSubreg(SystemZ::subreg_h64,
-                                          DL, MVT::i64, In);
-  SDValue Lo = DAG.getTargetExtractSubreg(SystemZ::subreg_l64,
-                                          DL, MVT::i64, In);
-  return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128, Lo, Hi);
-}
-
 void
 SystemZTargetLowering::LowerOperationWrapper(SDNode *N,
                                              SmallVectorImpl<SDValue> &Results,
Index: llvm/test/CodeGen/SystemZ/inline-asm-i128.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/SystemZ/inline-asm-i128.ll
@@ -0,0 +1,69 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=s390x-linux-gnu -no-integrated-as < %s | FileCheck %s
+;
+; Test i128 inline asm register operands whose inputs are tied to the outputs.
+
+define i32 @clcl(i8* %p1, i32 signext %l1, i8* %p2, i32 signext %l2, i8 zeroext %pad) {
+; CHECK-LABEL: clcl:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    lgr %r0, %r5
+; CHECK-NEXT:    # kill: def $r4d killed $r4d def $r4q
+; CHECK-NEXT:    lgr %r1, %r3
+; CHECK-NEXT:    # kill: def $r2d killed $r2d def $r2q
+; CHECK-NEXT:    sllg %r5, %r6, 24
+; CHECK-NEXT:    rosbg %r5, %r0, 40, 63, 0
+; CHECK-NEXT:    risbg %r3, %r1, 40, 191, 0
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    clcl %r2, %r4
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    ogr %r3, %r5
+; CHECK-NEXT:    risbg %r0, %r3, 40, 191, 0
+; CHECK-NEXT:    ipm %r2
+; CHECK-NEXT:    afi %r2, -268435456
+; CHECK-NEXT:    srl %r2, 31
+; CHECK-NEXT:    br %r14
+entry:
+  %0 = ptrtoint i8* %p1 to i64
+  %1 = ptrtoint i8* %p2 to i64
+  %and5 = and i32 %l2, 16777215
+  %2 = zext i32 %and5 to i64
+  %conv7 = zext i8 %pad to i64
+  %shl = shl nuw nsw i64 %conv7, 24
+  %or = or i64 %shl, %2
+  %u1.sroa.0.0.insert.ext = zext i64 %0 to i128
+  %u1.sroa.0.0.insert.shift = shl nuw i128 %u1.sroa.0.0.insert.ext, 64
+  %3 = and i32 %l1, 16777215
+  %u1.sroa.0.0.insert.mask = zext i32 %3 to i128
+  %u1.sroa.0.0.insert.insert = or i128 %u1.sroa.0.0.insert.shift, %u1.sroa.0.0.insert.mask
+  %u2.sroa.5.0.insert.ext = zext i64 %or to i128
+  %u2.sroa.0.0.insert.ext = zext i64 %1 to i128
+  %u2.sroa.0.0.insert.shift = shl nuw i128 %u2.sroa.0.0.insert.ext, 64
+  %u2.sroa.0.0.insert.insert = or i128 %u2.sroa.0.0.insert.shift, %u2.sroa.5.0.insert.ext
+  %4 = tail call { i128, i128 } asm "clcl $0, $1", "=r,=r,0,1"(i128 %u1.sroa.0.0.insert.insert, i128 %u2.sroa.0.0.insert.insert)
+  %asmresult = extractvalue { i128, i128 } %4, 0
+  %asmresult11 = extractvalue { i128, i128 } %4, 1
+  %5 = or i128 %asmresult, %asmresult11
+  %6 = and i128 %5, 16777215
+  %7 = icmp eq i128 %6, 0
+  %land.ext = zext i1 %7 to i32
+  ret i32 %land.ext
+}
+
+; Test an i128 input tied to an output in a specific physical register.
+define void @fun(i128* %Src, i128* %Dst) {
+; CHECK-LABEL: fun:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    lg %r5, 8(%r2)
+; CHECK-NEXT:    lg %r4, 0(%r2)
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    BLA %r4, %r4
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    stg %r5, 8(%r3)
+; CHECK-NEXT:    stg %r4, 0(%r3)
+; CHECK-NEXT:    br %r14
+entry:
+  %L = load i128, i128* %Src
+  %IAsm = call i128 asm "BLA $0, $1", "={r4},0"(i128 %L)
+  store volatile i128 %IAsm, i128* %Dst
+  ret void
+}