Index: lib/CodeGen/RegisterCoalescer.cpp
===================================================================
--- lib/CodeGen/RegisterCoalescer.cpp
+++ lib/CodeGen/RegisterCoalescer.cpp
@@ -686,13 +686,26 @@
 
 /// Copy segments with value number @p SrcValNo from liverange @p Src to live
 /// range @Dst and use value number @p DstValNo there.
-static void addSegmentsWithValNo(LiveRange &Dst, VNInfo *DstValNo,
+static bool addSegmentsWithValNo(LiveRange &Dst, VNInfo *DstValNo,
                                  const LiveRange &Src, const VNInfo *SrcValNo) {
+  bool Changed = false;
   for (const LiveRange::Segment &S : Src.segments) {
     if (S.valno != SrcValNo)
       continue;
-    Dst.addSegment(LiveRange::Segment(S.start, S.end, DstValNo));
-  }
+    // The Src segment ends at a copy that is about to be removed, and
+    // addSegment() merges it with the segment already present in Dst. If
+    // that Dst segment was a dead def, the merge would make it look live,
+    // e.g. [192r,208r:1) + [208r,208d:1) gives [192r,208d:1). Trim such a
+    // merged segment to a dead def at its own start.
+    // NOTE(review): this assumes no instruction reads the value between
+    // Merged.start and the removed copy -- confirm, else shrink to uses.
+    LiveRange::Segment Added = LiveRange::Segment(S.start, S.end, DstValNo);
+    LiveRange::Segment &Merged = *Dst.addSegment(Added);
+    if (Merged.end.isDead())
+      Merged.end = Merged.start.getDeadSlot();
+    Changed = true;
+  }
+  return Changed;
 }
 
 bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
@@ -873,10 +886,13 @@
   // Extend BValNo by merging in IntA live segments of AValNo. Val# definition
   // is updated.
   BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator();
-  if (IntB.hasSubRanges()) {
+  if (IntA.hasSubRanges() || IntB.hasSubRanges()) {
     if (!IntA.hasSubRanges()) {
       LaneBitmask Mask = MRI->getMaxLaneMaskForVReg(IntA.reg);
       IntA.createSubRangeFrom(Allocator, Mask, IntA);
+    } else if (!IntB.hasSubRanges()) {
+      LaneBitmask Mask = MRI->getMaxLaneMaskForVReg(IntB.reg);
+      IntB.createSubRangeFrom(Allocator, Mask, IntB);
     }
     SlotIndex AIdx = CopyIdx.getRegSlot(true);
     for (LiveInterval::SubRange &SA : IntA.subranges()) {
@@ -889,7 +905,8 @@
           ? SR.getNextValue(CopyIdx, Allocator)
           : SR.getVNInfoAt(CopyIdx);
         assert(BSubValNo != nullptr);
-        addSegmentsWithValNo(SR, BSubValNo, SA, ASubValNo);
+        if (addSegmentsWithValNo(SR, BSubValNo, SA, ASubValNo))
+          BSubValNo->def = ASubValNo->def;
       });
     }
   }
Index: lib/CodeGen/VirtRegMap.cpp
===================================================================
--- lib/CodeGen/VirtRegMap.cpp
+++ lib/CodeGen/VirtRegMap.cpp
@@ -525,7 +525,7 @@
         // Preserve semantics of sub-register operands.
         unsigned SubReg = MO.getSubReg();
         if (SubReg != 0) {
-          if (NoSubRegLiveness) {
+          if (NoSubRegLiveness || !MRI->shouldTrackSubRegLiveness(VirtReg)) {
             // A virtual register kill refers to the whole register, so we may
             // have to add implicit killed operands for the super-register.  A
             // partial redef always kills and redefines the super-register.
Index: lib/Target/SystemZ/SystemZSubtarget.h
===================================================================
--- lib/Target/SystemZ/SystemZSubtarget.h
+++ lib/Target/SystemZ/SystemZSubtarget.h
@@ -102,6 +102,8 @@
   // Always enable the early if-conversion pass.
   bool enableEarlyIfConversion() const override { return true; }
 
+  bool enableSubRegLiveness() const override { return true; }
+
   // Automatically generated by tblgen.
   void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
 
Index: test/CodeGen/SystemZ/cmpxchg-06.ll
===================================================================
--- test/CodeGen/SystemZ/cmpxchg-06.ll
+++ test/CodeGen/SystemZ/cmpxchg-06.ll
@@ -116,9 +116,9 @@
 ; CHECK-LABEL: f10
 ; CHECK-DAG: lg %r1, 8(%r3)
 ; CHECK-DAG: lg %r0, 0(%r3)
-; CHECK-DAG: lg %r13, 8(%r2)
-; CHECK-DAG: lg %r12, 0(%r2)
-; CHECK:     cdsg %r12, %r0, 0(%r4)
+; CHECK-DAG: lg {{%r[0-9]+}}, 8(%r2)
+; CHECK-DAG: lg [[REG:%r[0-9]+]], 0(%r2)
+; CHECK:     cdsg [[REG]], %r0, 0(%r4)
 ; CHECK-NEXT: ipm %r2
 ; CHECK-NEXT: afi %r2, -268435456
 ; CHECK-NEXT: srl %r2, 31
@@ -134,15 +134,13 @@
 
 ; Check using the comparison result for a branch.
 ; CHECK-LABEL: f11
-; CHECK-DAG: lg %r1, 8(%r3)
-; CHECK-DAG: lg %r0, 0(%r3)
-; CHECK-DAG: lg %r13, 8(%r2)
-; CHECK-DAG: lg %r12, 0(%r2)
-; CHECK:     cdsg %r12, %r0, 0(%r4)
-; CHECK-NEXT: jl [[LABEL:\.[^ ]*]]
-; CHECK: jg g
-; CHECK: [[LABEL]]:
-; CHECK: br %r14
+; CHECK-DAG:  lg %r1, 8(%r3)
+; CHECK-DAG:  lg %r0, 0(%r3)
+; CHECK-DAG:  lg %r3, 8(%r2)
+; CHECK-DAG:  lg %r2, 0(%r2)
+; CHECK:      cdsg %r2, %r0, 0(%r4)
+; CHECK-NEXT: jge g
+; CHECK:      br %r14
 define void @f11(i128 %cmp, i128 %swap, i128 *%src) {
   %pairval = cmpxchg i128 *%src, i128 %cmp, i128 %swap seq_cst seq_cst
   %cond = extractvalue { i128, i1 } %pairval, 1
@@ -158,15 +156,13 @@
 
 ; ... and the same with the inverted direction.
 ; CHECK-LABEL: f12
-; CHECK-DAG: lg %r1, 8(%r3)
-; CHECK-DAG: lg %r0, 0(%r3)
-; CHECK-DAG: lg %r13, 8(%r2)
-; CHECK-DAG: lg %r12, 0(%r2)
-; CHECK:     cdsg %r12, %r0, 0(%r4)
-; CHECK-NEXT: jl [[LABEL:\.[^ ]*]]
-; CHECK: br %r14
-; CHECK: [[LABEL]]:
-; CHECK: jg g
+; CHECK-DAG:  lg %r1, 8(%r3)
+; CHECK-DAG:  lg %r0, 0(%r3)
+; CHECK-DAG:  lg %r3, 8(%r2)
+; CHECK-DAG:  lg %r2, 0(%r2)
+; CHECK:      cdsg %r2, %r0, 0(%r4)
+; CHECK-NEXT: jgl g
+; CHECK:      br %r14
 define void @f12(i128 %cmp, i128 %swap, i128 *%src) {
   %pairval = cmpxchg i128 *%src, i128 %cmp, i128 %swap seq_cst seq_cst
   %cond = extractvalue { i128, i1 } %pairval, 1
Index: test/CodeGen/SystemZ/int-ssub-06.ll
===================================================================
--- test/CodeGen/SystemZ/int-ssub-06.ll
+++ test/CodeGen/SystemZ/int-ssub-06.ll
@@ -73,7 +73,7 @@
 ; and must use a register.
 define zeroext i1 @f5(i32 %dummy, i32 %a, i32 *%res) {
 ; CHECK-LABEL: f5:
-; CHECK: llilh [[REG1:%r[0-5]]], 32768
+; CHECK: iilf [[REG1:%r[0-5]]], 2147483648
 ; CHECK: sr %r3, [[REG1]]
 ; CHECK-DAG: st %r3, 0(%r4)
 ; CHECK-DAG: ipm [[REG:%r[0-5]]]
@@ -171,7 +171,7 @@
 ; Check the next value down, which must use a register.
 define zeroext i1 @f11(i32 %dummy, i32 %a, i32 *%res) {
 ; CHECK-LABEL: f11:
-; CHECK: llilh [[REG1:%r[0-5]]], 32768
+; CHECK: iilf [[REG1:%r[0-5]]], 2147483648
 ; CHECK: sr %r3, [[REG1]]
 ; CHECK-DAG: st %r3, 0(%r4)
 ; CHECK-DAG: ipm [[REG:%r[0-5]]]
Index: test/CodeGen/SystemZ/tc_subregliveness_DefMI.ll
===================================================================
--- /dev/null
+++ test/CodeGen/SystemZ/tc_subregliveness_DefMI.ll
@@ -0,0 +1,51 @@
+; RUN: llc -mtriple=s390x-linux-gnu -mcpu=z13 < %s | FileCheck %s
+
+; Check for successful compilation.
+; CHECK: aghi %r15, -160
+
+target datalayout = "E-m:e-i1:8:16-i8:8:16-i64:64-f128:64-v128:64-a:8:16-n32:64"
+target triple = "s390x-ibm-linux"
+
+%0 = type { i8*, i32, i32 }
+
+declare i8* @Perl_sv_grow(%0*, i64) #0
+
+; Function Attrs: nounwind
+define signext i32 @Perl_yylex() #1 {
+bb:
+  br label %bb1
+
+bb1:                                              ; preds = %bb3, %bb
+  %tmp = phi i8* [ %tmp8, %bb3 ], [ undef, %bb ]
+  %tmp2 = icmp eq i8 undef, 0
+  br i1 %tmp2, label %bb9, label %bb3
+
+bb3:                                              ; preds = %bb1
+  %tmp4 = ptrtoint i8* %tmp to i64
+  %tmp5 = sub i64 %tmp4, 0
+  %tmp6 = shl i64 %tmp5, 32
+  %tmp7 = ashr exact i64 %tmp6, 32
+  %tmp8 = getelementptr inbounds i8, i8* null, i64 %tmp7
+  br label %bb1
+
+bb9:                                              ; preds = %bb1
+  br i1 undef, label %bb10, label %bb15
+
+bb10:                                             ; preds = %bb9
+  %tmp11 = ptrtoint i8* %tmp to i64
+  %tmp12 = sub i64 %tmp11, 0
+  %tmp13 = call i8* @Perl_sv_grow(%0* nonnull undef, i64 undef) #2
+  %tmp14 = getelementptr inbounds i8, i8* %tmp13, i64 %tmp12
+  br label %bb15
+
+bb15:                                             ; preds = %bb10, %bb9
+  %tmp16 = phi i8* [ %tmp14, %bb10 ], [ %tmp, %bb9 ]
+  %tmp17 = call i8* @Perl_uvuni_to_utf8(i8* %tmp16, i64 undef) #2
+  unreachable
+}
+
+declare i8* @Perl_uvuni_to_utf8(i8*, i64) #0
+
+attributes #0 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="z13" "target-features"="+transactional-execution,+vector" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="z13" "target-features"="+transactional-execution,+vector" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind }
Index: test/CodeGen/SystemZ/tc_subregliveness_hasSubRanges.ll
===================================================================
--- /dev/null
+++ test/CodeGen/SystemZ/tc_subregliveness_hasSubRanges.ll
@@ -0,0 +1,22 @@
+; RUN: llc -mtriple=s390x-linux-gnu -mcpu=z13 < %s | FileCheck %s
+
+; Check for successful compilation.
+; CHECK: meeb %f0, 0(%r1)
+
+target datalayout = "E-m:e-i1:8:16-i8:8:16-i64:64-f128:64-v128:64-a:8:16-n32:64"
+target triple = "s390x-ibm-linux"
+
+; Function Attrs: nounwind
+define void @spec_random_load() #0 {
+bb:
+  %tmp = sitofp i64 undef to float
+  %tmp1 = fmul float %tmp, 0x3E00000000000000
+  %tmp2 = fpext float %tmp1 to double
+  %tmp3 = fmul double %tmp2, 2.560000e+02
+  %tmp4 = fptosi double %tmp3 to i32
+  %tmp5 = trunc i32 %tmp4 to i8
+  store i8 %tmp5, i8* undef, align 1
+  unreachable
+}
+
+attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="z13" "target-features"="+transactional-execution,+vector" "unsafe-fp-math"="false" "use-soft-float"="false" }
Index: test/CodeGen/SystemZ/tc_subregliveness_hassubranges_2.ll
===================================================================
--- /dev/null
+++ test/CodeGen/SystemZ/tc_subregliveness_hassubranges_2.ll
@@ -0,0 +1,41 @@
+; RUN: llc -mtriple=s390x-linux-gnu -mcpu=z13 -disable-early-taildup -disable-cgp < %s | FileCheck %s
+
+; Check for successful compilation.
+; CHECK: lhi %r0, -5
+
+target datalayout = "E-m:e-i1:8:16-i8:8:16-i64:64-f128:64-v128:64-a:8:16-n32:64"
+target triple = "s390x-ibm-linux"
+
+; Function Attrs: nounwind
+define void @main() #0 {
+bb:
+  %tmp = xor i8 0, -5
+  %tmp1 = sext i8 %tmp to i32
+  %tmp2 = icmp sgt i8 0, -1
+  br label %bb3
+
+bb3:                                              ; preds = %bb15, %bb
+  %tmp4 = phi i64 [ %tmp16, %bb15 ], [ -1, %bb ]
+  br i1 undef, label %bb14, label %bb5
+
+bb5:                                              ; preds = %bb3
+  %tmp6 = or i1 %tmp2, false
+  %tmp7 = select i1 %tmp6, i32 0, i32 undef
+  %tmp8 = ashr i32 %tmp1, %tmp7
+  %tmp9 = zext i32 %tmp8 to i64
+  %tmp10 = shl i64 %tmp9, 48
+  %tmp11 = ashr exact i64 %tmp10, 48
+  %tmp12 = and i64 %tmp11, %tmp4
+  %tmp13 = trunc i64 %tmp12 to i32
+  store i32 %tmp13, i32* undef, align 4
+  br label %bb15
+
+bb14:                                             ; preds = %bb3
+  br label %bb15
+
+bb15:                                             ; preds = %bb14, %bb5
+  %tmp16 = phi i64 [ %tmp4, %bb14 ], [ %tmp12, %bb5 ]
+  br label %bb3
+}
+
+attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="z13" "target-features"="+transactional-execution,+vector" "unsafe-fp-math"="false" "use-soft-float"="false" }
Index: test/CodeGen/SystemZ/tc_subregliveness_livesegend.ll
===================================================================
--- /dev/null
+++ test/CodeGen/SystemZ/tc_subregliveness_livesegend.ll
@@ -0,0 +1,48 @@
+; RUN: llc -mtriple=s390x-linux-gnu -mcpu=z10 -verify-machineinstrs < %s | FileCheck %s
+
+; Check for successful compilation.
+; CHECK: lgfrl %r1, g_65
+
+target datalayout = "E-m:e-i1:8:16-i8:8:16-i64:64-f128:64-a:8:16-n32:64"
+target triple = "s390x-ibm-linux"
+
+@g_65 = external global i32, align 4
+
+; Function Attrs: nounwind
+define void @main() #0 {
+bb:
+  br label %bb1
+
+bb1:                                              ; preds = %bb
+  %tmp = load i32, i32* @g_65, align 4
+  %tmp2 = sext i32 %tmp to i64
+  %tmp3 = shl i32 %tmp, 16
+  %tmp4 = ashr exact i32 %tmp3, 16
+  %tmp5 = shl i32 %tmp4, 0
+  %tmp6 = zext i32 %tmp5 to i64
+  %tmp7 = shl i64 %tmp6, 48
+  %tmp8 = ashr exact i64 %tmp7, 48
+  br i1 undef, label %bb12, label %bb9
+
+bb9:                                              ; preds = %bb1
+  %tmp10 = select i1 undef, i64 0, i64 %tmp2
+  %tmp11 = add nsw i64 %tmp10, %tmp8
+  br label %bb12
+
+bb12:                                             ; preds = %bb9, %bb1
+  %tmp13 = phi i64 [ %tmp11, %bb9 ], [ %tmp8, %bb1 ]
+  %tmp14 = trunc i64 %tmp13 to i32
+  %tmp15 = and i32 %tmp14, 255
+  %tmp16 = shl i32 %tmp15, 0
+  %tmp17 = trunc i32 %tmp16 to i8
+  %tmp18 = icmp eq i8 %tmp17, 0
+  br i1 %tmp18, label %bb20, label %bb19
+
+bb19:                                             ; preds = %bb12
+  unreachable
+
+bb20:                                             ; preds = %bb12
+  unreachable
+}
+
+attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="z10" "unsafe-fp-math"="false" "use-soft-float"="false" }
Index: test/CodeGen/SystemZ/tc_subregliveness_noliveseg.ll
===================================================================
--- /dev/null
+++ test/CodeGen/SystemZ/tc_subregliveness_noliveseg.ll
@@ -0,0 +1,48 @@
+; RUN: llc -mtriple=s390x-linux-gnu -mcpu=z13 -verify-machineinstrs < %s | FileCheck %s
+
+; Check for successful compilation.
+; CHECK: lgfrl %r0, g_399
+
+target datalayout = "E-m:e-i1:8:16-i8:8:16-i64:64-f128:64-v128:64-a:8:16-n32:64"
+target triple = "s390x-ibm-linux"
+
+@g_439 = external global i32, align 4
+@g_399 = external global { i8, i8, i8, i8, i8, i8 }, align 8
+
+; Function Attrs: nounwind
+define void @main() #0 {
+bb:
+  %tmp = load i48, i48* bitcast ({ i8, i8, i8, i8, i8, i8 }* @g_399 to i48*), align 8, !noalias !1
+  %tmp1 = ashr i48 %tmp, 17
+  %tmp2 = trunc i48 %tmp1 to i32
+  %tmp3 = sext i32 %tmp2 to i64
+  br label %bb4
+
+bb4:                                              ; preds = %bb4, %bb
+  %tmp5 = load i64, i64* undef, align 8, !tbaa !4, !noalias !1
+  %tmp6 = urem i64 -923186811629238421, %tmp3
+  %tmp7 = or i64 %tmp6, %tmp5
+  %tmp8 = trunc i64 %tmp7 to i32
+  %tmp9 = lshr i32 %tmp8, 2
+  %tmp10 = and i32 %tmp9, 60
+  %tmp11 = xor i32 %tmp10, -1592309976
+  %tmp12 = or i32 0, %tmp11
+  %tmp13 = or i32 %tmp12, 3
+  store i32 %tmp13, i32* @g_439, align 4, !tbaa !8, !noalias !1
+  br label %bb4
+}
+
+attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="z13" "target-features"="+transactional-execution,+vector" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.ident = !{!0}
+
+!0 = !{!"clang version 8.0.0 (http://llvm.org/git/clang.git c0a5e830f198cf42d29f72f1ec06fbf4c5210e2c) (http://llvm.org/git/llvm.git ffc8c538b70b678031b8617f61f83ee120bcb884)"}
+!1 = !{!2}
+!2 = distinct !{!2, !3, !"func_1: %agg.result"}
+!3 = distinct !{!3, !"func_1"}
+!4 = !{!5, !5, i64 0}
+!5 = !{!"long", !6, i64 0}
+!6 = !{!"omnipotent char", !7, i64 0}
+!7 = !{!"Simple C/C++ TBAA"}
+!8 = !{!9, !9, i64 0}
+!9 = !{!"int", !6, i64 0}