Index: lib/CodeGen/RegisterCoalescer.cpp =================================================================== --- lib/CodeGen/RegisterCoalescer.cpp +++ lib/CodeGen/RegisterCoalescer.cpp @@ -686,13 +686,26 @@ /// Copy segments with value number @p SrcValNo from liverange @p Src to live /// range @Dst and use value number @p DstValNo there. -static void addSegmentsWithValNo(LiveRange &Dst, VNInfo *DstValNo, +static bool addSegmentsWithValNo(LiveRange &Dst, VNInfo *DstValNo, const LiveRange &Src, const VNInfo *SrcValNo) { + bool Changed = false; for (const LiveRange::Segment &S : Src.segments) { if (S.valno != SrcValNo) continue; - Dst.addSegment(LiveRange::Segment(S.start, S.end, DstValNo)); - } + // This is adding a segment from Src that ends in a copy that is about + // to be removed. This segment is going to be merged with a pre-existing + // segment in Dst. This works, except in cases when the corresponding + // segment in Dst is dead. For example: adding [192r,208r:1) from Src + // to [208r,208d:1) in Dst would create [192r,208d:1) in Dst. + // In such cases, reset the end of the merged segment to reflect that + // it is dead. + LiveRange::Segment Added = LiveRange::Segment(S.start, S.end, DstValNo); + LiveRange::Segment &Merged = *Dst.addSegment(Added); + if (Merged.end.isDead()) + Merged.end = Merged.start.getDeadSlot(); + Changed = true; + } + return Changed; } bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, @@ -873,10 +886,13 @@ // Extend BValNo by merging in IntA live segments of AValNo. Val# definition // is updated. BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator(); - if (IntB.hasSubRanges()) { + if (IntA.hasSubRanges() || IntB.hasSubRanges()) { if (!IntA.hasSubRanges()) { LaneBitmask Mask = MRI->getMaxLaneMaskForVReg(IntA.reg); IntA.createSubRangeFrom(Allocator, Mask, IntA); + } else if (!IntB.hasSubRanges()) { + LaneBitmask Mask = MRI->getMaxLaneMaskForVReg(IntB.reg); + IntB.createSubRangeFrom(Allocator, Mask, IntB); } SlotIndex AIdx = CopyIdx.getRegSlot(true); for (LiveInterval::SubRange &SA : IntA.subranges()) { @@ -889,7 +905,8 @@ ? SR.getNextValue(CopyIdx, Allocator) : SR.getVNInfoAt(CopyIdx); assert(BSubValNo != nullptr); - addSegmentsWithValNo(SR, BSubValNo, SA, ASubValNo); + if (addSegmentsWithValNo(SR, BSubValNo, SA, ASubValNo)) + BSubValNo->def = ASubValNo->def; }); } } Index: lib/CodeGen/VirtRegMap.cpp =================================================================== --- lib/CodeGen/VirtRegMap.cpp +++ lib/CodeGen/VirtRegMap.cpp @@ -525,7 +525,7 @@ // Preserve semantics of sub-register operands. unsigned SubReg = MO.getSubReg(); if (SubReg != 0) { - if (NoSubRegLiveness) { + if (NoSubRegLiveness || !MRI->shouldTrackSubRegLiveness(VirtReg)) { // A virtual register kill refers to the whole register, so we may // have to add implicit killed operands for the super-register. A // partial redef always kills and redefines the super-register. Index: lib/Target/SystemZ/SystemZSubtarget.h =================================================================== --- lib/Target/SystemZ/SystemZSubtarget.h +++ lib/Target/SystemZ/SystemZSubtarget.h @@ -102,6 +102,8 @@ // Always enable the early if-conversion pass. bool enableEarlyIfConversion() const override { return true; } + bool enableSubRegLiveness() const override { return true; } + // Automatically generated by tblgen. void ParseSubtargetFeatures(StringRef CPU, StringRef FS); Index: test/CodeGen/SystemZ/cmpxchg-06.ll =================================================================== --- test/CodeGen/SystemZ/cmpxchg-06.ll +++ test/CodeGen/SystemZ/cmpxchg-06.ll @@ -116,9 +116,9 @@ ; CHECK-LABEL: f10 ; CHECK-DAG: lg %r1, 8(%r3) ; CHECK-DAG: lg %r0, 0(%r3) -; CHECK-DAG: lg %r13, 8(%r2) -; CHECK-DAG: lg %r12, 0(%r2) -; CHECK: cdsg %r12, %r0, 0(%r4) +; CHECK-DAG: lg {{%r[0-9]+}}, 8(%r2) +; CHECK-DAG: lg [[REG:%r[0-9]+]], 0(%r2) +; CHECK: cdsg [[REG]], %r0, 0(%r4) ; CHECK-NEXT: ipm %r2 ; CHECK-NEXT: afi %r2, -268435456 ; CHECK-NEXT: srl %r2, 31 @@ -134,15 +134,13 @@ ; Check using the comparison result for a branch. ; CHECK-LABEL: f11 -; CHECK-DAG: lg %r1, 8(%r3) -; CHECK-DAG: lg %r0, 0(%r3) -; CHECK-DAG: lg %r13, 8(%r2) -; CHECK-DAG: lg %r12, 0(%r2) -; CHECK: cdsg %r12, %r0, 0(%r4) -; CHECK-NEXT: jl [[LABEL:\.[^ ]*]] -; CHECK: jg g -; CHECK: [[LABEL]]: -; CHECK: br %r14 +; CHECK-DAG: lg %r1, 8(%r3) +; CHECK-DAG: lg %r0, 0(%r3) +; CHECK-DAG: lg %r3, 8(%r2) +; CHECK-DAG: lg %r2, 0(%r2) +; CHECK: cdsg %r2, %r0, 0(%r4) +; CHECK-NEXT: jge g +; CHECK: br %r14 define void @f11(i128 %cmp, i128 %swap, i128 *%src) { %pairval = cmpxchg i128 *%src, i128 %cmp, i128 %swap seq_cst seq_cst %cond = extractvalue { i128, i1 } %pairval, 1 @@ -158,15 +156,13 @@ ; ... and the same with the inverted direction. ; CHECK-LABEL: f12 -; CHECK-DAG: lg %r1, 8(%r3) -; CHECK-DAG: lg %r0, 0(%r3) -; CHECK-DAG: lg %r13, 8(%r2) -; CHECK-DAG: lg %r12, 0(%r2) -; CHECK: cdsg %r12, %r0, 0(%r4) -; CHECK-NEXT: jl [[LABEL:\.[^ ]*]] -; CHECK: br %r14 -; CHECK: [[LABEL]]: -; CHECK: jg g +; CHECK-DAG: lg %r1, 8(%r3) +; CHECK-DAG: lg %r0, 0(%r3) +; CHECK-DAG: lg %r3, 8(%r2) +; CHECK-DAG: lg %r2, 0(%r2) +; CHECK: cdsg %r2, %r0, 0(%r4) +; CHECK-NEXT: jgl g +; CHECK: br %r14 define void @f12(i128 %cmp, i128 %swap, i128 *%src) { %pairval = cmpxchg i128 *%src, i128 %cmp, i128 %swap seq_cst seq_cst %cond = extractvalue { i128, i1 } %pairval, 1 Index: test/CodeGen/SystemZ/int-ssub-06.ll =================================================================== --- test/CodeGen/SystemZ/int-ssub-06.ll +++ test/CodeGen/SystemZ/int-ssub-06.ll @@ -73,7 +73,7 @@ ; and must use a register. define zeroext i1 @f5(i32 %dummy, i32 %a, i32 *%res) { ; CHECK-LABEL: f5: -; CHECK: llilh [[REG1:%r[0-5]]], 32768 +; CHECK: iilf [[REG1:%r[0-5]]], 2147483648 ; CHECK: sr %r3, [[REG1]] ; CHECK-DAG: st %r3, 0(%r4) ; CHECK-DAG: ipm [[REG:%r[0-5]]] @@ -171,7 +171,7 @@ ; Check the next value down, which must use a register. define zeroext i1 @f11(i32 %dummy, i32 %a, i32 *%res) { ; CHECK-LABEL: f11: -; CHECK: llilh [[REG1:%r[0-5]]], 32768 +; CHECK: iilf [[REG1:%r[0-5]]], 2147483648 ; CHECK: sr %r3, [[REG1]] ; CHECK-DAG: st %r3, 0(%r4) ; CHECK-DAG: ipm [[REG:%r[0-5]]] Index: test/CodeGen/SystemZ/tc_subregliveness_DefMI.ll =================================================================== --- /dev/null +++ test/CodeGen/SystemZ/tc_subregliveness_DefMI.ll @@ -0,0 +1,51 @@ +; RUN: llc -mtriple=s390x-linux-gnu -mcpu=z13 < %s | FileCheck %s + +; Check for successful compilation. +; CHECK: aghi %r15, -160 + +target datalayout = "E-m:e-i1:8:16-i8:8:16-i64:64-f128:64-v128:64-a:8:16-n32:64" +target triple = "s390x-ibm-linux" + +%0 = type { i8*, i32, i32 } + +declare i8* @Perl_sv_grow(%0*, i64) #0 + +; Function Attrs: nounwind +define signext i32 @Perl_yylex() #1 { +bb: + br label %bb1 + +bb1: ; preds = %bb3, %bb + %tmp = phi i8* [ %tmp8, %bb3 ], [ undef, %bb ] + %tmp2 = icmp eq i8 undef, 0 + br i1 %tmp2, label %bb9, label %bb3 + +bb3: ; preds = %bb1 + %tmp4 = ptrtoint i8* %tmp to i64 + %tmp5 = sub i64 %tmp4, 0 + %tmp6 = shl i64 %tmp5, 32 + %tmp7 = ashr exact i64 %tmp6, 32 + %tmp8 = getelementptr inbounds i8, i8* null, i64 %tmp7 + br label %bb1 + +bb9: ; preds = %bb1 + br i1 undef, label %bb10, label %bb15 + +bb10: ; preds = %bb9 + %tmp11 = ptrtoint i8* %tmp to i64 + %tmp12 = sub i64 %tmp11, 0 + %tmp13 = call i8* @Perl_sv_grow(%0* nonnull undef, i64 undef) #2 + %tmp14 = getelementptr inbounds i8, i8* %tmp13, i64 %tmp12 + br label %bb15 + +bb15: ; preds = %bb10, %bb9 + %tmp16 = phi i8* [ %tmp14, %bb10 ], [ %tmp, %bb9 ] + %tmp17 = call i8* @Perl_uvuni_to_utf8(i8* %tmp16, i64 undef) #2 + unreachable +} + +declare i8* @Perl_uvuni_to_utf8(i8*, i64) #0 + +attributes #0 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="z13" "target-features"="+transactional-execution,+vector" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="z13" "target-features"="+transactional-execution,+vector" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #2 = { nounwind } Index: test/CodeGen/SystemZ/tc_subregliveness_hasSubRanges.ll =================================================================== --- /dev/null +++ test/CodeGen/SystemZ/tc_subregliveness_hasSubRanges.ll @@ -0,0 +1,22 @@ +; RUN: llc -mtriple=s390x-linux-gnu -mcpu=z13 < %s | FileCheck %s + +; Check for successful compilation. +; CHECK: meeb %f0, 0(%r1) + +target datalayout = "E-m:e-i1:8:16-i8:8:16-i64:64-f128:64-v128:64-a:8:16-n32:64" +target triple = "s390x-ibm-linux" + +; Function Attrs: nounwind +define void @spec_random_load() #0 { +bb: + %tmp = sitofp i64 undef to float + %tmp1 = fmul float %tmp, 0x3E00000000000000 + %tmp2 = fpext float %tmp1 to double + %tmp3 = fmul double %tmp2, 2.560000e+02 + %tmp4 = fptosi double %tmp3 to i32 + %tmp5 = trunc i32 %tmp4 to i8 + store i8 %tmp5, i8* undef, align 1 + unreachable +} + +attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="z13" "target-features"="+transactional-execution,+vector" "unsafe-fp-math"="false" "use-soft-float"="false" } Index: test/CodeGen/SystemZ/tc_subregliveness_hassubranges.ll =================================================================== --- /dev/null +++ test/CodeGen/SystemZ/tc_subregliveness_hassubranges.ll @@ -0,0 +1,41 @@ +; RUN: llc -mtriple=s390x-linux-gnu -mcpu=z13 -disable-early-taildup -disable-cgp < %s | FileCheck %s + +; Check for successful compilation. +; CHECK: lhi %r0, -5 + +target datalayout = "E-m:e-i1:8:16-i8:8:16-i64:64-f128:64-v128:64-a:8:16-n32:64" +target triple = "s390x-ibm-linux" + +; Function Attrs: nounwind +define void @main() #0 { +bb: + %tmp = xor i8 0, -5 + %tmp1 = sext i8 %tmp to i32 + %tmp2 = icmp sgt i8 0, -1 + br label %bb3 + +bb3: ; preds = %bb15, %bb + %tmp4 = phi i64 [ %tmp16, %bb15 ], [ -1, %bb ] + br i1 undef, label %bb14, label %bb5 + +bb5: ; preds = %bb3 + %tmp6 = or i1 %tmp2, false + %tmp7 = select i1 %tmp6, i32 0, i32 undef + %tmp8 = ashr i32 %tmp1, %tmp7 + %tmp9 = zext i32 %tmp8 to i64 + %tmp10 = shl i64 %tmp9, 48 + %tmp11 = ashr exact i64 %tmp10, 48 + %tmp12 = and i64 %tmp11, %tmp4 + %tmp13 = trunc i64 %tmp12 to i32 + store i32 %tmp13, i32* undef, align 4 + br label %bb15 + +bb14: ; preds = %bb3 + br label %bb15 + +bb15: ; preds = %bb14, %bb5 + %tmp16 = phi i64 [ %tmp4, %bb14 ], [ %tmp12, %bb5 ] + br label %bb3 +} + +attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="z13" "target-features"="+transactional-execution,+vector" "unsafe-fp-math"="false" "use-soft-float"="false" } Index: test/CodeGen/SystemZ/tc_subregliveness_livesegend.ll =================================================================== --- /dev/null +++ test/CodeGen/SystemZ/tc_subregliveness_livesegend.ll @@ -0,0 +1,48 @@ +; RUN: llc -mtriple=s390x-linux-gnu -mcpu=z10 -verify-machineinstrs < %s | FileCheck %s + +; Check for successful compilation. +; CHECK: lgfrl %r1, g_65 + +target datalayout = "E-m:e-i1:8:16-i8:8:16-i64:64-f128:64-a:8:16-n32:64" +target triple = "s390x-ibm-linux" + +@g_65 = external global i32, align 4 + +; Function Attrs: nounwind +define void @main() #0 { +bb: + br label %bb1 + +bb1: ; preds = %bb + %tmp = load i32, i32* @g_65, align 4 + %tmp2 = sext i32 %tmp to i64 + %tmp3 = shl i32 %tmp, 16 + %tmp4 = ashr exact i32 %tmp3, 16 + %tmp5 = shl i32 %tmp4, 0 + %tmp6 = zext i32 %tmp5 to i64 + %tmp7 = shl i64 %tmp6, 48 + %tmp8 = ashr exact i64 %tmp7, 48 + br i1 undef, label %bb12, label %bb9 + +bb9: ; preds = %bb1 + %tmp10 = select i1 undef, i64 0, i64 %tmp2 + %tmp11 = add nsw i64 %tmp10, %tmp8 + br label %bb12 + +bb12: ; preds = %bb9, %bb1 + %tmp13 = phi i64 [ %tmp11, %bb9 ], [ %tmp8, %bb1 ] + %tmp14 = trunc i64 %tmp13 to i32 + %tmp15 = and i32 %tmp14, 255 + %tmp16 = shl i32 %tmp15, 0 + %tmp17 = trunc i32 %tmp16 to i8 + %tmp18 = icmp eq i8 %tmp17, 0 + br i1 %tmp18, label %bb20, label %bb19 + +bb19: ; preds = %bb12 + unreachable + +bb20: ; preds = %bb12 + unreachable +} + +attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="z10" "unsafe-fp-math"="false" "use-soft-float"="false" } Index: test/CodeGen/SystemZ/tc_subregliveness_noliveseg.ll =================================================================== --- /dev/null +++ test/CodeGen/SystemZ/tc_subregliveness_noliveseg.ll @@ -0,0 +1,48 @@ +; RUN: llc -mtriple=s390x-linux-gnu -mcpu=z13 -verify-machineinstrs < %s | FileCheck %s + +; Check for successful compilation. +; CHECK: lgfrl %r0, g_399 + +target datalayout = "E-m:e-i1:8:16-i8:8:16-i64:64-f128:64-v128:64-a:8:16-n32:64" +target triple = "s390x-ibm-linux" + +@g_439 = external global i32, align 4 +@g_399 = external global { i8, i8, i8, i8, i8, i8 }, align 8 + +; Function Attrs: nounwind +define void @main() #0 { +bb: + %tmp = load i48, i48* bitcast ({ i8, i8, i8, i8, i8, i8 }* @g_399 to i48*), align 8, !noalias !1 + %tmp1 = ashr i48 %tmp, 17 + %tmp2 = trunc i48 %tmp1 to i32 + %tmp3 = sext i32 %tmp2 to i64 + br label %bb4 + +bb4: ; preds = %bb4, %bb + %tmp5 = load i64, i64* undef, align 8, !tbaa !4, !noalias !1 + %tmp6 = urem i64 -923186811629238421, %tmp3 + %tmp7 = or i64 %tmp6, %tmp5 + %tmp8 = trunc i64 %tmp7 to i32 + %tmp9 = lshr i32 %tmp8, 2 + %tmp10 = and i32 %tmp9, 60 + %tmp11 = xor i32 %tmp10, -1592309976 + %tmp12 = or i32 0, %tmp11 + %tmp13 = or i32 %tmp12, 3 + store i32 %tmp13, i32* @g_439, align 4, !tbaa !8, !noalias !1 + br label %bb4 +} + +attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="z13" "target-features"="+transactional-execution,+vector" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!llvm.ident = !{!0} + +!0 = !{!"clang version 8.0.0 (http://llvm.org/git/clang.git c0a5e830f198cf42d29f72f1ec06fbf4c5210e2c) (http://llvm.org/git/llvm.git ffc8c538b70b678031b8617f61f83ee120bcb884)"} +!1 = !{!2} +!2 = distinct !{!2, !3, !"func_1: %agg.result"} +!3 = distinct !{!3, !"func_1"} +!4 = !{!5, !5, i64 0} +!5 = !{!"long", !6, i64 0} +!6 = !{!"omnipotent char", !7, i64 0} +!7 = !{!"Simple C/C++ TBAA"} +!8 = !{!9, !9, i64 0} +!9 = !{!"int", !6, i64 0}