Index: lib/CodeGen/TargetSchedule.cpp =================================================================== --- lib/CodeGen/TargetSchedule.cpp +++ lib/CodeGen/TargetSchedule.cpp @@ -232,6 +232,46 @@ return Latency; unsigned UseIdx = findUseIdx(UseMI, UseOperIdx); int Advance = STI->getReadAdvanceCycles(UseDesc, UseIdx, WriteID); + + // Take extra care with implicit ops added by RegAlloc that are not part + // of the instruction descriptor. + const MCInstrDesc *UseIDesc = &UseMI->getDesc(); + const MachineOperand &UseMO = UseMI->getOperand(UseOperIdx); + const TargetRegisterInfo *TRI = STI->getRegisterInfo(); + if (!Advance && UseMO.isImplicit() && + !UseIDesc->hasImplicitUseOfPhysReg(UseMO.getReg()) && + TRI->isPhysicalRegister(UseMO.getReg())) { + UseIdx = 0; + int SubAdv = INT_MAX; + for (unsigned MOIdx = 0; MOIdx < UseMI->getNumOperands(); MOIdx++) { + const MachineOperand &MO = UseMI->getOperand(MOIdx); + // Only consider operands that are part of UseIDesc (the MCInstrDesc). + if (MOIdx >= UseIDesc->getNumOperands() && + ((MO.isDef() && !UseIDesc->hasImplicitDefOfPhysReg(MO.getReg())) || + (MO.isUse() && !UseIDesc->hasImplicitUseOfPhysReg(MO.getReg())))) + continue; + + // Get the smallest advance for any subregister of UseMO. + if (MO.isReg() && MO.readsReg() && !MO.isDef()) { + // We only aim to handle operands added by RA, so if we see a + // virtual register, do nothing (Can't do MRI->isSSA(), since UseMI + // may not be part of any MBB, and MRI is therefore not always + // available). 
+ if (TRI->isVirtualRegister(MO.getReg())) { + SubAdv = 0; + break; + } + if (TRI->isSubRegister(UseMO.getReg(), MO.getReg())) + SubAdv = std::min(SubAdv, STI->getReadAdvanceCycles(UseDesc, UseIdx, WriteID)); + UseIdx++; + } + if (SubAdv == 0) + break; + } + if (SubAdv < INT_MAX && SubAdv != 0) + Advance = SubAdv; + } + if (Advance > 0 && (unsigned)Advance > Latency) // unsigned wrap return 0; return Latency - Advance; Index: test/CodeGen/SystemZ/misched-readadvances.mir =================================================================== --- /dev/null +++ test/CodeGen/SystemZ/misched-readadvances.mir @@ -0,0 +1,411 @@ +# Check that the latency adjustment (ReadAdvance) for the MSY register operand +# is also used on the extra operand for the full register added by RegAlloc. + +# RUN: llc %s -mtriple=s390x-linux-gnu -mcpu=z13 -start-before=machine-scheduler \ +# RUN: -debug-only=machine-scheduler 2>&1 | FileCheck %s +# REQUIRES: asserts + +# CHECK: ScheduleDAGMI::schedule starting +# CHECK: SU(3): renamable $r2l = MSR renamable $r2l, renamable $r2l +# CHECK: Latency : 6 +# CHECK: SU(4): renamable $r2l = MSY renamable $r2l, renamable $r1d, -4, $noreg, implicit $r2d +# CHECK: Predecessors: +# CHECK: SU(3): Data Latency=2 Reg=$r2l +# CHECK: SU(3): Data Latency=2 Reg=$r2d + +--- | + ; Function Attrs: nounwind + define dso_local void @Perl_do_sv_dump() local_unnamed_addr #0 { + bb: + %0 = zext i8 undef to i32 + switch i32 %0, label %bb4 [ + i32 12, label %bb1 + i32 14, label %bb1 + i32 11, label %bb2 + i32 13, label %bb3 + i32 8, label %bb5 + ] + + bb1: ; preds = %bb, %bb + unreachable + + bb2: ; preds = %bb + %1 = zext i8 undef to i32 + switch i32 %1, label %bb22 [ + i32 0, label %bb6 + i32 1, label %bb7 + i32 2, label %bb8 + i32 3, label %bb9 + i32 4, label %bb10 + i32 5, label %bb11 + i32 6, label %bb12 + i32 8, label %bb13 + i32 7, label %bb14 + i32 9, label %bb15 + i32 10, label %bb16 + i32 11, label %bb17 + i32 12, label %bb18 + i32 13, label %bb19 + i32 14, label 
%bb20 + i32 15, label %bb21 + ] + + bb3: ; preds = %bb + unreachable + + bb4: ; preds = %bb + unreachable + + bb5: ; preds = %bb + unreachable + + bb6: ; preds = %bb2 + ret void + + bb7: ; preds = %bb2 + unreachable + + bb8: ; preds = %bb2 + %2 = zext i8 undef to i32 + switch i32 %2, label %bb46 [ + i32 9, label %bb23 + i32 10, label %bb24 + i32 11, label %bb25 + i32 15, label %bb45 + i32 14, label %bb43 + i32 13, label %bb44 + ] + + bb9: ; preds = %bb2 + unreachable + + bb10: ; preds = %bb2 + unreachable + + bb11: ; preds = %bb2 + unreachable + + bb12: ; preds = %bb2 + unreachable + + bb13: ; preds = %bb2 + unreachable + + bb14: ; preds = %bb2 + unreachable + + bb15: ; preds = %bb2 + unreachable + + bb16: ; preds = %bb2 + unreachable + + bb17: ; preds = %bb2 + unreachable + + bb18: ; preds = %bb2 + unreachable + + bb19: ; preds = %bb2 + unreachable + + bb20: ; preds = %bb2 + unreachable + + bb21: ; preds = %bb2 + unreachable + + bb22: ; preds = %bb2 + unreachable + + bb23: ; preds = %bb8 + unreachable + + bb24: ; preds = %bb8 + unreachable + + bb25: ; preds = %bb8 + br label %bb27 + + bb26: ; preds = %bb27 + %tmp = fdiv double undef, %tmp40 + unreachable + + bb27: ; preds = %bb27, %bb25 + %lsr.iv4 = phi [200 x i32]* [ %4, %bb27 ], [ undef, %bb25 ] + %lsr.iv1 = phi i64 [ %lsr.iv.next2, %bb27 ], [ undef, %bb25 ] + %tmp29 = phi double [ %tmp40, %bb27 ], [ 0.000000e+00, %bb25 ] + %lsr.iv45 = bitcast [200 x i32]* %lsr.iv4 to i32* + %tmp30 = fadd double %tmp29, 0.000000e+00 + %scevgep6 = getelementptr i32, i32* %lsr.iv45, i64 -1 + %tmp33 = load i32, i32* %scevgep6, align 4 + %3 = add i64 %lsr.iv1, 4294967291 + %tmp3 = trunc i64 %3 to i32 + %tmp35 = mul i32 %tmp3, %tmp3 + %tmp36 = mul i32 %tmp35, %tmp33 + %tmp37 = sitofp i32 %tmp36 to double + %tmp38 = fadd double %tmp30, %tmp37 + %tmp39 = fadd double %tmp38, 0.000000e+00 + %tmp40 = fadd double %tmp39, 0.000000e+00 + %lsr.iv.next2 = add i64 %lsr.iv1, -4 + %scevgep = getelementptr [200 x i32], [200 x i32]* %lsr.iv4, i64 
0, i64 -4 + %4 = bitcast i32* %scevgep to [200 x i32]* + %tmp42 = icmp sgt i64 %lsr.iv.next2, 4 + br i1 %tmp42, label %bb27, label %bb26 + + bb43: ; preds = %bb8 + unreachable + + bb44: ; preds = %bb8 + unreachable + + bb45: ; preds = %bb8 + unreachable + + bb46: ; preds = %bb8 + unreachable + } + + attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="z13" "target-features"="+transactional-execution,+vector,+vector-enhancements-1" "unsafe-fp-math"="false" "use-soft-float"="false" } + + !llvm.ident = !{!0} + + !0 = !{!"clang version 7.0.0 (http://llvm.org/git/clang.git 5a987ef0b0d883c8f33502e47b9b6f32e0546980) (llvm/llvm-dev-2 830762b5123b0e46a4433ceb262fce551228c9be)"} + +... +--- +name: Perl_do_sv_dump +alignment: 4 +tracksRegLiveness: true +registers: + - { id: 0, class: addr64bit } + - { id: 1, class: addr64bit } + - { id: 2, class: vr64bit } + - { id: 3, class: vr64bit } + - { id: 4, class: gr64bit } + - { id: 5, class: gr64bit } + - { id: 6, class: grx32bit } + - { id: 7, class: grx32bit } + - { id: 8, class: grx32bit } + - { id: 9, class: gr64bit } + - { id: 10, class: grx32bit } + - { id: 11, class: addr64bit } + - { id: 12, class: addr64bit } + - { id: 13, class: addr64bit } + - { id: 14, class: grx32bit } + - { id: 15, class: gr64bit } + - { id: 16, class: grx32bit } + - { id: 17, class: addr64bit } + - { id: 18, class: addr64bit } + - { id: 19, class: addr64bit } + - { id: 20, class: gr64bit } + - { id: 21, class: gr64bit } + - { id: 22, class: vr64bit } + - { id: 23, class: fp64bit } + - { id: 24, class: gr64bit } + - { id: 25, class: gr64bit } + - { id: 26, class: fp64bit } + - { id: 27, class: vr64bit } + - { id: 28, class: addr64bit } + - { id: 29, class: 
gr32bit } + - { id: 30, class: gr32bit } + - { id: 31, class: gr32bit } + - { id: 32, class: fp64bit } + - { id: 33, class: vr64bit } + - { id: 34, class: vr64bit } + - { id: 35, class: addr64bit } + - { id: 36, class: addr64bit } + - { id: 37, class: vr64bit } +jumpTable: + kind: block-address + entries: + - id: 0 + blocks: [ '%bb.9', '%bb.10', '%bb.13', '%bb.14', '%bb.15', + '%bb.16', '%bb.18', '%bb.17', '%bb.19', '%bb.20', + '%bb.21', '%bb.22', '%bb.23', '%bb.24', '%bb.25' ] + - id: 1 + blocks: [ '%bb.27', '%bb.26', '%bb.26', '%bb.32', '%bb.31', + '%bb.33' ] +body: | + bb.0.bb: + successors: %bb.4(0x7ffffffb), %bb.1(0x00000005) + + %6:grx32bit = LHIMux 0 + CHIMux %6, 0, implicit-def $cc + BRC 14, 6, %bb.4, implicit $cc + J %bb.1 + + bb.1.bb: + successors: %bb.7(0x3ffffffe), %bb.2(0x40000002) + liveins: $cc + + BRC 14, 6, %bb.7, implicit killed $cc + J %bb.2 + + bb.2.bb: + successors: %bb.6(0x7ffffff6), %bb.3(0x0000000a) + + %8:grx32bit = LHIMux 0 + CHIMux %8, 0, implicit-def $cc + BRC 14, 6, %bb.6, implicit killed $cc + J %bb.3 + + bb.3.bb1: + successors: + + + bb.4.bb2: + successors: %bb.8(0x3ffffff9), %bb.5(0x40000007) + + %10:grx32bit = LHIMux 1 + CHIMux %10, 0, implicit-def $cc + BRC 14, 6, %bb.8, implicit killed $cc + + bb.5.bb2: + successors: %bb.9(0x00000002), %bb.10(0x7fffffe4), %bb.13(0x00000002), %bb.14(0x00000002), %bb.15(0x00000002), %bb.16(0x00000002), %bb.18(0x00000002), %bb.17(0x00000002), %bb.19(0x00000002), %bb.20(0x00000002), %bb.21(0x00000002), %bb.22(0x00000002), %bb.23(0x00000002), %bb.24(0x00000002), %bb.25(0x00000002) + + %9:gr64bit = LLILF 4294967295 + %11:addr64bit = SLLG %9, $noreg, 3 + %12:addr64bit = LARL %jump-table.0 + %13:addr64bit = LG %11, 0, %12 :: (load 8 from jump-table) + BR %13 + + bb.6.bb3: + successors: + + + bb.7.bb5: + successors: + + + bb.8.bb6: + Return + + bb.9.bb7: + successors: + + + bb.10.bb8: + successors: %bb.28(0x7ffffffa), %bb.11(0x00000006) + + %14:grx32bit = LHIMux 0 + CHIMux %14, 0, implicit-def $cc + BRC 
14, 6, %bb.28, implicit killed $cc + J %bb.11 + + bb.11.bb8: + successors: %bb.26(0x00000002), %bb.12(0x7ffffffe) + + %16:grx32bit = LHIMux 1 + CHIMux %16, 0, implicit-def $cc + BRC 14, 6, %bb.26, implicit killed $cc + + bb.12.bb8: + successors: %bb.27(0x20000000), %bb.26(0x00000000), %bb.32(0x20000000), %bb.31(0x20000000), %bb.33(0x20000000) + + %15:gr64bit = LLILF 4294967286 + %17:addr64bit = SLLG %15, $noreg, 3 + %18:addr64bit = LARL %jump-table.1 + %19:addr64bit = LG %17, 0, %18 :: (load 8 from jump-table) + BR %19 + + bb.13.bb9: + successors: + + + bb.14.bb10: + successors: + + + bb.15.bb11: + successors: + + + bb.16.bb12: + successors: + + + bb.17.bb13: + successors: + + + bb.18.bb14: + successors: + + + bb.19.bb15: + successors: + + + bb.20.bb16: + successors: + + + bb.21.bb17: + successors: + + + bb.22.bb18: + successors: + + + bb.23.bb19: + successors: + + + bb.24.bb20: + successors: + + + bb.25.bb21: + successors: + + + bb.26.bb23: + successors: + + + bb.27.bb24: + successors: + + + bb.28.bb25: + %23:fp64bit = LZDR + %35:addr64bit = IMPLICIT_DEF + %36:addr64bit = IMPLICIT_DEF + %37:vr64bit = COPY %23 + J %bb.30 + + bb.29.bb26: + successors: + + + bb.30.bb27: + successors: %bb.30(0x7fffffff), %bb.29(0x00000001) + + %27:vr64bit = WFADB %37, %23 + %4:gr64bit = LAY %36, -4, $noreg + %36:addr64bit = ALGFI %36, 4294967291, implicit-def dead $cc + %36.subreg_l32:addr64bit = MSR %36.subreg_l32, %36.subreg_l32 + %36.subreg_l32:addr64bit = MSY %36.subreg_l32, %35, -4, $noreg :: (load 4 from %ir.scevgep6) + %32:fp64bit = CDFBR %36.subreg_l32 + %33:vr64bit = WFADB %27, %32 + %34:vr64bit = WFADB %33, %23 + %37:vr64bit = WFADB %34, %23 + %35:addr64bit = LAY %35, -16, $noreg + CGHI %4, 4, implicit-def $cc + %36:addr64bit = COPY %4 + BRC 14, 2, %bb.30, implicit killed $cc + J %bb.29 + + bb.31.bb43: + successors: + + + bb.32.bb44: + successors: + + + bb.33.bb45: + +...