Index: lib/CodeGen/MachineScheduler.cpp
===================================================================
--- lib/CodeGen/MachineScheduler.cpp
+++ lib/CodeGen/MachineScheduler.cpp
@@ -2832,6 +2832,80 @@
   return 0;
 }
 
+// Check if MI (which is expected to be a COPY) has as one of its operands a
+// physical register that could be allocated to the other operand's virtual
+// register.
+static bool potentiallyCoalescablePRegCopy(MachineInstr *CopyMI,
+                                           const TargetRegisterInfo *TRI,
+                                           const MachineRegisterInfo *MRI) {
+  assert(CopyMI->isCopy() && "Expected a COPY");
+
+  MachineOperand *PRegMO = nullptr, *VRegMO = nullptr;
+  if (TargetRegisterInfo::isPhysicalRegister(CopyMI->getOperand(0).getReg())) {
+    PRegMO = &CopyMI->getOperand(0);
+    VRegMO = &CopyMI->getOperand(1);
+  } else {
+    PRegMO = &CopyMI->getOperand(1);
+    VRegMO = &CopyMI->getOperand(0);
+  }
+  if (!TargetRegisterInfo::isPhysicalRegister(PRegMO->getReg()) ||
+      !TargetRegisterInfo::isVirtualRegister(VRegMO->getReg()))
+    return false;
+  unsigned PhysReg = (PRegMO->getSubReg() ?
+    TRI->getSubReg(PRegMO->getReg(), PRegMO->getSubReg()) : PRegMO->getReg());
+  unsigned VirtReg = VRegMO->getReg();
+  unsigned VSub = VRegMO->getSubReg();
+
+  const TargetRegisterClass *VirtRC = MRI->getRegClass(VirtReg);
+  return (!MRI->isReserved(PhysReg) &&
+          (VirtRC->contains(PhysReg) ||
+           (VSub && TRI->getMatchingSuperReg(PhysReg, VSub, VirtRC))));
+}
+
+/// Minimize the (virtual) live ranges of copies involving phys-regs. In
+/// regions with both incoming and outgoing arguments, this will reduce the
+/// risk of overlapping live ranges that will hinder coalescing. In contrast
+/// to biasPhysRegCopy(), this does not typically handle COPYs, but rather
+/// instructions connected to a COPY involving a phys-reg.
+static int copiedPhysRegsUses(const SUnit *SU, bool isTop) {
+  const MachineInstr *MI = SU->getInstr();
+  const MachineFunction *MF = MI->getParent()->getParent();
+  const MachineRegisterInfo *MRI = &MF->getRegInfo();
+  const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
+  int NumCopiedPhysRegUses = 0; // Signed balance: uses minus copied-out defs.
+  // Putting OP closer to the COPY minimizes the chance of %0 interfering
+  // with physregX:
+  //   %0 = COPY %physregX
+  //   ...
+  //   %1 = OP %0
+  for (const MachineOperand &MO : MI->uses()) {
+    if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+      continue;
+    MachineInstr *DefMI = MRI->getUniqueVRegDef(MO.getReg());
+    if (DefMI != nullptr && DefMI->isCopy() &&
+        potentiallyCoalescablePRegCopy(DefMI, TRI, MRI))
+      NumCopiedPhysRegUses++;
+  }
+
+  // Inverse case:
+  //   %1 = OP %0
+  //   ...
+  //   %physregX = COPY %1
+  unsigned DefReg =
+    ((MI->getNumOperands() && MI->getOperand(0).isReg() &&
+      MI->getOperand(0).isDef() && !MI->getOperand(0).isImplicit()) ?
+     MI->getOperand(0).getReg() : 0);
+  if (DefReg && TargetRegisterInfo::isVirtualRegister(DefReg)) {
+    for (MachineOperand &UseMO : MRI->use_operands(DefReg)) {
+      MachineInstr *UseMI = UseMO.getParent();
+      if (UseMI->isCopy() && potentiallyCoalescablePRegCopy(UseMI, TRI, MRI))
+        NumCopiedPhysRegUses--;
+    }
+  }
+
+  return (isTop ? NumCopiedPhysRegUses : -NumCopiedPhysRegUses);
+}
+
 void GenericScheduler::initCandidate(SchedCandidate &Cand, SUnit *SU,
                                      bool AtTop,
                                      const RegPressureTracker &RPTracker,
@@ -2974,6 +3048,12 @@
       !Rem.IsAcyclicLatencyLimited && tryLatency(TryCand, Cand, *Zone))
     return;
 
+  // Try to minimize live ranges of copied physregs.
+  if (tryGreater(copiedPhysRegsUses(TryCand.SU, TryCand.AtTop),
+                 copiedPhysRegsUses(Cand.SU, Cand.AtTop),
+                 TryCand, Cand, PhysRegCopy))
+    return;
+
   // Fall through to original instruction order.
   if ((Zone->isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum)
       || (!Zone->isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
Index: test/CodeGen/SystemZ/args-11.mir
===================================================================
--- /dev/null
+++ test/CodeGen/SystemZ/args-11.mir
@@ -0,0 +1,90 @@
+# RUN: llc -o - %s -mtriple=s390x-linux-gnu -start-before=machine-scheduler \
+# RUN:   -enable-misched | FileCheck %s
+
+# Test that an extra COPY due to poor pre-RA scheduling is avoided. This
+# would happen if the '%7 = ADJDYNALLOC ...' (later copied into %r3d) was
+# scheduled above the three ADJDYNALLOCs using %1, which is copied from %r3d.
+
+--- |
+
+  declare i64 @bar(i8*, i8*, i8*, i8*, i8*, i64, i64)
+
+  define i64 @f1(i64 %length, i64 %index) {
+    %a = alloca i8, i64 %length
+    %b = getelementptr i8, i8* %a, i64 1
+    %cindex = add i64 %index, 3919
+    %c = getelementptr i8, i8* %a, i64 %cindex
+    %dindex = add i64 %index, 3920
+    %d = getelementptr i8, i8* %a, i64 %dindex
+    %eindex = add i64 %index, 4095
+    %e = getelementptr i8, i8* %a, i64 %eindex
+    %count = call i64 @bar(i8* %a, i8* %b, i8* %c, i8* %d, i8* %e, i64 0, i64 0)
+    %res = add i64 %count, 1
+    ret i64 %res
+  }
+
+...
+
+# CHECK: la %r3, 177(%r1)
+# CHECK-NOT: la %r0, 177(%r1)
+# CHECK-NOT: lgr %r3, %r0
+
+---
+name: f1
+alignment: 2
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: addr64bit }
+  - { id: 1, class: addr64bit }
+  - { id: 2, class: gr64bit }
+  - { id: 3, class: gr64bit }
+  - { id: 4, class: gr64bit }
+  - { id: 5, class: addr64bit }
+  - { id: 6, class: gr64bit }
+  - { id: 7, class: gr64bit }
+  - { id: 8, class: gr64bit }
+  - { id: 9, class: gr64bit }
+  - { id: 10, class: gr64bit }
+  - { id: 11, class: addr64bit }
+  - { id: 12, class: addr64bit }
+  - { id: 13, class: gr64bit }
+liveins:
+  - { reg: '%r2d', virtual-reg: '%0' }
+  - { reg: '%r3d', virtual-reg: '%1' }
+frameInfo:
+  maxAlignment: 8
+  hasCalls: true
+stack:
+  - { id: 0, name: a, type: variable-sized, alignment: 1, stack-id: 0 }
+body: |
+  bb.0 (%ir-block.0):
+    liveins: %r2d, %r3d
+
+    %1 = COPY %r3d
+    %0 = COPY %r2d
+    %3 = LA %0, 7, _
+    %3 = NILL64 %3, 65528, implicit-def dead %cc
+    %5 = COPY %r15d
+    %5 = SGR %5, %3, implicit-def dead %cc
+    %6 = ADJDYNALLOC %5, 0, _
+    %r15d = COPY %5
+    %7 = ADJDYNALLOC %5, 1, _
+    %8 = ADJDYNALLOC %5, 3919, %1
+    %9 = ADJDYNALLOC %5, 3920, %1
+    %10 = ADJDYNALLOC %5, 4095, %1
+    ADJCALLSTACKDOWN 16, 0
+    MVGHI %r15d, 168, 0 :: (store 8)
+    MVGHI %r15d, 160, 0 :: (store 8)
+    %r2d = COPY %6
+    %r3d = COPY %7
+    %r4d = COPY %8
+    %r5d = COPY %9
+    %r6d = COPY %10
+    CallBRASL @bar, %r2d, %r3d, killed %r4d, killed %r5d, killed %r6d, csr_systemz, implicit-def dead %r14d, implicit-def dead %cc, implicit-def %r2d
+    ADJCALLSTACKUP 16, 0
+    %12 = COPY %r2d
+    %13 = LA %12, 1, _
+    %r2d = COPY %13
+    Return implicit %r2d
+
+...