Index: lib/CodeGen/RegisterCoalescer.cpp
===================================================================
--- lib/CodeGen/RegisterCoalescer.cpp
+++ lib/CodeGen/RegisterCoalescer.cpp
@@ -1750,6 +1750,9 @@
   LiveInterval &RHS = LIS->getInterval(SrcReg);
   DEBUG(dbgs() << "\t\tRHS = " << RHS << '\n');
 
+  if (!MRI->hasOneDef(SrcReg))
+    return false;
+
   assert(RHS.containsOneValue() && "Invalid join with reserved register");
 
   // Optimization for reserved registers like ESP. We can only merge with a
@@ -1805,27 +1808,43 @@
   // =>
   // %Y = def
   // ...
-  if (!MRI->hasOneNonDBGUse(SrcReg)) {
-    DEBUG(dbgs() << "\t\tMultiple vreg uses!\n");
-    return false;
-  }
+  const MachineOperand &DefMO = *MRI->def_begin(SrcReg);
 
-  MachineInstr &DestMI = *MRI->getVRegDef(SrcReg);
   CopyMI = &*MRI->use_instr_nodbg_begin(SrcReg);
-  SlotIndex CopyRegIdx = LIS->getInstructionIndex(*CopyMI).getRegSlot();
-  SlotIndex DestRegIdx = LIS->getInstructionIndex(DestMI).getRegSlot();
+  SlotIndex CopyIdx = LIS->getInstructionIndex(*CopyMI);
+  const MachineInstr &DestMI = *DefMO.getParent();
+  SlotIndex DestRegIdx =
+      LIS->getInstructionIndex(DestMI).getRegSlot(DefMO.isEarlyClobber());
 
   if (!MRI->isConstantPhysReg(DstReg)) {
     // We checked above that there are no interfering defs of the physical
     // register. However, for this case, where we intent to move up the def of
-    // the physical register, we also need to check for interfering uses.
-    SlotIndexes *Indexes = LIS->getSlotIndexes();
-    for (SlotIndex SI = Indexes->getNextNonNullIndex(DestRegIdx);
-         SI != CopyRegIdx; SI = Indexes->getNextNonNullIndex(SI)) {
-      MachineInstr *MI = LIS->getInstructionFromIndex(SI);
-      if (MI->readsRegister(DstReg, TRI)) {
-        DEBUG(dbgs() << "\t\tInterference (read): " << *MI);
-        return false;
+    // the physical register, we also need to check that there are no uses
+    // of DstReg during the lifetime of SrcReg.
+    SlotIndexes &Indexes = *LIS->getSlotIndexes();
+    for (const LiveRange::Segment &S : RHS) {
+      // Adjust Start to point to an instruction. A segment starts either at
+      // the block begin, which has no instruction associated with it, or at
+      // the instruction defining SrcReg.
+      // We can ignore the uses of the defining instruction itself unless the
+      // def is in the earlyclobber slot, in which case the defining
+      // instruction lies inside the range we have to scan.
+      SlotIndex Start = S.start;
+      if (Start.isBlock() || !DestRegIdx.isEarlyClobber()) {
+        assert((Start.isBlock() ||
+                (SlotIndex::isSameInstr(Start, DestRegIdx) &&
+                 !DestRegIdx.isEarlyClobber())) && "block begin or unique def");
+        Start = Indexes.getNextNonNullIndex(Start);
+      }
+
+      // Check instructions in range for uses of DstReg.
+      for (SlotIndex SI = Start; SI < S.end;
+           SI = Indexes.getNextNonNullIndex(SI)) {
+        MachineInstr &MI = *LIS->getInstructionFromIndex(SI);
+        if (MI.readsRegister(DstReg, TRI)) {
+          DEBUG(dbgs() << "\t\tInterference (read): " << MI);
+          return false;
+        }
       }
     }
   }
@@ -1833,9 +1852,9 @@
   // We're going to remove the copy which defines a physical reserved
   // register, so remove its valno, etc.
   DEBUG(dbgs() << "\t\tRemoving phys reg def of " << PrintReg(DstReg, TRI)
-        << " at " << CopyRegIdx << "\n");
+        << " at " << CopyIdx << "\n");
 
-  LIS->removePhysRegDefAt(DstReg, CopyRegIdx);
+  LIS->removePhysRegDefAt(DstReg, CopyIdx.getRegSlot());
   // Create a new dead def at the new def location.
   for (MCRegUnitIterator UI(DstReg, TRI); UI.isValid(); ++UI) {
     LiveRange &LR = LIS->getRegUnit(*UI);
Index: test/CodeGen/AArch64/regcoal-physreg.mir
===================================================================
--- test/CodeGen/AArch64/regcoal-physreg.mir
+++ test/CodeGen/AArch64/regcoal-physreg.mir
@@ -2,12 +2,14 @@
 --- |
   declare void @f2()
 
-  define void @func() { ret void }
+  define void @func0() { ret void }
+  define void @func1() { ret void }
+  define void @func2() { ret void }
 ...
 ---
 # Check coalescing of COPYs from reserved physregs.
-# CHECK-LABEL: name: func
-name: func
+# CHECK-LABEL: name: func0
+name: func0
 registers:
   - { id: 0, class: gpr32 }
   - { id: 1, class: gpr64 }
@@ -80,3 +82,81 @@
     BL @f2, csr_aarch64_aapcs, implicit-def dead %lr, implicit %sp, implicit-def %sp
     STRXui %7, %x1, 0
 ...
+---
+# It should be possible to coalesce "%0 = COPY %fp" even with the contrived CFG
+# CHECK-LABEL: name: func1
+# CHECK-NOT: %fp = COPY %0
+# CHECK: %fp = ADRP
+name: func1
+body: |
+  bb.0:
+    successors: %bb.3, %bb.4
+    CBZX undef %x0, %bb.3
+    B %bb.4
+
+  bb.1:
+    successors: %bb.2, %bb.5
+    %fp = COPY %0
+    CBZX undef %x0, %bb.2
+    B %bb.5
+
+  bb.2:
+    successors: %bb.6
+    %fp = COPY %xzr; outside the lifetime of %0, so shouldn't matter
+    B %bb.6
+
+  bb.3:
+    %xzr = COPY %fp ; outside the lifetime of %0, so shouldn't matter
+    RET_ReallyLR
+
+  bb.4:
+    successors: %bb.1
+    %0 : gpr64 = ADRP 0
+    B %bb.1
+
+  bb.5:
+    STRXui %fp, %fp, 0
+    RET_ReallyLR
+
+  bb.6:
+    RET_ReallyLR
+...
+---
+# Variant of func1 where we should not be able to coalesce because of extra %fp
+# reads during the lifetime of %0
+# CHECK-LABEL: name: func2
+# CHECK: %fp = COPY %0
+# CHECK: %0 = ADRP
+name: func2
+body: |
+  bb.0:
+    successors: %bb.4
+    B %bb.4
+
+  bb.1:
+    successors: %bb.2, %bb.5
+    %fp = COPY %0
+    CBZX undef %x0, %bb.2
+    B %bb.5
+
+  bb.2:
+    successors: %bb.6
+    B %bb.6
+
+  bb.3:
+    successors: %bb.1
+    %xzr = COPY %fp ; %0 is live here so this should block coalescing
+    B %bb.1
+
+  bb.4:
+    successors: %bb.3
+    %0 : gpr64 = ADRP 0
+    B %bb.3
+
+  bb.5:
+    STRXui %fp, %fp, 0
+    RET_ReallyLR
+
+  bb.6:
+    RET_ReallyLR
+...