Index: lib/CodeGen/RegisterCoalescer.cpp =================================================================== --- lib/CodeGen/RegisterCoalescer.cpp +++ lib/CodeGen/RegisterCoalescer.cpp @@ -1810,22 +1810,25 @@ return false; } - MachineInstr &DestMI = *MRI->getVRegDef(SrcReg); CopyMI = &*MRI->use_instr_nodbg_begin(SrcReg); - SlotIndex CopyRegIdx = LIS->getInstructionIndex(*CopyMI).getRegSlot(); - SlotIndex DestRegIdx = LIS->getInstructionIndex(DestMI).getRegSlot(); + SlotIndex CopyIdx = LIS->getInstructionIndex(*CopyMI); if (!MRI->isConstantPhysReg(DstReg)) { // We checked above that there are no interfering defs of the physical // register. However, for this case, where we intent to move up the def of - // the physical register, we also need to check for interfering uses. + // the physical register, we also need to check that there are no uses + // of DstReg during the lifetime of SrcReg. SlotIndexes *Indexes = LIS->getSlotIndexes(); - for (SlotIndex SI = Indexes->getNextNonNullIndex(DestRegIdx); - SI != CopyRegIdx; SI = Indexes->getNextNonNullIndex(SI)) { - MachineInstr *MI = LIS->getInstructionFromIndex(SI); - if (MI->readsRegister(DstReg, TRI)) { - DEBUG(dbgs() << "\t\tInterference (read): " << *MI); - return false; + for (const LiveRange::Segment &S : RHS) { + // Note that we do not need to check S.valno as there is only one + // definition/valno of SrcReg. + for (SlotIndex SI = Indexes->getNextNonNullIndex(S.start); + SI < S.end; SI = Indexes->getNextNonNullIndex(SI)) { + MachineInstr &MI = *LIS->getInstructionFromIndex(SI); + if (MI.readsRegister(DstReg, TRI)) { + DEBUG(dbgs() << "\t\tInterference (read): " << MI); + return false; + } } } } @@ -1833,10 +1836,12 @@ // We're going to remove the copy which defines a physical reserved // register, so remove its valno, etc. 
DEBUG(dbgs() << "\t\tRemoving phys reg def of " << PrintReg(DstReg, TRI) - << " at " << CopyRegIdx << "\n"); + << " at " << CopyIdx << "\n"); - LIS->removePhysRegDefAt(DstReg, CopyRegIdx); + LIS->removePhysRegDefAt(DstReg, CopyIdx.getRegSlot()); // Create a new dead def at the new def location. + const MachineInstr &DestMI = *MRI->getVRegDef(RHS.reg); + SlotIndex DestRegIdx = LIS->getInstructionIndex(DestMI).getRegSlot(); for (MCRegUnitIterator UI(DstReg, TRI); UI.isValid(); ++UI) { LiveRange &LR = LIS->getRegUnit(*UI); LR.createDeadDef(DestRegIdx, LIS->getVNInfoAllocator()); Index: test/CodeGen/AArch64/regcoal-physreg.mir =================================================================== --- test/CodeGen/AArch64/regcoal-physreg.mir +++ test/CodeGen/AArch64/regcoal-physreg.mir @@ -2,12 +2,14 @@ --- | declare void @f2() - define void @func() { ret void } + define void @func0() { ret void } + define void @func1() { ret void } + define void @func2() { ret void } ... --- # Check coalescing of COPYs from reserved physregs. -# CHECK-LABEL: name: func -name: func +# CHECK-LABEL: name: func0 +name: func0 registers: - { id: 0, class: gpr32 } - { id: 1, class: gpr64 } @@ -80,3 +82,81 @@ BL @f2, csr_aarch64_aapcs, implicit-def dead %lr, implicit %sp, implicit-def %sp STRXui %7, %x1, 0 ... +--- +# It should be possible to coalesce "%0 = COPY %fp" even with the contrived CFG +# CHECK-LABEL: name: func1 +# CHECK-NOT: %fp = COPY %0 +# CHECK: %fp = ADRP +name: func1 +body: | + bb.0: + successors: %bb.3, %bb.4 + CBZX undef %x0, %bb.3 + B %bb.4 + + bb.1: + successors: %bb.2, %bb.5 + %fp = COPY %0 + CBZX undef %x0, %bb.2 + B %bb.5 + + bb.2: + successors: %bb.6 + %fp = COPY %xzr; outside the lifetime of %0, so shouldn't matter + B %bb.6 + + bb.3: + %xzr = COPY %fp ; outside the lifetime of %0, so shouldn't matter + RET_ReallyLR + + bb.4: + successors: %bb.1 + %0 : gpr64 = ADRP 0 + B %bb.1 + + bb.5: + STRXui %fp, %fp, 0 + RET_ReallyLR + + bb.6: + RET_ReallyLR +... 
+---
+# Variant of func1 where we should not be able to coalesce because of extra %fp
+# reads during the lifetime of %0
+# CHECK-LABEL: name: func2
+# CHECK: %fp = COPY %0
+# CHECK: %0 = ADRP
+name: func2
+body: |
+  bb.0:
+    successors: %bb.4
+    B %bb.4
+
+  bb.1:
+    successors: %bb.2, %bb.5
+    %fp = COPY %0
+    CBZX undef %x0, %bb.2
+    B %bb.5
+
+  bb.2:
+    successors: %bb.6
+    B %bb.6
+
+  bb.3:
+    successors: %bb.1
+    %xzr = COPY %fp ; %0 is live here so this should block coalescing
+    B %bb.1
+
+  bb.4:
+    successors: %bb.3
+    %0 : gpr64 = ADRP 0
+    B %bb.3
+
+  bb.5:
+    STRXui %fp, %fp, 0
+    RET_ReallyLR
+
+  bb.6:
+    RET_ReallyLR
+...