Index: lib/CodeGen/RegisterCoalescer.cpp
===================================================================
--- lib/CodeGen/RegisterCoalescer.cpp
+++ lib/CodeGen/RegisterCoalescer.cpp
@@ -1750,6 +1750,9 @@
   LiveInterval &RHS = LIS->getInterval(SrcReg);
   DEBUG(dbgs() << "\t\tRHS = " << RHS << '\n');
 
+  if (!MRI->hasOneDef(SrcReg))
+    return false;
+
   assert(RHS.containsOneValue() && "Invalid join with reserved register");
 
   // Optimization for reserved registers like ESP. We can only merge with a
@@ -1805,27 +1808,43 @@
   // =>
   // %Y = def
   // ...
-  if (!MRI->hasOneNonDBGUse(SrcReg)) {
-    DEBUG(dbgs() << "\t\tMultiple vreg uses!\n");
-    return false;
-  }
+  const MachineOperand &DefMO = *MRI->def_begin(SrcReg);
 
-  MachineInstr &DestMI = *MRI->getVRegDef(SrcReg);
   CopyMI = &*MRI->use_instr_nodbg_begin(SrcReg);
-  SlotIndex CopyRegIdx = LIS->getInstructionIndex(*CopyMI).getRegSlot();
-  SlotIndex DestRegIdx = LIS->getInstructionIndex(DestMI).getRegSlot();
+  SlotIndex CopyIdx = LIS->getInstructionIndex(*CopyMI);
+  const MachineInstr &DestMI = *DefMO.getParent();
+  SlotIndex DestRegIdx =
+      LIS->getInstructionIndex(DestMI).getRegSlot(DefMO.isEarlyClobber());
 
   if (!MRI->isConstantPhysReg(DstReg)) {
     // We checked above that there are no interfering defs of the physical
     // register. However, for this case, where we intent to move up the def of
-    // the physical register, we also need to check for interfering uses.
-    SlotIndexes *Indexes = LIS->getSlotIndexes();
-    for (SlotIndex SI = Indexes->getNextNonNullIndex(DestRegIdx);
-         SI != CopyRegIdx; SI = Indexes->getNextNonNullIndex(SI)) {
-      MachineInstr *MI = LIS->getInstructionFromIndex(SI);
-      if (MI->readsRegister(DstReg, TRI)) {
-        DEBUG(dbgs() << "\t\tInterference (read): " << *MI);
-        return false;
+    // the physical register, we also need to check that there are no uses
+    // of DstReg during the lifetime of SrcReg.
+    SlotIndexes &Indexes = *LIS->getSlotIndexes();
+    for (const LiveRange::Segment &S : RHS) {
+      // Adjust Start to point to an instruction: a segment either starts at
+      // the block begin, which has no instruction associated with it, or at
+      // the defining instruction of SrcReg. The uses of the defining
+      // instruction can be ignored unless its def sits in the earlyclobber
+      // slot.
+      SlotIndex Start = S.start;
+      if (Start.isBlock() || !DestRegIdx.isEarlyClobber()) {
+        assert((Start.isBlock() ||
+                (SlotIndex::isSameInstr(Start, DestRegIdx) &&
+                 !DestRegIdx.isEarlyClobber())) &&
+               "segment must start at a block begin or at the unique def");
+        Start = Indexes.getNextNonNullIndex(Start);
+      }
+
+      // Check all instructions in the segment for reads of DstReg.
+      for (SlotIndex SI = Start; SI < S.end;
+           SI = Indexes.getNextNonNullIndex(SI)) {
+        MachineInstr &MI = *LIS->getInstructionFromIndex(SI);
+        if (MI.readsRegister(DstReg, TRI)) {
+          DEBUG(dbgs() << "\t\tInterference (read): " << MI);
+          return false;
+        }
       }
     }
   }
@@ -1833,9 +1852,9 @@
   // We're going to remove the copy which defines a physical reserved
   // register, so remove its valno, etc.
   DEBUG(dbgs() << "\t\tRemoving phys reg def of " << PrintReg(DstReg, TRI)
-        << " at " << CopyRegIdx << "\n");
+        << " at " << CopyIdx << "\n");
 
-  LIS->removePhysRegDefAt(DstReg, CopyRegIdx);
+  LIS->removePhysRegDefAt(DstReg, CopyIdx.getRegSlot());
   // Create a new dead def at the new def location.
   for (MCRegUnitIterator UI(DstReg, TRI); UI.isValid(); ++UI) {
     LiveRange &LR = LIS->getRegUnit(*UI);
Index: test/CodeGen/AArch64/regcoal-physreg.mir
===================================================================
--- test/CodeGen/AArch64/regcoal-physreg.mir
+++ test/CodeGen/AArch64/regcoal-physreg.mir
@@ -2,40 +2,33 @@
 --- |
   declare void @f2()
 
-  define void @func() { ret void }
+  define void @func0() { ret void }
+  define void @func1() { ret void }
+  define void @func2() { ret void }
 ...
 ---
 # Check coalescing of COPYs from reserved physregs.
-# CHECK-LABEL: name: func
-name: func
-registers:
-  - { id: 0, class: gpr32 }
-  - { id: 1, class: gpr64 }
-  - { id: 2, class: gpr64 }
-  - { id: 3, class: gpr32 }
-  - { id: 4, class: gpr64 }
-  - { id: 5, class: gpr32 }
-  - { id: 6, class: xseqpairsclass }
-  - { id: 7, class: gpr64 }
+# CHECK-LABEL: name: func0
+name: func0
 body: |
   bb.0:
     ; We usually should not coalesce copies from allocatable physregs.
     ; CHECK: %0 = COPY %w7
     ; CHECK: STRWui %0, %x1, 0
-    %0 = COPY %w7
+    %0 : gpr32 = COPY %w7
     STRWui %0, %x1, 0
 
     ; It is fine to coalesce copies from reserved physregs
     ; CHECK-NOT: COPY
     ; CHECK: STRXui %fp, %x1, 0
-    %1 = COPY %fp
+    %1 : gpr64 = COPY %fp
     STRXui %1, %x1, 0
 
     ; It is not fine to coalesce copies from reserved physregs when they are
     ; clobbered.
     ; CHECK: %2 = COPY %fp
     ; CHECK: STRXui %2, %x1, 0
-    %2 = COPY %fp
+    %2 : gpr64 = COPY %fp
     %fp = SUBXri %fp, 4, 0
     STRXui %2, %x1, 0
 
@@ -43,7 +36,7 @@
     ; clobbered.
     ; CHECK-NOT: COPY
     ; CHECK: STRWui %wzr, %x1
-    %3 = COPY %wzr
+    %3 : gpr32 = COPY %wzr
     dead %wzr = SUBSWri %w1, 0, 0, implicit-def %nzcv
     STRWui %3, %x1, 0
 
@@ -51,13 +44,13 @@
     ; clobbered.
     ; CHECK-NOT: COPY
     ; CHECK: STRXui %xzr, %x1
-    %4 = COPY %xzr
+    %4 : gpr64 = COPY %xzr
     dead %wzr = SUBSWri %w1, 0, 0, implicit-def %nzcv
     STRXui %4, %x1, 0
 
     ; Coalescing COPYs into constant physregs.
     ; CHECK: %wzr = SUBSWri %w1, 0, 0
-    %5 = SUBSWri %w1, 0, 0, implicit-def %nzcv
+    %5 : gpr32 = SUBSWri %w1, 0, 0, implicit-def %nzcv
     %wzr = COPY %5
 
     ; Only coalesce when the source register is reserved as a whole (this is
@@ -65,7 +58,7 @@
     ; of the non-reserved part).
     ; CHECK: %6 = COPY %x28_fp
     ; CHECK: HINT 0, implicit %6
-    %6 = COPY %x28_fp
+    %6 : xseqpairsclass = COPY %x28_fp
     HINT 0, implicit %6
 
     ; It is not fine to coalesce copies from reserved physregs when they are
@@ -76,7 +69,98 @@
     ; Need a def of x18 so that it's not deduced as "constant".
     %x18 = COPY %xzr
-    %7 = COPY %x18
+    %7 : gpr64 = COPY %x18
     BL @f2, csr_aarch64_aapcs, implicit-def dead %lr, implicit %sp, implicit-def %sp
     STRXui %7, %x1, 0
+
+    ; This can be coalesced.
+    ; CHECK: %fp = SUBXri %fp, 4, 0
+    %8 : gpr64sp = SUBXri %fp, 4, 0
+    %fp = COPY %8
+
+    ; Cannot coalesce when the vreg definition is in an earlyclobber slot.
+    ; (Model a hypothetical instruction with an early-clobber def as a NOOP.)
+    ; CHECK-NOT: HINT 0, implicit %fp, implicit-def early-clobber %fp
+    ; CHECK: HINT 0, implicit %fp, implicit-def early-clobber %9
+    ; CHECK: %fp = COPY %9
+    HINT 0, implicit %fp, implicit-def early-clobber %9 : gpr64
+    %fp = COPY %9
+...
+---
+# It should be possible to coalesce "%fp = COPY %0" even with this contrived CFG.
+# CHECK-LABEL: name: func1
+# CHECK-NOT: %fp = COPY %0
+# CHECK: %fp = ADRP
+name: func1
+body: |
+  bb.0:
+    successors: %bb.3, %bb.4
+    CBZX undef %x0, %bb.3
+    B %bb.4
+
+  bb.1:
+    successors: %bb.2, %bb.5
+    %fp = COPY %0
+    CBZX undef %x0, %bb.2
+    B %bb.5
+
+  bb.2:
+    successors: %bb.6
+    %fp = COPY %xzr ; outside the lifetime of %0, so shouldn't matter
+    B %bb.6
+
+  bb.3:
+    %xzr = COPY %fp ; outside the lifetime of %0, so shouldn't matter
+    RET_ReallyLR
+
+  bb.4:
+    successors: %bb.1
+    %0 : gpr64 = ADRP 0
+    B %bb.1
+
+  bb.5:
+    STRXui %fp, %fp, 0
+    RET_ReallyLR
+
+  bb.6:
+    RET_ReallyLR
+...
+---
+# Variant of func1 where we should not be able to coalesce because of extra
+# %fp reads during the lifetime of %0.
+# CHECK-LABEL: name: func2
+# CHECK: %fp = COPY %0
+# CHECK: %0 = ADRP
+name: func2
+body: |
+  bb.0:
+    successors: %bb.4
+    B %bb.4
+
+  bb.1:
+    successors: %bb.2, %bb.5
+    %fp = COPY %0
+    CBZX undef %x0, %bb.2
+    B %bb.5
+
+  bb.2:
+    successors: %bb.6
+    B %bb.6
+
+  bb.3:
+    successors: %bb.1
+    %xzr = COPY %fp ; %0 is live here so this should block coalescing
+    B %bb.1
+
+  bb.4:
+    successors: %bb.3
+    %0 : gpr64 = ADRP 0
+    B %bb.3
+
+  bb.5:
+    STRXui %fp, %fp, 0
+    RET_ReallyLR
+
+  bb.6:
+    RET_ReallyLR
 ...
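
Note for reviewers who want to poke at the interference rule outside of LLVM: the core of this patch is the segment-based scan, which walks every live segment of SrcReg and rejects the join as soon as any instruction inside a segment reads DstReg, skipping the defining instruction itself unless its def is earlyclobber. Below is a minimal standalone sketch of that rule; Instr, Segment, and canMoveDefUp are made-up stand-ins for MachineInstr, LiveRange::Segment, and the scan in joinReservedPhysReg, not LLVM API.

// Standalone model, not LLVM code. Instruction indices stand in for
// SlotIndexes; half-open [Start, End) index ranges stand in for
// LiveRange segments.
#include <cstdio>
#include <vector>

struct Instr {
  bool ReadsDst;     // instruction reads the physreg (DstReg)
  bool DefsSrc;      // instruction defines the vreg (SrcReg)
  bool EarlyClobber; // that def sits in the earlyclobber slot
};

struct Segment {
  unsigned Start, End; // half-open range of instruction indices
};

// Returns true when no instruction inside SrcReg's live segments reads
// DstReg, i.e. the def of DstReg can safely be moved up to the def of
// SrcReg (the analogue of the scan added to joinReservedPhysReg).
static bool canMoveDefUp(const std::vector<Instr> &Code,
                         const std::vector<Segment> &SrcLive) {
  for (const Segment &S : SrcLive) {
    for (unsigned I = S.Start; I < S.End; ++I) {
      const Instr &MI = Code[I];
      // A regular def writes after the instruction's reads, so the
      // defining instruction can be skipped. An earlyclobber def writes
      // before the reads, so a read of DstReg by that instruction would
      // observe the moved-up def and must be checked.
      if (MI.DefsSrc && !MI.EarlyClobber)
        continue;
      if (MI.ReadsDst)
        return false; // interference (read)
    }
  }
  return true;
}

int main() {
  // idx 0: def of SrcReg; idx 1: reads DstReg; idx 2: the copy.
  std::vector<Instr> Code = {{false, true, false},
                             {true, false, false},
                             {false, false, false}};
  // SrcReg is live from its def up to (but not including) the copy.
  std::vector<Segment> SrcLive = {{0, 2}};
  std::printf("coalescable: %s\n", canMoveDefUp(Code, SrcLive) ? "yes" : "no");
  // Prints "no": instruction 1 reads DstReg inside SrcReg's live range.
  return 0;
}

Flipping Code[1].ReadsDst to false makes the driver print "yes", which mirrors the func1/func2 pair in the test above: the two functions differ only in whether %fp is read while %0 is live.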