diff --git a/llvm/lib/CodeGen/RegAllocFast.cpp b/llvm/lib/CodeGen/RegAllocFast.cpp --- a/llvm/lib/CodeGen/RegAllocFast.cpp +++ b/llvm/lib/CodeGen/RegAllocFast.cpp @@ -240,6 +240,8 @@ void addRegClassDefCounts(std::vector &RegClassDefCounts, Register Reg) const; + void findAndSortDefOperandIndexes(const MachineInstr &MI); + void allocateInstruction(MachineInstr &MI); void handleDebugValue(MachineInstr &MI); void handleBundle(MachineInstr &MI); @@ -265,18 +267,18 @@ void allocVirtRegUndef(MachineOperand &MO); void assignDanglingDebugValues(MachineInstr &Def, Register VirtReg, MCPhysReg Reg); - void defineLiveThroughVirtReg(MachineInstr &MI, unsigned OpNum, + bool defineLiveThroughVirtReg(MachineInstr &MI, unsigned OpNum, Register VirtReg); - void defineVirtReg(MachineInstr &MI, unsigned OpNum, Register VirtReg, + bool defineVirtReg(MachineInstr &MI, unsigned OpNum, Register VirtReg, bool LookAtPhysRegUses = false); - void useVirtReg(MachineInstr &MI, unsigned OpNum, Register VirtReg); + bool useVirtReg(MachineInstr &MI, unsigned OpNum, Register VirtReg); MachineBasicBlock::iterator getMBBBeginInsertionPoint(MachineBasicBlock &MBB, SmallSet &PrologLiveIns) const; void reloadAtBegin(MachineBasicBlock &MBB); - void setPhysReg(MachineInstr &MI, MachineOperand &MO, MCPhysReg PhysReg); + bool setPhysReg(MachineInstr &MI, MachineOperand &MO, MCPhysReg PhysReg); Register traceCopies(Register VirtReg) const; Register traceCopyChain(Register Reg) const; @@ -875,10 +877,11 @@ /// Variation of defineVirtReg() with special handling for livethrough regs /// (tied or earlyclobber) that may interfere with preassigned uses. -void RegAllocFast::defineLiveThroughVirtReg(MachineInstr &MI, unsigned OpNum, +/// \return true if MI's MachineOperands were re-arranged/invalidated. +bool RegAllocFast::defineLiveThroughVirtReg(MachineInstr &MI, unsigned OpNum, Register VirtReg) { if (!shouldAllocateRegister(VirtReg)) - return; + return false; LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg); if (LRI != LiveVirtRegs.end()) { MCPhysReg PrevReg = LRI->PhysReg; @@ -909,11 +912,13 @@ /// perform an allocation if: /// - It is a dead definition without any uses. /// - The value is live out and all uses are in different basic blocks. -void RegAllocFast::defineVirtReg(MachineInstr &MI, unsigned OpNum, +/// +/// \return true if MI's MachineOperands were re-arranged/invalidated. +bool RegAllocFast::defineVirtReg(MachineInstr &MI, unsigned OpNum, Register VirtReg, bool LookAtPhysRegUses) { assert(VirtReg.isVirtual() && "Not a virtual register"); if (!shouldAllocateRegister(VirtReg)) - return; + return false; MachineOperand &MO = MI.getOperand(OpNum); LiveRegMap::iterator LRI; bool New; @@ -974,15 +979,16 @@ BundleVirtRegsMap[VirtReg] = PhysReg; } markRegUsedInInstr(PhysReg); - setPhysReg(MI, MO, PhysReg); + return setPhysReg(MI, MO, PhysReg); } /// Allocates a register for a VirtReg use. -void RegAllocFast::useVirtReg(MachineInstr &MI, unsigned OpNum, +/// \return true if MI's MachineOperands were re-arranged/invalidated. +bool RegAllocFast::useVirtReg(MachineInstr &MI, unsigned OpNum, Register VirtReg) { assert(VirtReg.isVirtual() && "Not a virtual register"); if (!shouldAllocateRegister(VirtReg)) - return; + return false; MachineOperand &MO = MI.getOperand(OpNum); LiveRegMap::iterator LRI; bool New; @@ -1019,8 +1025,7 @@ if (LRI->Error) { const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg); ArrayRef AllocationOrder = RegClassInfo.getOrder(&RC); - setPhysReg(MI, MO, *AllocationOrder.begin()); - return; + return setPhysReg(MI, MO, *AllocationOrder.begin()); } } @@ -1030,18 +1035,17 @@ BundleVirtRegsMap[VirtReg] = LRI->PhysReg; } markRegUsedInInstr(LRI->PhysReg); - setPhysReg(MI, MO, LRI->PhysReg); + return setPhysReg(MI, MO, LRI->PhysReg); } -/// Changes operand OpNum in MI the refer the PhysReg, considering subregs. This -/// may invalidate any operand pointers. Return true if the operand kills its -/// register. -void RegAllocFast::setPhysReg(MachineInstr &MI, MachineOperand &MO, +/// Changes operand OpNum in MI the refer the PhysReg, considering subregs. +/// \return true if MI's MachineOperands were re-arranged/invalidated. +bool RegAllocFast::setPhysReg(MachineInstr &MI, MachineOperand &MO, MCPhysReg PhysReg) { if (!MO.getSubReg()) { MO.setReg(PhysReg); MO.setIsRenamable(true); - return; + return false; } // Handle subregister index. @@ -1057,7 +1061,8 @@ // register kill. if (MO.isKill()) { MI.addRegisterKilled(PhysReg, TRI, true); - return; + // Conservatively assume implicit MOs were re-arranged + return true; } // A of a sub-register requires an implicit def of the full @@ -1067,7 +1072,10 @@ MI.addRegisterDead(PhysReg, TRI, true); else MI.addRegisterDefined(PhysReg, TRI); + // Conservatively assume implicit MOs were re-arranged + return true; } + return false; } #ifndef NDEBUG @@ -1147,6 +1155,72 @@ } } +/// Compute \ref DefOperandIndexes so it contains the indices of "def" operands +/// that are to be allocated. Those are ordered in a way that small classes, +/// early clobbers and livethroughs are allocated first. +void RegAllocFast::findAndSortDefOperandIndexes(const MachineInstr &MI) { + DefOperandIndexes.clear(); + + // Track number of defs which may consume a register from the class. + std::vector RegClassDefCounts(TRI->getNumRegClasses(), 0); + assert(RegClassDefCounts[0] == 0); + + LLVM_DEBUG(dbgs() << "Need to assign livethroughs\n"); + for (unsigned I = 0, E = MI.getNumOperands(); I < E; ++I) { + const MachineOperand &MO = MI.getOperand(I); + if (!MO.isReg()) + continue; + Register Reg = MO.getReg(); + if (MO.readsReg()) { + if (Reg.isPhysical()) { + LLVM_DEBUG(dbgs() << "mark extra used: " << printReg(Reg, TRI) << '\n'); + markPhysRegUsedInInstr(Reg); + } + } + + if (MO.isDef()) { + if (Reg.isVirtual() && shouldAllocateRegister(Reg)) + DefOperandIndexes.push_back(I); + + addRegClassDefCounts(RegClassDefCounts, Reg); + } + } + + llvm::sort(DefOperandIndexes, [&](uint16_t I0, uint16_t I1) { + const MachineOperand &MO0 = MI.getOperand(I0); + const MachineOperand &MO1 = MI.getOperand(I1); + Register Reg0 = MO0.getReg(); + Register Reg1 = MO1.getReg(); + const TargetRegisterClass &RC0 = *MRI->getRegClass(Reg0); + const TargetRegisterClass &RC1 = *MRI->getRegClass(Reg1); + + // Identify regclass that are easy to use up completely just in this + // instruction. + unsigned ClassSize0 = RegClassInfo.getOrder(&RC0).size(); + unsigned ClassSize1 = RegClassInfo.getOrder(&RC1).size(); + + bool SmallClass0 = ClassSize0 < RegClassDefCounts[RC0.getID()]; + bool SmallClass1 = ClassSize1 < RegClassDefCounts[RC1.getID()]; + if (SmallClass0 > SmallClass1) + return true; + if (SmallClass0 < SmallClass1) + return false; + + // Allocate early clobbers and livethrough operands first. + bool Livethrough0 = MO0.isEarlyClobber() || MO0.isTied() || + (MO0.getSubReg() == 0 && !MO0.isUndef()); + bool Livethrough1 = MO1.isEarlyClobber() || MO1.isTied() || + (MO1.getSubReg() == 0 && !MO1.isUndef()); + if (Livethrough0 > Livethrough1) + return true; + if (Livethrough0 < Livethrough1) + return false; + + // Tie-break rule: operand index. + return I0 < I1; + }); +} + void RegAllocFast::allocateInstruction(MachineInstr &MI) { // The basic algorithm here is: // 1. Mark registers of def operands as free @@ -1218,6 +1292,10 @@ // Allocate virtreg defs. if (HasDef) { if (HasVRegDef) { + // Note that Implicit MOs can get re-arranged by defineVirtReg(), so loop + // multiple times to ensure no operand is missed. + bool ReArrangedImplicitOps = true; + // Special handling for early clobbers, tied operands or subregister defs: // Compared to "normal" defs these: // - Must not use a register that is pre-assigned for a use operand. @@ -1225,90 +1303,45 @@ // heuristic to figure out a good operand order before doing // assignments. if (NeedToAssignLiveThroughs) { - DefOperandIndexes.clear(); PhysRegUses.clear(); - // Track number of defs which may consume a register from the class. - std::vector RegClassDefCounts(TRI->getNumRegClasses(), 0); - assert(RegClassDefCounts[0] == 0); - - LLVM_DEBUG(dbgs() << "Need to assign livethroughs\n"); - for (unsigned I = 0, E = MI.getNumOperands(); I < E; ++I) { - const MachineOperand &MO = MI.getOperand(I); - if (!MO.isReg()) - continue; - Register Reg = MO.getReg(); - if (MO.readsReg()) { - if (Reg.isPhysical()) { - LLVM_DEBUG(dbgs() << "mark extra used: " << printReg(Reg, TRI) - << '\n'); - markPhysRegUsedInInstr(Reg); + while (ReArrangedImplicitOps) { + ReArrangedImplicitOps = false; + findAndSortDefOperandIndexes(MI); + for (uint16_t OpIdx : DefOperandIndexes) { + MachineOperand &MO = MI.getOperand(OpIdx); + LLVM_DEBUG(dbgs() << "Allocating " << MO << '\n'); + unsigned Reg = MO.getReg(); + if (MO.isEarlyClobber() || + (MO.isTied() && !TiedOpIsUndef(MO, OpIdx)) || + (MO.getSubReg() && !MO.isUndef())) { + ReArrangedImplicitOps = defineLiveThroughVirtReg(MI, OpIdx, Reg); + } else { + ReArrangedImplicitOps = defineVirtReg(MI, OpIdx, Reg); + } + if (ReArrangedImplicitOps) { + // Implicit operands of MI were re-arranged, + // re-compute DefOperandIndexes. + break; } - } - - if (MO.isDef()) { - if (Reg.isVirtual() && shouldAllocateRegister(Reg)) - DefOperandIndexes.push_back(I); - - addRegClassDefCounts(RegClassDefCounts, Reg); - } - } - - llvm::sort(DefOperandIndexes, [&](uint16_t I0, uint16_t I1) { - const MachineOperand &MO0 = MI.getOperand(I0); - const MachineOperand &MO1 = MI.getOperand(I1); - Register Reg0 = MO0.getReg(); - Register Reg1 = MO1.getReg(); - const TargetRegisterClass &RC0 = *MRI->getRegClass(Reg0); - const TargetRegisterClass &RC1 = *MRI->getRegClass(Reg1); - - // Identify regclass that are easy to use up completely just in this - // instruction. - unsigned ClassSize0 = RegClassInfo.getOrder(&RC0).size(); - unsigned ClassSize1 = RegClassInfo.getOrder(&RC1).size(); - - bool SmallClass0 = ClassSize0 < RegClassDefCounts[RC0.getID()]; - bool SmallClass1 = ClassSize1 < RegClassDefCounts[RC1.getID()]; - if (SmallClass0 > SmallClass1) - return true; - if (SmallClass0 < SmallClass1) - return false; - - // Allocate early clobbers and livethrough operands first. - bool Livethrough0 = MO0.isEarlyClobber() || MO0.isTied() || - (MO0.getSubReg() == 0 && !MO0.isUndef()); - bool Livethrough1 = MO1.isEarlyClobber() || MO1.isTied() || - (MO1.getSubReg() == 0 && !MO1.isUndef()); - if (Livethrough0 > Livethrough1) - return true; - if (Livethrough0 < Livethrough1) - return false; - - // Tie-break rule: operand index. - return I0 < I1; - }); - - for (uint16_t OpIdx : DefOperandIndexes) { - MachineOperand &MO = MI.getOperand(OpIdx); - LLVM_DEBUG(dbgs() << "Allocating " << MO << '\n'); - unsigned Reg = MO.getReg(); - if (MO.isEarlyClobber() || - (MO.isTied() && !TiedOpIsUndef(MO, OpIdx)) || - (MO.getSubReg() && !MO.isUndef())) { - defineLiveThroughVirtReg(MI, OpIdx, Reg); - } else { - defineVirtReg(MI, OpIdx, Reg); } } } else { // Assign virtual register defs. - for (unsigned I = 0, E = MI.getNumOperands(); I < E; ++I) { - MachineOperand &MO = MI.getOperand(I); - if (!MO.isReg() || !MO.isDef()) - continue; - Register Reg = MO.getReg(); - if (Reg.isVirtual()) - defineVirtReg(MI, I, Reg); + while (ReArrangedImplicitOps) { + ReArrangedImplicitOps = false; + for (unsigned I = 0, E = MI.getNumOperands(); I < E; ++I) { + MachineOperand &MO = MI.getOperand(I); + if (!MO.isReg() || !MO.isDef()) + continue; + Register Reg = MO.getReg(); + if (Reg.isVirtual()) { + ReArrangedImplicitOps = defineVirtReg(MI, I, Reg); + if (ReArrangedImplicitOps) { + break; + } + } + } } } } @@ -1381,29 +1414,35 @@ } // Allocate virtreg uses and insert reloads as necessary. + // Implicit MOs can get moved/removed by useVirtReg(), so loop multiple + // times to ensure no operand is missed. bool HasUndefUse = false; - for (unsigned I = 0; I < MI.getNumOperands(); ++I) { - MachineOperand &MO = MI.getOperand(I); - if (!MO.isReg() || !MO.isUse()) - continue; - Register Reg = MO.getReg(); - if (!Reg.isVirtual() || !shouldAllocateRegister(Reg)) - continue; - - if (MO.isUndef()) { - HasUndefUse = true; - continue; - } - + bool ReArrangedImplicitMOs = true; + while (ReArrangedImplicitMOs) { + ReArrangedImplicitMOs = false; + for (unsigned I = 0; I < MI.getNumOperands(); ++I) { + MachineOperand &MO = MI.getOperand(I); + if (!MO.isReg() || !MO.isUse()) + continue; + Register Reg = MO.getReg(); + if (!Reg.isVirtual() || !shouldAllocateRegister(Reg)) + continue; - // Populate MayLiveAcrossBlocks in case the use block is allocated before - // the def block (removing the vreg uses). - mayLiveIn(Reg); + if (MO.isUndef()) { + HasUndefUse = true; + continue; + } + // Populate MayLiveAcrossBlocks in case the use block is allocated before + // the def block (removing the vreg uses). + mayLiveIn(Reg); - assert(!MO.isInternalRead() && "Bundles not supported"); - assert(MO.readsReg() && "reading use"); - useVirtReg(MI, I, Reg); + assert(!MO.isInternalRead() && "Bundles not supported"); + assert(MO.readsReg() && "reading use"); + ReArrangedImplicitMOs = useVirtReg(MI, I, Reg); + if (ReArrangedImplicitMOs) + break; + } } // Allocate undef operands. This is a separate step because in a situation diff --git a/llvm/test/CodeGen/ARM/regalloc-fast-rewrite-implicits.mir b/llvm/test/CodeGen/ARM/regalloc-fast-rewrite-implicits.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/ARM/regalloc-fast-rewrite-implicits.mir @@ -0,0 +1,78 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=armv7-apple-ios -run-pass=regallocfast -o - %s | FileCheck %s + + +# tBX_RET uses an implicit vreg with a sub-register. That implicit use will +# typically be rewritten as a use of the relevant super-register. Make sure +# regallocfast is able to process the remaining operands (here, %2) and rewrite +# them to use physical registers. +--- +name: different_vreg +tracksRegLiveness: true +body: | + bb.0: + liveins: $d2, $d4, $d7 + + ; CHECK-LABEL: name: different_vreg + ; CHECK: liveins: $d2, $d4, $d7 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: renamable $d5 = COPY killed $d4 + ; CHECK-NEXT: renamable $d4 = COPY killed $d2 + ; CHECK-NEXT: undef renamable $d0 = COPY renamable $d5, implicit-def $q0_q1 + ; CHECK-NEXT: renamable $d1 = COPY renamable $d4 + ; CHECK-NEXT: renamable $d2 = COPY killed renamable $d5 + ; CHECK-NEXT: renamable $d3 = COPY killed renamable $d4 + ; CHECK-NEXT: tBX_RET 14 /* CC::al */, $noreg, implicit killed renamable $d7, implicit killed $q0_q1 + %0:dpr_vfp2 = COPY $d4 + %1:dpr_vfp2 = COPY $d2 + %2:dpr_vfp2 = COPY $d7 + undef %4.dsub_0:dquad = COPY %0 + %4.dsub_1:dquad = COPY %1 + %4.dsub_2:dquad = COPY %0 + %4.dsub_3:dquad = COPY %1 + tBX_RET 14, $noreg, implicit %4.dsub_3, implicit %2 +... + + +# tBX_RET uses the same vreg twice, make sure regallocfast is able to allocate a +# physical register for it and replace both references. +--- +name: same_vreg_twice +tracksRegLiveness: true +body: | + bb.0: + liveins: $d2, $d4 + + ; CHECK-LABEL: name: same_vreg_twice + ; CHECK: liveins: $d2, $d4 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: renamable $d5 = COPY killed $d4 + ; CHECK-NEXT: renamable $d4 = COPY killed $d2 + ; CHECK-NEXT: undef renamable $d0 = COPY renamable $d5, implicit-def $q0_q1 + ; CHECK-NEXT: renamable $d1 = COPY renamable $d4 + ; CHECK-NEXT: renamable $d2 = COPY killed renamable $d5 + ; CHECK-NEXT: renamable $d3 = COPY killed renamable $d4 + ; CHECK-NEXT: tBX_RET 14 /* CC::al */, $noreg, implicit renamable $d3, implicit killed $q0_q1 + %0:dpr_vfp2 = COPY $d4 + %1:dpr_vfp2 = COPY $d2 + undef %4.dsub_0:dquad = COPY %0 + %4.dsub_1:dquad = COPY %1 + %4.dsub_2:dquad = COPY %0 + %4.dsub_3:dquad = COPY %1 + tBX_RET 14, $noreg, implicit %4.dsub_1, implicit %4.dsub_3 +... + +# tBL partially defines two vregs, which turn out to be dead. Make sure +# regallocfast allocates a phys reg for both %0 and %1. +--- +name: partial_dead +tracksRegLiveness: true +body: | + bb.0: + + ; CHECK-LABEL: name: partial_dead + ; CHECK: tBL 14 /* CC::al */, $noreg, 0, csr_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $q0_q1, implicit-def dead $q2_q3 + ; CHECK-NEXT: tBX_RET 14 /* CC::al */, $noreg + tBL 14, $noreg, 0, csr_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def undef %0.dsub_0:dquad, implicit-def undef %1.dsub_2:dquad + tBX_RET 14, $noreg +...