Index: lib/Target/AArch64/AArch64RedundantCopyElimination.cpp =================================================================== --- lib/Target/AArch64/AArch64RedundantCopyElimination.cpp +++ lib/Target/AArch64/AArch64RedundantCopyElimination.cpp @@ -43,6 +43,7 @@ class AArch64RedundantCopyElimination : public MachineFunctionPass { const MachineRegisterInfo *MRI; const TargetRegisterInfo *TRI; + BitVector ClobberedRegs; public: static char ID; @@ -80,6 +81,26 @@ return false; } +/// trackRegDefs - Remember what registers the specified instruction modifies. +static void trackRegDefs(const MachineInstr &MI, BitVector &ClobberedRegs, + const TargetRegisterInfo *TRI) { + for (const MachineOperand &MO : MI.operands()) { + if (MO.isRegMask()) + ClobberedRegs.setBitsNotInMask(MO.getRegMask()); + + if (!MO.isReg()) + continue; + unsigned Reg = MO.getReg(); + if (!Reg) + continue; + if (!MO.isDef()) + continue; + + for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) + ClobberedRegs.set(*AI); + } +} + bool AArch64RedundantCopyElimination::optimizeCopy(MachineBasicBlock *MBB) { // Check if the current basic block has a single predecessor. if (MBB->pred_size() != 1) @@ -90,74 +111,132 @@ if (CompBr == PredMBB->end() || PredMBB->succ_size() != 2) return false; + // Keep track of the earliest point in the PredMBB block where kill markers + // need to be removed if a COPY is removed. + MachineBasicBlock::iterator FirstUse; + // Registers that are known to contain zeros at the start of MBB. + SmallVector TargetRegs; + // Registers clobbered in PredMBB between CompBr instruction and current + // instruction being checked in loop. + ClobberedRegs.reset(); ++CompBr; do { --CompBr; - if (guaranteesZeroRegInBlock(*CompBr, MBB)) + if (guaranteesZeroRegInBlock(*CompBr, MBB)) { + TargetRegs.push_back(CompBr->getOperand(0).getReg()); + FirstUse = CompBr; + // Look backward in PredMBB for COPYs from the known zero TargetReg to + // find other registers that are known to be zero. + for (auto PredI = CompBr;; --PredI) { + if (PredI->isCopy()) { + unsigned CopyDstReg = PredI->getOperand(0).getReg(); + unsigned CopySrcReg = PredI->getOperand(1).getReg(); + for (unsigned TargetReg : TargetRegs) { + if (ClobberedRegs[TargetReg]) + continue; + // If we have X = COPY Y, and Y is known to be zero, then now X is + // known to be zero. + if (CopySrcReg == TargetReg && !ClobberedRegs[CopyDstReg]) { + TargetRegs.push_back(CopyDstReg); + FirstUse = PredI; + break; + } + // If we have X = COPY Y, and X is known to be zero, then now Y is + // known to be zero. + if (CopyDstReg == TargetReg && !ClobberedRegs[CopySrcReg]) { + TargetRegs.push_back(CopySrcReg); + FirstUse = PredI; + break; + } + } + } + + // Stop if we get to the beginning of PredMBB. + if (PredI == PredMBB->begin()) + break; + + trackRegDefs(*PredI, ClobberedRegs, TRI); + // Stop if all of the known-zero regs have been clobbered. + if (all_of(TargetRegs, [&](unsigned TargetReg) { + return ClobberedRegs[TargetReg]; + })) + break; + } break; + } } while (CompBr != PredMBB->begin() && CompBr->isTerminator()); - // We've not found a CBZ/CBNZ, time to bail out. - if (!guaranteesZeroRegInBlock(*CompBr, MBB)) + // We've not found a known zero register, time to bail out. + if (TargetRegs.empty()) return false; - unsigned TargetReg = CompBr->getOperand(0).getReg(); - if (!TargetReg) - return false; - assert(TargetRegisterInfo::isPhysicalRegister(TargetReg) && - "Expect physical register"); - - // Remember all registers aliasing with TargetReg. - SmallSetVector TargetRegs; - for (MCRegAliasIterator AI(TargetReg, TRI, true); AI.isValid(); ++AI) - TargetRegs.insert(*AI); - bool Changed = false; + // UsedTargetRegs is the set of TargetRegs that have had uses added to MBB. + SmallSetVector UsedTargetRegs; MachineBasicBlock::iterator LastChange = MBB->begin(); - unsigned SmallestDef = TargetReg; // Remove redundant Copy instructions unless TargetReg is modified. for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;) { MachineInstr *MI = &*I; ++I; - if (MI->isCopy() && MI->getOperand(0).isReg() && - MI->getOperand(1).isReg()) { - + bool RemovedCopy = false; + if (MI->isCopy()) { unsigned DefReg = MI->getOperand(0).getReg(); unsigned SrcReg = MI->getOperand(1).getReg(); if ((SrcReg == AArch64::XZR || SrcReg == AArch64::WZR) && - !MRI->isReserved(DefReg) && - (TargetReg == DefReg || TRI->isSuperRegister(DefReg, TargetReg))) { - DEBUG(dbgs() << "Remove redundant Copy : "); - DEBUG((MI)->print(dbgs())); - - MI->eraseFromParent(); - Changed = true; - LastChange = I; - NumCopiesRemoved++; - SmallestDef = - TRI->isSubRegister(SmallestDef, DefReg) ? DefReg : SmallestDef; - continue; + !MRI->isReserved(DefReg)) { + for (unsigned TargetReg : TargetRegs) { + if (TargetReg == DefReg || TRI->isSuperRegister(DefReg, TargetReg)) { + DEBUG(dbgs() << "Remove redundant Copy : "); + DEBUG((MI)->print(dbgs())); + + MI->eraseFromParent(); + Changed = true; + LastChange = I; + NumCopiesRemoved++; + UsedTargetRegs.insert(TargetReg); + RemovedCopy = true; + break; + } + } } } - if (MI->modifiesRegister(TargetReg, TRI)) + // Skip to the next instruction if we removed the COPY from WZR/XZR. + if (RemovedCopy) + continue; + + // Remove any regs the MI clobbers from the TargetRegs set. + for (unsigned RI = 0; RI < TargetRegs.size();) + if (MI->modifiesRegister(TargetRegs[RI], TRI)) { + std::swap(TargetRegs[RI], TargetRegs[TargetRegs.size() - 1]); + TargetRegs.pop_back(); + // Don't increment RI since we need to now check the swapped-in + // TargetRegs[RI]. + } else { + ++RI; + } + + // Continue until the TargetRegs set is empty. + if (TargetRegs.empty()) break; } if (!Changed) return false; - // Otherwise, we have to fixup the use-def chain, starting with the - // CBZ/CBNZ. Conservatively mark as much as we can live. - CompBr->clearRegisterKills(SmallestDef, TRI); - - if (none_of(TargetRegs, [&](unsigned Reg) { return MBB->isLiveIn(Reg); })) - MBB->addLiveIn(TargetReg); + // Add newly used regs to the block's live-in list if they aren't there + // already. + for (unsigned TargetReg : UsedTargetRegs) + if (!MBB->isLiveIn(TargetReg)) + MBB->addLiveIn(TargetReg); - // Clear any kills of TargetReg between CompBr and the last removed COPY. + // Clear kills in the range where changes were made. This is conservative, + // but should be okay since kill markers are being phased out. + for (MachineInstr &MMI : make_range(FirstUse, PredMBB->end())) + MMI.clearKillInfo(); for (MachineInstr &MMI : make_range(MBB->begin(), LastChange)) - MMI.clearRegisterKills(SmallestDef, TRI); + MMI.clearKillInfo(); return true; } @@ -168,6 +247,7 @@ return false; TRI = MF.getSubtarget().getRegisterInfo(); MRI = &MF.getRegInfo(); + ClobberedRegs.resize(TRI->getNumRegs()); bool Changed = false; for (MachineBasicBlock &MBB : MF) Changed |= optimizeCopy(&MBB); Index: test/CodeGen/AArch64/machine-copy-remove.mir =================================================================== --- /dev/null +++ test/CodeGen/AArch64/machine-copy-remove.mir @@ -0,0 +1,273 @@ +# RUN: llc -mtriple=aarch64--linux-gnu -run-pass=aarch64-copyelim %s -verify-machineinstrs -o - 2>&1 | FileCheck %s + +--- | + define void @test1() { ret void } + define void @test2() { ret void } + define void @test3() { ret void } + define void @test4() { ret void } + define void @test5() { ret void } + define void @test6() { ret void } + define void @test7() { ret void } + define void @test8() { ret void } +... +--- +# Check that bb.0 COPY is seen through to allow the bb.1 COPY of XZR to be removed. +# CHECK-LABEL: name: test1 +# CHECK-NOT: COPY %xzr +name: test1 +tracksRegLiveness: true +body: | + bb.0: + successors: %bb.1, %bb.2 + liveins: %x0, %x1 + + %x0 = COPY %x1 + CBNZX %x1, %bb.2 + + bb.1: + successors: %bb.3 + + %x0 = COPY %xzr + B %bb.3 + + bb.2: + successors: %bb.3 + liveins: %x1 + + %x0 = LDRXui %x1, 0 + + bb.3: + liveins: %x0 + + RET_ReallyLR implicit %x0 + +... +# Similar to test1, but with reversed COPY. +# CHECK-LABEL: name: test2 +# CHECK-NOT: COPY %xzr +name: test2 +tracksRegLiveness: true +body: | + bb.0: + successors: %bb.1, %bb.2 + liveins: %x0, %x1 + + %x1 = COPY %x0 + CBNZX %x1, %bb.2 + + bb.1: + successors: %bb.3 + + %x0 = COPY %xzr + B %bb.3 + + bb.2: + successors: %bb.3 + liveins: %x1 + + %x0 = LDRXui %x1, 0 + + bb.3: + liveins: %x0 + + RET_ReallyLR implicit %x0 + +... +# Similar to test1, but with a clobber that prevents removal of the XZR COPY. +# CHECK-LABEL: name: test3 +# CHECK: COPY %xzr +name: test3 +tracksRegLiveness: true +body: | + bb.0: + successors: %bb.1, %bb.2 + liveins: %x0, %x1, %x2 + + %x0 = COPY %x1 + %x1 = LDRXui %x1, 0 + CBNZX %x1, %bb.2 + + bb.1: + successors: %bb.3 + + %x0 = COPY %xzr + B %bb.3 + + bb.2: + successors: %bb.3 + liveins: %x1 + + %x0 = LDRXui %x1, 0 + + bb.3: + liveins: %x0 + + RET_ReallyLR implicit %x0 + +... +# Similar to test2, but with a clobber that prevents removal of the XZR COPY. +# CHECK-LABEL: name: test4 +# CHECK: COPY %xzr +name: test4 +tracksRegLiveness: true +body: | + bb.0: + successors: %bb.1, %bb.2 + liveins: %x0, %x1, %x2 + + %x1 = COPY %x0 + %x1 = LDRXui %x1, 0 + CBNZX %x1, %bb.2 + + bb.1: + successors: %bb.3 + + %x0 = COPY %xzr + B %bb.3 + + bb.2: + successors: %bb.3 + liveins: %x1 + + %x0 = LDRXui %x1, 0 + + bb.3: + liveins: %x0 + + RET_ReallyLR implicit %x0 + +... +# Similar to test2, but with a clobber that prevents removal of the XZR COPY. +# CHECK-LABEL: name: test5 +# CHECK: COPY %xzr +name: test5 +tracksRegLiveness: true +body: | + bb.0: + successors: %bb.1, %bb.2 + liveins: %x0, %x1, %x2 + + %x1 = COPY %x0 + %x0 = LDRXui %x1, 0 + CBNZX %x1, %bb.2 + + bb.1: + successors: %bb.3 + + %x0 = COPY %xzr + B %bb.3 + + bb.2: + successors: %bb.3 + liveins: %x1 + + %x0 = LDRXui %x1, 0 + + bb.3: + liveins: %x0 + + RET_ReallyLR implicit %x0 + +... +# Similar to test1, but with two levels of COPYs. +# CHECK-LABEL: name: test6 +# CHECK-NOT: COPY %xzr +name: test6 +tracksRegLiveness: true +body: | + bb.0: + successors: %bb.1, %bb.2 + liveins: %x0, %x1, %x2 + + %x2 = COPY %x0 + %x1 = COPY %x2 + CBNZX %x1, %bb.2 + + bb.1: + successors: %bb.3 + + %x0 = COPY %xzr + B %bb.3 + + bb.2: + successors: %bb.3 + liveins: %x1 + + %x0 = LDRXui %x1, 0 + + bb.3: + liveins: %x0 + + RET_ReallyLR implicit %x0 + +... +# Similar to test1, but with two levels of COPYs and a clobber preventing COPY of XZR removal. +# CHECK-LABEL: name: test7 +# CHECK: COPY %xzr +name: test7 +tracksRegLiveness: true +body: | + bb.0: + successors: %bb.1, %bb.2 + liveins: %x0, %x1, %x2 + + %x2 = COPY %x0 + %x0 = LDRXui %x1, 0 + %x1 = COPY %x2 + CBNZX %x1, %bb.2 + + bb.1: + successors: %bb.3 + + %x0 = COPY %xzr + B %bb.3 + + bb.2: + successors: %bb.3 + liveins: %x1 + + %x0 = LDRXui %x1, 0 + + bb.3: + liveins: %x0 + + RET_ReallyLR implicit %x0 + +... + +# Check that the TargetRegs vector clobber update loop in +# AArch64RedundantCopyElimination::optimizeCopy works correctly. +# CHECK-LABEL: name: test8 +# CHECK: x0 = COPY %xzr +# CHECK: x1 = COPY %xzr +name: test8 +tracksRegLiveness: true +body: | + bb.0: + successors: %bb.1, %bb.2 + liveins: %x0, %x1 + + %x1 = COPY %x0 + CBNZX %x1, %bb.2 + + bb.1: + successors: %bb.3 + liveins: %x0, %x2 + + %x0, %x1 = LDPXi %x2, 0 + %x0 = COPY %xzr + %x1 = COPY %xzr + B %bb.3 + + bb.2: + successors: %bb.3 + liveins: %x1 + + %x0 = LDRXui %x1, 0 + + bb.3: + liveins: %x0 + + RET_ReallyLR implicit %x0 + +...