Index: lib/Target/AArch64/AArch64RedundantCopyElimination.cpp
===================================================================
--- lib/Target/AArch64/AArch64RedundantCopyElimination.cpp
+++ lib/Target/AArch64/AArch64RedundantCopyElimination.cpp
@@ -12,13 +12,15 @@
 //    CBZW %W0, <BB#2>
 //  BB#2:
 //    %W0 = COPY %WZR
-// This pass should be run after register allocation.
 //
-// FIXME: This should be extended to handle any constant other than zero. E.g.,
-//   cmp w0, #1
-//   b.eq .BB1
-// BB1:
-//   mov w0, #1
+// Similarly, this pass also handles non-zero copies.
+//  BB#0:
+//    cmp x0, #1
+//    b.eq .LBB0_1
+//  .LBB0_1:
+//    orr x0, xzr, #0x1
+//
+// This pass should be run after register allocation.
 //
 // FIXME: This could also be extended to check the whole dominance subtree below
 // the comparison if the compile time regression is acceptable.
@@ -50,7 +52,10 @@
     initializeAArch64RedundantCopyEliminationPass(
         *PassRegistry::getPassRegistry());
   }
-  bool optimizeCopy(MachineBasicBlock *MBB);
+
+  void removeRedundantCopy(MachineInstr *MI, unsigned DefReg,
+                           unsigned &SmallestDef);
+  bool optimizeCopy(MachineBasicBlock *MBB, BitVector &ModifiedRegs);
   bool runOnMachineFunction(MachineFunction &MF) override;
   MachineFunctionProperties getRequiredProperties() const override {
     return MachineFunctionProperties().set(
@@ -76,7 +81,114 @@
          MBB != MI.getOperand(1).getMBB());
 }
 
-bool AArch64RedundantCopyElimination::optimizeCopy(MachineBasicBlock *MBB) {
+/// Remember what registers the specified instruction modifies.
+static void trackRegDefs(const MachineInstr &MI, BitVector &ModifiedRegs,
+                         const TargetRegisterInfo *TRI) {
+  for (const MachineOperand &MO : MI.operands()) {
+    if (MO.isRegMask())
+      ModifiedRegs.setBitsNotInMask(MO.getRegMask());
+
+    if (!MO.isReg())
+      continue;
+    unsigned Reg = MO.getReg();
+    if (!Reg)
+      continue;
+    if (MO.isDef()) {
+      // WZR/XZR are not modified even when used as a destination register.
+      if (Reg != AArch64::WZR && Reg != AArch64::XZR)
+        for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
+          ModifiedRegs.set(*AI);
+    }
+  }
+}
+
+// Check if the current basic block is the target block of a conditional
+// branch whose equality comparison is against a constant. If not, return
+// false. Otherwise, return true after setting TargetReg and the known ImmVal.
+static bool knownRegValInBlock(MachineInstr &MI, MachineBasicBlock *PredMBB,
+                               MachineBasicBlock *MBB, unsigned &TargetReg,
+                               int64_t &ImmVal, BitVector &ModifiedRegs,
+                               const TargetRegisterInfo *TRI) {
+  unsigned Opc = MI.getOpcode();
+
+  // Must be a conditional branch.
+  if (Opc != AArch64::Bcc)
+    return false;
+
+  // Must be an equality check (i.e., == or !=).
+  AArch64CC::CondCode CC = (AArch64CC::CondCode)(int)MI.getOperand(0).getImm();
+  if (CC != AArch64CC::EQ && CC != AArch64CC::NE)
+    return false;
+
+  MachineBasicBlock *BrTarget = MI.getOperand(1).getMBB();
+  if ((CC == AArch64CC::EQ && BrTarget != MBB) ||
+      (CC == AArch64CC::NE && BrTarget == MBB))
+    return false;
+
+  if (MI == PredMBB->begin())
+    return false;
+
+  // Track which registers have been modified.
+  ModifiedRegs.reset();
+
+  // Find the compare instruction that sets the NZCV flags used by the
+  // conditional branch.
+  for (MachineBasicBlock::iterator B = PredMBB->begin(), I = MI; I != B;) {
+    --I;
+
+    switch (I->getOpcode()) {
+    default:
+      break;
+
+    // CMP is an alias for SUBS with a dead destination register.
+    case AArch64::SUBSWri:
+    case AArch64::SUBSXri: {
+      unsigned DestReg = I->getOperand(0).getReg();
+      if (DestReg != AArch64::WZR && DestReg != AArch64::XZR)
+        return false;
+
+      // Must not be a symbolic immediate.
+      if (!I->getOperand(2).isImm())
+        return false;
+
+      // For simplicity, give up on non-zero shifts.
+      if (I->getOperand(3).getImm())
+        return false;
+
+      // The source register must not be modified between the cmp and the
+      // conditional branch.
+      if (ModifiedRegs[I->getOperand(1).getReg()])
+        return false;
+
+      // We know that the TargetReg register contains the value ImmVal.
+      TargetReg = I->getOperand(1).getReg();
+      ImmVal = I->getOperand(2).getImm();
+      return true;
+    }
+    }
+
+    // Bail if we see an instruction that defines NZCV but that we don't
+    // handle.
+    if (I->definesRegister(AArch64::NZCV))
+      return false;
+
+    // Track modified registers.
+    trackRegDefs(*I, ModifiedRegs, TRI);
+  }
+  return false;
+}
+
+void AArch64RedundantCopyElimination::removeRedundantCopy(
+    MachineInstr *MI, unsigned DefReg, unsigned &SmallestDef) {
+  DEBUG(dbgs() << "Remove redundant copy : ");
+  DEBUG(MI->print(dbgs()));
+
+  MI->eraseFromParent();
+  SmallestDef = TRI->isSubRegister(SmallestDef, DefReg) ? DefReg : SmallestDef;
+  NumCopiesRemoved++;
+}
+
+bool AArch64RedundantCopyElimination::optimizeCopy(MachineBasicBlock *MBB,
+                                                   BitVector &ModifiedRegs) {
   // Check if the current basic block has a single predecessor.
   if (MBB->pred_size() != 1)
     return false;
@@ -91,22 +203,34 @@
   if (CompBr == PredMBB->end())
     return false;
 
+  bool ZeroRegVal = false;
+  unsigned TargetReg = 0;
   ++CompBr;
   do {
     --CompBr;
-    if (guaranteesZeroRegInBlock(*CompBr, MBB))
+    if ((ZeroRegVal = guaranteesZeroRegInBlock(*CompBr, MBB))) {
+      TargetReg = CompBr->getOperand(0).getReg();
       break;
+    }
   } while (CompBr != PredMBB->begin() && CompBr->isTerminator());
 
-  // We've not found a CBZ/CBNZ, time to bail out.
-  if (!guaranteesZeroRegInBlock(*CompBr, MBB))
-    return false;
+  int64_t KnownImm;
+  bool KnownRegVal = false;
+  if (!ZeroRegVal) {
+    CompBr = PredMBB->getLastNonDebugInstr();
+    KnownRegVal = knownRegValInBlock(*CompBr, PredMBB, MBB, TargetReg, KnownImm,
+                                     ModifiedRegs, TRI);
+  }
 
-  unsigned TargetReg = CompBr->getOperand(0).getReg();
-  if (!TargetReg)
+  // We've not found a CBZ/CBNZ or a non-zero compare and branch; time to bail
+  // out.
+  if (!ZeroRegVal && !KnownRegVal)
     return false;
+  assert(TargetRegisterInfo::isPhysicalRegister(TargetReg) &&
+         "Expect physical register");
+  assert(!(ZeroRegVal && KnownRegVal) &&
+         "ZeroRegVal and KnownRegVal can't both be true.");
 
   // Remember all registers aliasing with TargetReg.
   SmallSetVector<unsigned, 8> TargetRegs;
@@ -120,28 +244,56 @@
   for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;) {
     MachineInstr *MI = &*I;
     ++I;
-    if (MI->isCopy() && MI->getOperand(0).isReg() &&
+
+    // Case 1:
+    //  BB#0:
+    //    cbz x0, .LBB0_2
+    //  BB#1:
+    //    ldr x0, [x1]
+    //    ret
+    //  .LBB0_2:
+    //    mov x0, xzr   <-- redundant copy
+    //    ret
+    if (ZeroRegVal && MI->isCopy() && MI->getOperand(0).isReg() &&
         MI->getOperand(1).isReg()) {
       unsigned DefReg = MI->getOperand(0).getReg();
       unsigned SrcReg = MI->getOperand(1).getReg();
-
-      if ((SrcReg == AArch64::XZR || SrcReg == AArch64::WZR) &&
-          !MRI->isReserved(DefReg) &&
+      if (!MRI->isReserved(DefReg) &&
+          (SrcReg == AArch64::XZR || SrcReg == AArch64::WZR) &&
          (TargetReg == DefReg || TRI->isSuperRegister(DefReg, TargetReg))) {
-        DEBUG(dbgs() << "Remove redundant Copy : ");
-        DEBUG((MI)->print(dbgs()));
-
-        MI->eraseFromParent();
+        removeRedundantCopy(MI, DefReg, SmallestDef);
+        LastChange = I;
         Changed = true;
+        continue;
+      }
+    }
+
+    // Case 2:
+    //  BB#0:
+    //    cmp x0, #1
+    //    b.eq .LBB0_2
+    //  BB#1:
+    //    ldr x0, [x1]
+    //    ret
+    //  .LBB0_2:
+    //    orr x0, xzr, #0x1   <-- redundant mov
+    //    ret
+    if (KnownRegVal && MI->isMoveImmediate() && MI->getOperand(0).isReg() &&
+        MI->getOperand(1).isImm()) {
+      unsigned DefReg = MI->getOperand(0).getReg();
+      int64_t SrcImm = MI->getOperand(1).getImm();
+
+      if (!MRI->isReserved(DefReg) && SrcImm == KnownImm &&
+          (TargetReg == DefReg || TRI->isSuperRegister(DefReg, TargetReg))) {
+        removeRedundantCopy(MI, DefReg, SmallestDef);
         LastChange = I;
-        NumCopiesRemoved++;
-        SmallestDef =
-            TRI->isSubRegister(SmallestDef, DefReg) ? DefReg : SmallestDef;
+        Changed = true;
         continue;
       }
     }
 
+    // Give up if the target register's value is changed.
     if (MI->modifiesRegister(TargetReg, TRI))
       break;
   }
@@ -169,9 +321,15 @@
     return false;
   TRI = MF.getSubtarget().getRegisterInfo();
   MRI = &MF.getRegInfo();
+
+  // Resize the modified-register bitfield tracker. We do this once per
+  // function and then clear the bitfield each time we optimize a copy.
+  BitVector ModifiedRegs;
+  ModifiedRegs.resize(TRI->getNumRegs());
+
   bool Changed = false;
   for (MachineBasicBlock &MBB : MF)
-    Changed |= optimizeCopy(&MBB);
+    Changed |= optimizeCopy(&MBB, ModifiedRegs);
   return Changed;
 }
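For context, and not part of the patch itself: a hypothetical C-level example of the pattern the new Case 2 logic removes. The function below and its simplified pre-pass assembly are illustrative assumptions, not reproduced compiler output.

  int f(int x, int *p) {
    if (x != 1)
      return *p;
    return 1;             // on this path x == 1, and w0 already holds x
  }

  f:                      // simplified AArch64 assembly before this pass
        cmp   w0, #1
        b.eq  .LBB0_2     // taken only when w0 == 1
        ldr   w0, [x1]
        ret
  .LBB0_2:
        mov   w0, #1      // redundant: w0 is already known to be 1 here
        ret               // the pass can delete the mov above

Because .LBB0_2 has the conditional branch's block as its single predecessor and the comparison is an equality check against a constant, knownRegValInBlock() concludes that w0 holds 1 on entry to the block, so the immediate move is erased.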
Index: test/CodeGen/AArch64/machine-copy-remove.mir
===================================================================
--- /dev/null
+++ test/CodeGen/AArch64/machine-copy-remove.mir
@@ -0,0 +1,77 @@
+# RUN: llc -mtriple=aarch64--linux-gnu -run-pass=aarch64-copyelim %s -verify-machineinstrs -o - 2>&1 | FileCheck %s
+
+# These tests make sure we can remove redundant non-zero copy instructions.
+
+--- |
+  define void @test1() { ret void }
+  define void @test2() { ret void }
+...
+---
+name: test1
+tracksRegLiveness: true
+body: |
+  bb.0:
+    successors: %bb.1(0x40000000), %bb.2(0x40000000)
+    liveins: %w0, %x1
+
+    dead %wzr = SUBSWri killed %w0, 1, 0, implicit-def %nzcv
+    Bcc 1, %bb.2, implicit killed %nzcv
+
+  bb.1:
+    successors: %bb.3(0x80000000)
+
+    %w0 = MOVi32imm 1 ; MOVi32imm is redundant based on the dominating condition.
+    B %bb.3
+
+  bb.2:
+    successors: %bb.3(0x80000000)
+    liveins: %x1
+
+    %w0 = LDRWui killed %x1, 0 :: (load 4)
+
+  bb.3:
+    liveins: %w0
+
+    RET_ReallyLR implicit %w0
+
+...
+# CHECK-LABEL: test1
+# CHECK: bb.1:
+# CHECK-NEXT: successors: %bb.3(0x80000000)
+# CHECK-NEXT: liveins: %w0
+# CHECK-NOT: %w0 = MOVi32imm 1
+# CHECK: RET_ReallyLR
+---
+name: test2
+tracksRegLiveness: true
+body: |
+  bb.0:
+    successors: %bb.1(0x40000000), %bb.2(0x40000000)
+    liveins: %x0, %x1
+
+    dead %xzr = SUBSXri killed %x0, 1, 0, implicit-def %nzcv
+    Bcc 1, %bb.2, implicit killed %nzcv
+
+  bb.1:
+    successors: %bb.3(0x80000000)
+
+    %w0 = MOVi32imm 1, implicit-def %x0 ; MOVi32imm is redundant based on the dominating condition.
+    B %bb.3
+
+  bb.2:
+    successors: %bb.3(0x80000000)
+    liveins: %x1
+
+    %x0 = LDRXui killed %x1, 0 :: (load 8)
+
+  bb.3:
+    liveins: %x0
+
+    RET_ReallyLR implicit %x0
+...
+# CHECK-LABEL: test2
+# CHECK: bb.1:
+# CHECK-NEXT: successors: %bb.3(0x80000000)
+# CHECK-NEXT: liveins: %x0
+# CHECK-NOT: %w0 = MOVi32imm 1
+# CHECK: RET_ReallyLR
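As a usage sketch for verifying the change locally (assuming a debug build of llc, and that the pass's DEBUG_TYPE string matches the aarch64-copyelim pass name used in the RUN line above), the deleted instructions can be observed via the DEBUG statements in removeRedundantCopy():

  $ llc -mtriple=aarch64--linux-gnu -run-pass=aarch64-copyelim \
        -debug-only=aarch64-copyelim -verify-machineinstrs \
        test/CodeGen/AArch64/machine-copy-remove.mir -o /dev/null

Each removal is reported as "Remove redundant copy : " followed by the erased machine instruction.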