Index: include/llvm/Target/TargetInstrInfo.h
===================================================================
--- include/llvm/Target/TargetInstrInfo.h
+++ include/llvm/Target/TargetInstrInfo.h
@@ -1109,6 +1109,14 @@
   }
 
   virtual bool optimizeCondBranch(MachineInstr *MI) const { return false; }
 
+  // Try to remove redundant copy instructions which remain after register
+  // allocation.
+  virtual bool optimizeCopy(MachineBasicBlock *MBB,
+                            const MachineRegisterInfo *MRI,
+                            const TargetRegisterInfo *TRI) const {
+    return false;
+  }
+
   /// Try to remove the load by folding it to a register operand at the use.
   /// We fold the load instructions if and only if the
   /// def and use are in the same BB. We only look at one load and see
Index: lib/CodeGen/MachineCopyPropagation.cpp
===================================================================
--- lib/CodeGen/MachineCopyPropagation.cpp
+++ lib/CodeGen/MachineCopyPropagation.cpp
@@ -339,8 +339,9 @@
   TII = MF.getSubtarget().getInstrInfo();
   MRI = &MF.getRegInfo();
 
-  for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
+  for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
     Changed |= CopyPropagateBlock(*I);
-
+    Changed |= TII->optimizeCopy(&*I, MRI, TRI);
+  }
   return Changed;
 }
Index: lib/Target/AArch64/AArch64InstrInfo.h
===================================================================
--- lib/Target/AArch64/AArch64InstrInfo.h
+++ lib/Target/AArch64/AArch64InstrInfo.h
@@ -163,6 +163,12 @@
                         unsigned SrcReg2, int CmpMask, int CmpValue,
                         const MachineRegisterInfo *MRI) const override;
   bool optimizeCondBranch(MachineInstr *MI) const override;
+
+  // optimizeCopy - Remove redundant copy instructions which remain after
+  // register allocation.
+  bool optimizeCopy(MachineBasicBlock *MBB, const MachineRegisterInfo *MRI,
+                    const TargetRegisterInfo *TRI) const override;
+
   /// Return true when there is potentially a faster code sequence
   /// for an instruction chain ending in <Root>. All potential patterns are
   /// listed in the <Patterns> array.
Index: lib/Target/AArch64/AArch64InstrInfo.cpp
===================================================================
--- lib/Target/AArch64/AArch64InstrInfo.cpp
+++ lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -982,6 +982,101 @@
   return true;
 }
 
+// This function tries to remove copy instructions placed in the target block
+// of a cbz/cbnz when they copy zero to the same register tested by the
+// cbz/cbnz. For instance, the copy instruction in the code below can be
+// removed because the CBZW jumps to BB#2 when %W0 is zero:
+//  BB#1:
+//    CBZW %W0, <BB#2>
+//  BB#2:
+//    %W0 = COPY %WZR
+bool AArch64InstrInfo::optimizeCopy(MachineBasicBlock *MBB,
+                                    const MachineRegisterInfo *MRI,
+                                    const TargetRegisterInfo *TRI) const {
+  bool Changed = false;
+  // Check if the current basic block has a single predecessor.
+  if (MBB->pred_size() != 1)
+    return false;
+
+  MachineBasicBlock *PredMBB = *MBB->pred_begin();
+  MachineBasicBlock::iterator CompBr = PredMBB->getLastNonDebugInstr();
+  if (CompBr == PredMBB->end() || PredMBB->succ_size() != 2)
+    return false;
+
+  unsigned LastOpc = CompBr->getOpcode();
+  // Check if the current basic block is the target block to which the
+  // cbz/cbnz instruction jumps when its Wt/Xt is zero.
+  if (LastOpc == AArch64::CBZW || LastOpc == AArch64::CBZX) {
+    if (MBB != CompBr->getOperand(1).getMBB())
+      return false;
+  } else if (LastOpc == AArch64::CBNZW || LastOpc == AArch64::CBNZX) {
+    if (MBB == CompBr->getOperand(1).getMBB())
+      return false;
+  } else {
+    return false;
+  }
+
+  unsigned TargetReg = CompBr->getOperand(0).getReg();
+  if (!TargetReg)
+    return false;
+  assert(TargetRegisterInfo::isPhysicalRegister(TargetReg) &&
+         "optimizeCopy() should be run after register allocation!");
+
+  // Remember all registers aliasing with TargetReg.
+  SmallSetVector<unsigned, 8> TargetRegs;
+  for (MCRegAliasIterator AI(TargetReg, TRI, true); AI.isValid(); ++AI)
+    TargetRegs.insert(*AI);
+
+  // Remove redundant copy instructions unless TargetReg is modified.
+  for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;) {
+    MachineInstr *MI = &*I;
+    ++I;
+
+    if (MI->isCopy() && MI->getOperand(0).isReg() &&
+        MI->getOperand(1).isReg()) {
+      unsigned DefReg = MI->getOperand(0).getReg();
+      unsigned SrcReg = MI->getOperand(1).getReg();
+      if ((SrcReg == AArch64::XZR || SrcReg == AArch64::WZR) &&
+          !MRI->isReserved(DefReg) &&
+          (TargetReg == DefReg || TRI->isSuperRegister(DefReg, TargetReg))) {
+        // Clear any kills of TargetReg between CompBr and MI.
+        CompBr->clearRegisterKills(TargetReg, TRI);
+        for (MachineBasicBlock::iterator I = MBB->begin(), E = MI; I != E; ++I)
+          I->clearRegisterKills(TargetReg, TRI);
+
+        MI->eraseFromParent();
+        Changed = true;
+        continue;
+      }
+    }
+
+    // Stop if we encounter any intervening side effect.
+    if (MI->hasUnmodeledSideEffects() || MI->isCall() || MI->isTerminator())
+      return Changed;
+
+    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+      MachineOperand &MO = MI->getOperand(i);
+      // FIXME: It is possible to use the register mask to check if all
+      // registers in TargetRegs are not clobbered. For now, we treat it like
+      // a basic block boundary.
+      if (MO.isRegMask())
+        return Changed;
+      if (!MO.isReg())
+        continue;
+      unsigned Reg = MO.getReg();
+      if (!Reg)
+        continue;
+      assert(TargetRegisterInfo::isPhysicalRegister(Reg) &&
+             "optimizeCopy() should be run after register allocation!");
+
+      // Stop if TargetReg is modified.
+      if (MO.isDef() && TargetRegs.count(Reg))
+        return Changed;
+    }
+  }
+  return Changed;
+}
+
 bool AArch64InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
   if (MI->getOpcode() != TargetOpcode::LOAD_STACK_GUARD)
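Aside for reviewers (not part of the patch): the control-flow reasoning in optimizeCopy can be read in isolation from the machine-IR plumbing. Below is a self-contained toy model of the same idea. Every type and function name in it (Inst, Block, removeRedundantZeroCopies) is invented for this sketch, and it deliberately ignores register aliasing, calls, side effects, and register masks, all of which the real implementation above must handle.

#include <cassert>
#include <cstdio>
#include <string>
#include <vector>

// Toy machine IR, invented for this sketch only (not LLVM's API).
struct Inst {
  enum KindTy { CopyZero, Def, Other };
  KindTy Kind;
  std::string Reg; // register written by CopyZero/Def; unused for Other
};

struct Block {
  // Register proven to be zero on entry ("" if none), e.g. because the
  // sole predecessor ends in "cbz Reg" branching to this block.
  std::string KnownZeroReg;
  std::vector<Inst> Insts;
};

// Mirrors the patch's loop: erase "Reg = copy zero" while Reg is known to
// hold zero; any other write to Reg invalidates that fact.
static bool removeRedundantZeroCopies(Block &B) {
  bool Changed = false;
  if (B.KnownZeroReg.empty())
    return Changed;
  for (auto I = B.Insts.begin(); I != B.Insts.end();) {
    if (I->Kind == Inst::CopyZero && I->Reg == B.KnownZeroReg) {
      I = B.Insts.erase(I); // the register already holds zero
      Changed = true;
      continue;
    }
    if (I->Kind == Inst::Def && I->Reg == B.KnownZeroReg)
      break; // the zero fact no longer holds past this point
    ++I;
  }
  return Changed;
}

int main() {
  Block B;
  B.KnownZeroReg = "w0"; // reached along the zero edge of "cbz w0"
  B.Insts = {{Inst::CopyZero, "w0"},  // redundant: removed
             {Inst::Other, ""},
             {Inst::Def, "w0"},       // w0 is redefined here
             {Inst::CopyZero, "w0"}}; // no longer redundant: kept
  bool Changed = removeRedundantZeroCopies(B);
  assert(Changed && B.Insts.size() == 3);
  std::printf("removed %zu of 4 instructions\n", 4 - B.Insts.size());
}

The invariant is the one the patch relies on: along the zero edge of a cbz (or the fall-through edge of a cbnz), the tested register is known to contain zero until the first instruction that may redefine it.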
Index: test/CodeGen/AArch64/machine-copy-remove.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AArch64/machine-copy-remove.ll
@@ -0,0 +1,74 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mcpu=cortex-a57 -verify-machineinstrs < %s | FileCheck %s
+
+; CHECK-LABEL: f_XX
+; CHECK: cbz x[[REG:[0-9]+]], [[BB:.LBB.*]]
+; CHECK: [[BB]]:
+; CHECK-NOT: mov x[[REG]], xzr
+define i64 @f_XX(i64 %n, i64* nocapture readonly %P) {
+entry:
+  %tobool = icmp eq i64 %n, 0
+  br i1 %tobool, label %if.end, label %if.then
+
+if.then:                                          ; preds = %entry
+  %0 = load i64, i64* %P
+  br label %if.end
+
+if.end:                                           ; preds = %entry, %if.then
+  %a.0 = phi i64 [ %0, %if.then ], [ 0, %entry ]
+  ret i64 %a.0
+}
+
+; CHECK-LABEL: f_WW
+; CHECK: cbz w[[REG:[0-9]+]], [[BB:.LBB.*]]
+; CHECK: [[BB]]:
+; CHECK-NOT: mov w[[REG]], wzr
+define i32 @f_WW(i32 %n, i32* nocapture readonly %P) {
+entry:
+  %tobool = icmp eq i32 %n, 0
+  br i1 %tobool, label %if.end, label %if.then
+
+if.then:                                          ; preds = %entry
+  %0 = load i32, i32* %P
+  br label %if.end
+
+if.end:                                           ; preds = %entry, %if.then
+  %a.0 = phi i32 [ %0, %if.then ], [ 0, %entry ]
+  ret i32 %a.0
+}
+
+; CHECK-LABEL: f_XW
+; CHECK: cbz x[[REG:[0-9]+]], [[BB:.LBB.*]]
+; CHECK: [[BB]]:
+; CHECK-NOT: mov w[[REG]], wzr
+define i32 @f_XW(i64 %n, i32* nocapture readonly %P) {
+entry:
+  %tobool = icmp eq i64 %n, 0
+  br i1 %tobool, label %if.end, label %if.then
+
+if.then:                                          ; preds = %entry
+  %0 = load i32, i32* %P
+  br label %if.end
+
+if.end:                                           ; preds = %entry, %if.then
+  %a.0 = phi i32 [ %0, %if.then ], [ 0, %entry ]
+  ret i32 %a.0
+}
+
+; CHECK-LABEL: f_WX
+; CHECK: cbz w[[REG:[0-9]+]], [[BB:.LBB.*]]
+; CHECK: [[BB]]:
+; CHECK: mov x[[REG]], xzr
+define i64 @f_WX(i32 %n, i64* nocapture readonly %P) {
+entry:
+  %tobool = icmp eq i32 %n, 0
+  br i1 %tobool, label %if.end, label %if.then
+
+if.then:                                          ; preds = %entry
+  %0 = load i64, i64* %P
+  br label %if.end
+
+if.end:                                           ; preds = %entry, %if.then
+  %a.0 = phi i64 [ %0, %if.then ], [ 0, %entry ]
+  ret i64 %a.0
+}
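One subtlety the tests pin down: f_XW expects the 32-bit zeroing copy to be removed after a 64-bit cbz, while f_WX expects the 64-bit copy to survive a 32-bit cbz. That asymmetry falls out of the isSuperRegister(DefReg, TargetReg) check in the patch and of the AArch64 register model itself. The following stand-alone C++ sketch (XReg and its layout are invented for illustration) states it as executable assertions.

#include <cassert>
#include <cstdint>

// Toy view of an AArch64 X register, invented for this sketch.
struct XReg {
  uint64_t bits;
  uint32_t w() const { return static_cast<uint32_t>(bits); } // low 32 bits
};

int main() {
  XReg x0{0xdeadbeef00000000ULL};
  // "cbz w0" taken: proves only the low half is zero...
  assert(x0.w() == 0);
  // ...so a 64-bit "mov x0, xzr" in the target block is NOT redundant:
  assert(x0.bits != 0);

  XReg x1{0};
  // "cbz x1" taken: proves the whole register, hence also w1, is zero,
  // so a 32-bit "mov w1, wzr" in the target block is redundant.
  assert(x1.w() == 0 && x1.bits == 0);
}

In other words, "w0 == 0" does not imply "x0 == 0", so only a copy to the tested register or to one of its sub-registers may be removed.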