Index: include/llvm/CodeGen/Passes.h =================================================================== --- include/llvm/CodeGen/Passes.h +++ include/llvm/CodeGen/Passes.h @@ -154,6 +154,9 @@ /// This pass adds dead/undef flags after analyzing subregister lanes. extern char &DetectDeadLanesID; + /// This pass sink COPY instruction close to its use. + extern char &SinkCopyID; + /// FastRegisterAllocation Pass - This pass register allocates as fast as /// possible. It is best suited for debug code where live ranges are short. /// Index: include/llvm/InitializePasses.h =================================================================== --- include/llvm/InitializePasses.h +++ include/llvm/InitializePasses.h @@ -97,6 +97,7 @@ void initializeCalledValuePropagationLegacyPassPass(PassRegistry &); void initializeConstantMergeLegacyPassPass(PassRegistry&); void initializeConstantPropagationPass(PassRegistry&); +void initializeCopySinkPassPass(PassRegistry&); void initializeCorrelatedValuePropagationPass(PassRegistry&); void initializeCostModelAnalysisPass(PassRegistry&); void initializeEntryExitInstrumenterPass(PassRegistry&); Index: lib/CodeGen/CMakeLists.txt =================================================================== --- lib/CodeGen/CMakeLists.txt +++ lib/CodeGen/CMakeLists.txt @@ -63,6 +63,7 @@ MachineBranchProbabilityInfo.cpp MachineCombiner.cpp MachineCopyPropagation.cpp + MachineCopySink.cpp MachineCSE.cpp MachineDominanceFrontier.cpp MachineDominators.cpp Index: lib/CodeGen/CodeGen.cpp =================================================================== --- lib/CodeGen/CodeGen.cpp +++ lib/CodeGen/CodeGen.cpp @@ -24,6 +24,7 @@ initializeBranchFolderPassPass(Registry); initializeBranchRelaxationPass(Registry); initializeCodeGenPreparePass(Registry); + initializeCopySinkPassPass(Registry); initializeDeadMachineInstructionElimPass(Registry); initializeDetectDeadLanesPass(Registry); initializeDwarfEHPreparePass(Registry); Index: lib/CodeGen/MachineCopySink.cpp =================================================================== --- /dev/null +++ lib/CodeGen/MachineCopySink.cpp @@ -0,0 +1,225 @@ +//===- MachineCopySink.cpp - Machine Copy Sink --*- C++ -*---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass sink COPY instructions into a successor close to their use, if +// unused in the current block. In the CodeGen phase, COPY instructions are +// added in some passes (e.g., ISel and PhiElimination), which are not handled +// in MachineSink pass. For example, in AArch64, ISel adds Copy instructions to +// move function parameters (PhyReg) to virtual registers in the entry block. +// Before RA, MachineSink cannot sink such Copy instructions because SrcReg is +// an allocatable PhyReg. By sinking such COPY instructions to successors close +// to its actual use, we can avoid executing instructions in case the result is +// not used. Also, it will open up more opportunities for other optimizations +// (e.g., dead copy elimination in MachineCopyPropagation and shrink-wrapping). +// +// For example, for the machine IR below, this pass will sink %w19 in the entry +// into its successor (%bb.1) because %w19 is only live-in in %bb.1. +// %bb.0: +// %wzr = SUBSWri %w1, 1 +// %w19 = COPY %w0 +// Bcc 11, %bb.2 +// %bb.1: +// Live Ins: %w19 +// BL @fun +// %w0 = ADDWrr %w0, %w19 +// RET %w0 +// %bb.2: +// %w0 = COPY %wzr +// RET %w0 +// As we sink %w19 (CSR in AArch64) into %bb.1, the shrink-wrapping pass will be +// able to see %bb.0 as a candidate. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/iterator_range.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/Support/Debug.h" + +using namespace llvm; + +#define DEBUG_TYPE "postra-copy-sink" + +STATISTIC(NumCopySink, "Number of copies sunk"); + +namespace { + +class CopySinkPass : public MachineFunctionPass { +public: + bool runOnMachineFunction(MachineFunction &MF) override; + + static char ID; + CopySinkPass() : MachineFunctionPass(ID) {} + StringRef getPassName() const override { return "Copy Sink"; } + +private: + /// Track which registers have been modified and used. + BitVector ModifiedRegs, UsedRegs; + + /// Sink Copy instructions unused in the same block close to their uses in + /// successors. + bool tryToSinkCopy(MachineBasicBlock *Entry, MachineFunction &MF, + const TargetRegisterInfo *TRI); +}; +} // namespace + +char CopySinkPass::ID = 0; +char &llvm::SinkCopyID = CopySinkPass::ID; + +INITIALIZE_PASS(CopySinkPass, DEBUG_TYPE, "Copy Sink", false, false) + +/// Remember what registers the specified instruction uses +/// and modifies. +static void trackRegDefsUses(MachineInstr *MI, BitVector &ModifiedRegs, + BitVector &UsedRegs, + const TargetRegisterInfo *TRI) { + for (MachineOperand &MO : MI->operands()) { + if (MO.isRegMask()) + ModifiedRegs.setBitsNotInMask(MO.getRegMask()); + if (!MO.isReg()) + continue; + unsigned Reg = MO.getReg(); + if (!Reg) + continue; + if (MO.isDef()) { + for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) + ModifiedRegs.set(*AI); + } else { + assert(MO.isUse() && "Reg operand not a def and not a use"); + for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) + UsedRegs.set(*AI); + } + } +} + +static MachineBasicBlock * +getSingleLiveInSuccBB(MachineBasicBlock &CurBB, + SmallVector &SinkableBBs, + unsigned Reg, const TargetRegisterInfo *TRI) { + // Check if any register aliased with Reg is live-in in other successors. + SmallSet AliasedRegs; + for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) + AliasedRegs.insert(*AI); + + // Try to find a single sinkable successor in which Reg is live-in. + MachineBasicBlock *BB = nullptr; + for (auto *SI : SinkableBBs) { + if (SI->isLiveIn(Reg)) { + if (BB) + return nullptr; + BB = SI; + } + } + + if (!BB) + return nullptr; + + // Check if any register aliased with Reg is live-in in other successors. + for (auto *SI : CurBB.successors()) { + if (SI == BB) + continue; + for (const auto LI : SI->liveins()) + if (AliasedRegs.count(LI.PhysReg)) + return nullptr; + } + return BB; +} + +bool CopySinkPass::tryToSinkCopy(MachineBasicBlock *CurBB, MachineFunction &MF, + const TargetRegisterInfo *TRI) { + SmallVector SinkableBBs; + // FIXME: For now, we sink only to a successor which has a single predecessor + // so that we can directly sink COPY instructions to the successor without + // adding any new block or branch instruction. + for (MachineBasicBlock *SI : CurBB->successors()) + if (!SI->livein_empty() && SI->pred_size() == 1) + SinkableBBs.push_back(SI); + + if (SinkableBBs.empty()) + return false; + + bool Changed = false; + + // Track which registers have been modified and used between the end of the + // block and the current instruction. + ModifiedRegs.reset(); + UsedRegs.reset(); + + for (auto I = CurBB->rbegin(), E = CurBB->rend(); I != E;) { + MachineInstr *MI = &*I; + ++I; + + // Do not move any instruction across function call. + if (MI->isCall()) + return false; + + if (!MI->isCopy() || !MI->getOperand(0).isRenamable()) { + trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI); + continue; + } + + unsigned DefReg = MI->getOperand(0).getReg(); + unsigned SrcReg = MI->getOperand(1).getReg(); + + if (ModifiedRegs[DefReg] || ModifiedRegs[SrcReg] || UsedRegs[DefReg]) { + trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI); + continue; + } + + MachineBasicBlock *SuccBB = + getSingleLiveInSuccBB(*CurBB, SinkableBBs, DefReg, TRI); + if (!SuccBB) { + trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI); + continue; + } + assert((SuccBB->pred_size() == 1 && *SuccBB->pred_begin() == CurBB) && + "Unexpected predecessor"); + + // Clear the kill flag if SrcReg is killed between MI and the end of the + // block. + if (UsedRegs[SrcReg]) { + MachineBasicBlock::iterator NI = std::next(MI->getIterator()); + for (MachineInstr &UI : make_range(NI, CurBB->end())) { + if (UI.killsRegister(SrcReg, TRI)) { + UI.clearRegisterKills(SrcReg, TRI); + MI->getOperand(1).setIsKill(true); + break; + } + } + } + + MachineBasicBlock::iterator InsertPos = SuccBB->getFirstNonPHI(); + SuccBB->splice(InsertPos, CurBB, MI); + SuccBB->removeLiveIn(DefReg); + if (!SuccBB->isLiveIn(SrcReg)) + SuccBB->addLiveIn(SrcReg); + + Changed = true; + ++NumCopySink; + } + return Changed; +} + +bool CopySinkPass::runOnMachineFunction(MachineFunction &MF) { + bool Changed = false; + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); + ModifiedRegs.resize(TRI->getNumRegs()); + UsedRegs.resize(TRI->getNumRegs()); + + for (auto &BB : MF) + Changed |= tryToSinkCopy(&BB, MF, TRI); + + return Changed; +} Index: lib/CodeGen/TargetPassConfig.cpp =================================================================== --- lib/CodeGen/TargetPassConfig.cpp +++ lib/CodeGen/TargetPassConfig.cpp @@ -839,8 +839,10 @@ addPostRegAlloc(); // Insert prolog/epilog code. Eliminate abstract frame index references... - if (getOptLevel() != CodeGenOpt::None) + if (getOptLevel() != CodeGenOpt::None) { + addPass(&SinkCopyID); addPass(&ShrinkWrapID); + } // Prolog/Epilog inserter needs a TargetMachine to instantiate. But only // do so if it hasn't been disabled, substituted, or overridden. Index: test/CodeGen/AArch64/post-rs-copy-sink.mir =================================================================== --- /dev/null +++ test/CodeGen/AArch64/post-rs-copy-sink.mir @@ -0,0 +1,335 @@ +# RUN: llc -mtriple=aarch64-none-linux-gnu -run-pass=postra-copy-sink -verify-machineinstrs -o - %s | FileCheck %s + +--- +# Sink w19 to %bb.1. +# CHECK-LABEL: name: sinkcopy1 +# CHECK-LABEL: bb.0: +# CHECK-NOT: %w19 = COPY killed %w0 +# CHECK-LABEL: bb.1: +# CHECK: liveins: %w1, %w0 +# CHECK: renamable %w19 = COPY killed %w0 + +name: sinkcopy1 +tracksRegLiveness: true +body: | + bb.0: + liveins: %w0, %w1 + %w1 = SUBSWri %w1, 1, 0, implicit-def %nzcv + renamable %w19 = COPY killed %w0 + Bcc 11, %bb.1, implicit %nzcv + B %bb.2 + + bb.1: + liveins: %w1, %w19 + %w0 = ADDWrr %w1, %w19 + RET %x0 + + bb.2: + %w0 = COPY %wzr + RET %x0 +... + +--- +# Sink w19 to %bb.2. +# CHECK-LABEL: name: sinkcopy2 +# CHECK-LABEL: bb.0: +# CHECK-NOT: renamable %w19 = COPY killed %w0 +# CHECK-LABEL: bb.2: +# CHECK: liveins: %w1, %w0 +# CHECK: renamable %w19 = COPY killed %w0 +name: sinkcopy2 +tracksRegLiveness: true +body: | + bb.0: + liveins: %w0, %w1 + %w1 = SUBSWri %w1, 1, 0, implicit-def %nzcv + renamable %w19 = COPY killed %w0 + Bcc 11, %bb.2, implicit %nzcv + B %bb.1 + + bb.1: + %w0 = COPY %wzr + RET %x0 + + bb.2: + liveins: %w1, %w19 + %w0 = ADDWrr %w1, %w19 + RET %x0 +... + +--- +# Sink w19 and w20 to %bb.1. +# CHECK-LABEL: name: sinkcopy3 +# CHECK-LABEL: bb.0: +# CHECK-NOT: renamable %w19 = COPY killed %w0 +# CHECK-LABEL: bb.1: +# CHECK: liveins: %w1, %w0 +# CHECK: renamable %w19 = COPY killed %w0 +# CHECK: renamable %w20 = COPY killed %w1 +name: sinkcopy3 +tracksRegLiveness: true +body: | + bb.0: + liveins: %w0, %w1 + %w1 = SUBSWri %w1, 1, 0, implicit-def %nzcv + renamable %w19 = COPY killed %w0 + renamable %w20 = COPY killed %w1 + + bb.1: + liveins: %w19, %w20 + %w0 = COPY %w19 + %w1 = COPY %w20 + RET %x0 +... + + +# Sink w19 to %bb.1 and w20 to %bb.2. +# CHECK-LABEL: name: sinkcopy4 +# CHECK-LABEL: bb.0: +# CHECK-NOT: renamable %w19 = COPY killed %w0 +# CHECK-NOT: renamable %w20 = COPY killed %w1 +# CHECK-LABEL: bb.1: +# CHECK: liveins: %w1, %w0 +# CHECK: renamable %w19 = COPY killed %w0 +# CHECK-LABEL: bb.2: +# CHECK: liveins: %w0, %w1 +# CHECK: renamable %w20 = COPY killed %w1 +name: sinkcopy4 +tracksRegLiveness: true +body: | + bb.0: + liveins: %w0, %w1 + %w1 = SUBSWri %w1, 1, 0, implicit-def %nzcv + renamable %w19 = COPY killed %w0 + renamable %w20 = COPY killed %w1 + Bcc 11, %bb.2, implicit %nzcv + B %bb.1 + + bb.1: + liveins: %w1, %w19 + %w0 = ADDWrr %w1, %w19 + RET %x0 + + bb.2: + liveins: %w0, %w20 + %w0 = ADDWrr %w0, %w20 + RET %x0 +... + +# Sink w19 to %bb.3 through %bb.2. +# CHECK-LABEL: name: sinkcopy5 +# CHECK-LABEL: bb.0: +# CHECK-NOT: renamable %w19 = COPY %w0 +# CHECK-LABEL: bb.2: +# CHECK: %w1 = ADDWrr %w1, %w0 +# CHECK-LABEL: bb.3: +# CHECK: liveins: %w1, %w0 +# CHECK: renamable %w19 = COPY killed %w0 +name: sinkcopy5 +tracksRegLiveness: true +body: | + bb.0: + liveins: %w0, %w1 + %w1 = SUBSWri %w1, 1, 0, implicit-def %nzcv + renamable %w19 = COPY %w0 + Bcc 11, %bb.2, implicit %nzcv + + bb.1: + liveins: %x0 + %w19 = COPY %wzr + RET %x0 + + bb.2: + liveins: %w0, %w1, %w19 + %w1 = ADDWrr %w1, killed %w0 + + bb.3: + liveins: %w1, %w19 + %w0 = ADDWrr %w1, %w19 + RET %x0 +... + +# Sink w19 to %bb.3, but through %bb.2. +# CHECK-LABEL: name: sinkcopy6 +# CHECK-LABEL: bb.0: +# CHECK-NOT: renamable %w19 = COPY %w0 +# CHECK-NOT: renamable %w20 = COPY %w0 +# CHECK-LABEL: bb.2: +# CHECK: liveins: %w1, %w0 +# CHECK: renamable %w19 = COPY %w0 +# CHECK: renamable %w20 = COPY %w19 +name: sinkcopy6 +tracksRegLiveness: true +body: | + bb.0: + liveins: %w0, %w1 + %w1 = SUBSWri %w1, 1, 0, implicit-def %nzcv + renamable %w19 = COPY %w0 + renamable %w20 = COPY %w19 + Bcc 11, %bb.2, implicit %nzcv + + bb.1: + %w0 = COPY %wzr + RET %x0 + + bb.2: + liveins: %w1, %w20 + %w0 = ADDWrr killed %w1, %w20 + RET %x0 +... + +--- +# Don't sink w19 as w0 is defined in bb.0. +# CHECK-LABEL: name: donotsinkcopy1 +# CHECK-LABEL: bb.0: +# CHECK: renamable %w19 = COPY %w0 +name: donotsinkcopy1 +tracksRegLiveness: true +body: | + bb.0: + liveins: %w0, %w1 + %w1 = SUBSWri %w1, 1, 0, implicit-def %nzcv + renamable %w19 = COPY %w0 + %w0 = LDRWui %sp, 0 :: (load 4) + Bcc 11, %bb.2, implicit %nzcv + B %bb.1 + + bb.1: + %w19 = COPY %wzr + + bb.2: + liveins: %w0, %w19 + %w0 = ADDWrr %w0, %w19 + RET %x0 +... + +--- +# Don't sink w19 as w19 is used in bb.0. +# CHECK-LABEL: name: donotsinkcopy2 +# CHECK-LABEL: bb.0: +# CHECK: renamable %w19 = COPY %w0 +name: donotsinkcopy2 +tracksRegLiveness: true +body: | + bb.0: + liveins: %w0, %w1 + %w1 = SUBSWri %w1, 1, 0, implicit-def %nzcv + renamable %w19 = COPY %w0 + STRWui %w1, %x19, 0 :: (store 4) + Bcc 11, %bb.2, implicit %nzcv + B %bb.1 + + bb.1: + %w19 = COPY %wzr + + bb.2: + liveins: %w0, %w19 + %w0 = ADDWrr %w0, %w19 + RET %x0 +... + +--- +# Don't sink w19 as w19 is used in both %bb.1 and %bb.2. +# CHECK-LABEL: name: donotsinkcopy3 +# CHECK-LABEL: bb.0: +# CHECK: renamable %w19 = COPY %w0 +name: donotsinkcopy3 +tracksRegLiveness: true +body: | + bb.0: + liveins: %w0, %w1 + %w1 = SUBSWri %w1, 1, 0, implicit-def %nzcv + renamable %w19 = COPY %w0 + Bcc 11, %bb.2, implicit %nzcv + B %bb.1 + + bb.1: + liveins: %w19 + %w0 = COPY %w19 + RET %x0 + + bb.2: + liveins: %w0, %w19 + %w0 = ADDWrr %w0, %w19 + RET %x0 +... + +--- +# Don't sink w19 as %bb.2 has multiple predecessors. +# CHECK-LABEL: name: donotsinkcopy4 +# CHECK-LABEL: bb.0: +# CHECK: renamable %w19 = COPY %w0 +name: donotsinkcopy4 +tracksRegLiveness: true +body: | + bb.0: + liveins: %w0, %w1 + %w1 = SUBSWri %w1, 1, 0, implicit-def %nzcv + renamable %w19 = COPY %w0 + Bcc 11, %bb.2, implicit %nzcv + B %bb.1 + + bb.1: + liveins: %w0 + %w19 = COPY %w0 + B %bb.2 + + bb.2: + liveins: %w0, %w19 + %w0 = ADDWrr %w0, %w19 + RET %x0 +... + + +# Don't sink w19 after sinking w20. +# CHECK-LABEL: name: donotsinkcopy5 +# CHECK-LABEL: bb.0: +# CHECK: renamable %w19 = COPY %w0 +# CHECK-LABEL: bb.2: +# CHECK: liveins: %w0, %w19 +# CHECK: renamable %w20 = COPY %w19 +name: donotsinkcopy5 +tracksRegLiveness: true +body: | + bb.0: + liveins: %w0, %w1 + %w1 = SUBSWri %w1, 1, 0, implicit-def %nzcv + renamable %w19 = COPY %w0 + renamable %w20 = COPY %w19 + Bcc 11, %bb.2, implicit %nzcv + + bb.1: + liveins: %w19 + %w0 = COPY %w19 + RET %x0 + + bb.2: + liveins: %w0, %w20 + %w0 = ADDWrr killed %w0, %w20 + RET %x0 +... + +--- +# Don't sink w19 as %bb.2 has multiple predecessors. +# CHECK-LABEL: name: donotsinkcopy6 +# CHECK-LABEL: bb.0: +name: donotsinkcopy6 +tracksRegLiveness: true +body: | + bb.0: + liveins: %x0, %w1 + %w1 = SUBSWri %w1, 1, 0, implicit-def %nzcv + renamable %x19 = COPY %x0 + Bcc 11, %bb.2, implicit %nzcv + B %bb.1 + + bb.1: + liveins: %w19 + %w0 = COPY %w19 + RET %x0 + + bb.2: + liveins: %x0, %x19 + %x0 = ADDXrr %x0, %x19 + RET %x0 +... Index: test/CodeGen/AArch64/sink-copy-for-shrink-wrap.ll =================================================================== --- /dev/null +++ test/CodeGen/AArch64/sink-copy-for-shrink-wrap.ll @@ -0,0 +1,22 @@ +; RUN: llc -mtriple=aarch64-linux-gnu -o - %s | FileCheck %s + +; CHECK-LABEL: %bb.0: +; CHECK-NOT: stp +; CHECK-NOT: mov w{{[0-9]+}}, w0 +; CHECK-LABEL: %bb.1: +; CHECK: stp x19 +; CHECK: mov w{{[0-9]+}}, w0 + +define i32 @shrinkwrapme(i32 %paramAcrossCall, i32 %paramNotAcrossCall) { +entry: + %cmp5 = icmp sgt i32 %paramNotAcrossCall, 0 + br i1 %cmp5, label %CallBB, label %Exit +CallBB: + %call = call i32 @fun() + %add = add i32 %call, %paramAcrossCall + ret i32 %add +Exit: + ret i32 0 +} + +declare i32 @fun()