Index: lib/CodeGen/ShrinkWrap.cpp =================================================================== --- lib/CodeGen/ShrinkWrap.cpp +++ lib/CodeGen/ShrinkWrap.cpp @@ -49,6 +49,7 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/BitVector.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallVector.h" @@ -82,6 +83,17 @@ #include #include +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/MC/MCRegisterInfo.h" + using namespace llvm; #define DEBUG_TYPE "shrink-wrap" @@ -153,6 +165,14 @@ /// after Save and before Restore. bool useOrDefCSROrFI(const MachineInstr &MI, RegScavenger *RS) const; + /// \brief Sink Copys to a CSR in the entry block into successors if we can + /// safely move them. + bool sinkCSRFromEntry(MachineFunction &MF, RegScavenger *RS, + const TargetRegisterInfo *TRI); + + /// Return true if \p MI define a CSR. + bool defCSR(const MachineInstr &MI, RegScavenger *RS) const; + const SetOfRegs &getCurrentCSRs(RegScavenger *RS) const { if (CurrentCSRs.empty()) { BitVector SavedRegs; @@ -272,6 +292,26 @@ return false; } +bool ShrinkWrap::defCSR(const MachineInstr &MI, RegScavenger *RS) const { + for (const MachineOperand &MO : MI.operands()) { + if (MO.isReg()) { + unsigned PhysReg = MO.getReg(); + if (!PhysReg) + continue; + assert(TargetRegisterInfo::isPhysicalRegister(PhysReg) && + "Unallocated register?!"); + if (MO.isDef() && RCI.getLastCalleeSavedAlias(PhysReg)) + return true; + } else if (MO.isRegMask()) { + // Check if this regmask clobbers any of the CSRs. + for (unsigned Reg : getCurrentCSRs(RS)) + if (MO.clobbersPhysReg(Reg)) + return true; + } + } + return false; +} + /// \brief Helper function to find the immediate (post) dominator. template static MachineBasicBlock *FindIDom(MachineBasicBlock &Block, ListOfBBs BBs, @@ -444,6 +484,118 @@ return false; } +static bool isLiveIn(MachineBasicBlock *MBB, SmallSet &RegAliasSet, + const TargetRegisterInfo *TRI) { + for (const auto LI : MBB->liveins()) + if (RegAliasSet.count(LI.PhysReg)) + return true; + return false; +} + +static bool isUsed(MachineInstr &MI, unsigned Reg, + const TargetRegisterInfo *TRI) { + for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI.getOperand(i); + if (!MO.isReg() || !MO.isUse()) + continue; + unsigned MOReg = MO.getReg(); + if (!MOReg) + continue; + if (TRI->isSuperOrSubRegisterEq(MOReg, Reg)) { + return true; + } + } + return false; +} + +static bool canSinkThrough(MachineInstr &CI, const TargetRegisterInfo *TRI) { + assert(CI.isCopy() && "Expect only copy"); + unsigned DefReg = CI.getOperand(0).getReg(); + unsigned SrcReg = CI.getOperand(1).getReg(); + MachineBasicBlock::iterator I = CI, E = CI.getParent()->end(); + ++I; + while (I != E) { + MachineInstr &MI = *I; + if (MI.isCall()) + return false; + if (isUsed(MI, DefReg, TRI) || MI.modifiesRegister(DefReg, TRI) || + MI.modifiesRegister(SrcReg, TRI)) + return false; + ++I; + } + return true; +} + +/// Sink Copy instructions which define a CSR in the entry to successors. +bool ShrinkWrap::sinkCSRFromEntry(MachineFunction &MF, RegScavenger *RS, + const TargetRegisterInfo *TRI) { + MachineBasicBlock *Entry = &MF.front(); + SmallVector SuccBBs(Entry->succ_begin(), + Entry->succ_end()); + DenseMap NewSuccBBs; + bool Changed = false; + + if (SuccBBs.size() != 2 || SuccBBs[0] == SuccBBs[1]) + return Changed; + + for (auto I = Entry->rbegin(), E = Entry->rend(); I != E;) { + MachineInstr *MI = &*I; + ++I; + + // FIXME: For now, only handle Copys which define CSRs. + if (!MI->isCopy() || !defCSR(*MI, RS)) + continue; + + unsigned CSRDefReg = MI->getOperand(0).getReg(); + unsigned SrcReg = MI->getOperand(1).getReg(); + + SmallSet RegAliasSet; + for (MCRegAliasIterator AI(CSRDefReg, TRI, true); AI.isValid(); ++AI) + RegAliasSet.insert(*AI); + + bool isLiveOutBB0 = isLiveIn(SuccBBs[0], RegAliasSet, TRI); + bool isLiveOutBB1 = isLiveIn(SuccBBs[1], RegAliasSet, TRI); + + if (!((!isLiveOutBB0 && isLiveOutBB1) || (isLiveOutBB0 && !isLiveOutBB1))) + continue; + + // Sink if Def of CSR is not used/defed and SrcReg is defed in + if (!canSinkThrough(*MI, TRI)) + continue; + + // Create a new successor for the entry, and place the Copy which def a CSR + // to the new successor. + MachineBasicBlock *SuccBB = isLiveOutBB0 ? SuccBBs[0] : SuccBBs[1]; + if (!NewSuccBBs.count(SuccBB)) { + MachineBasicBlock *NMBB = MF.CreateMachineBasicBlock(); + MF.insert(MachineFunction::iterator(SuccBB), NMBB); + Entry->ReplaceUsesOfBlockWith(SuccBB, NMBB); + NMBB->addSuccessor(SuccBB); + + for (const auto &LI : SuccBB->liveins()) + NMBB->addLiveIn(LI); + + for (auto *PI : SuccBB->predecessors()) + if (PI != Entry) + PI->updateTerminator(); + NewSuccBBs[SuccBB] = NMBB; + } + + MachineBasicBlock *NewSuccBB = NewSuccBBs[SuccBB]; + + // Move the instr to the new successor. + MachineBasicBlock::iterator InsertPos = NewSuccBB->getFirstNonPHI(); + NewSuccBB->splice(InsertPos, Entry, MI); + + NewSuccBB->removeLiveIn(CSRDefReg); + if (!NewSuccBB->isLiveIn(SrcReg)) + NewSuccBB->addLiveIn(SrcReg); + + Changed = true; + } + return Changed; +} + bool ShrinkWrap::runOnMachineFunction(MachineFunction &MF) { if (skipFunction(*MF.getFunction()) || MF.empty() || !isShrinkWrapEnabled(MF)) return false; @@ -467,6 +619,11 @@ std::unique_ptr RS( TRI->requiresRegisterScavenging(MF) ? new RegScavenger() : nullptr); + if (sinkCSRFromEntry(MF, RS.get(), TRI)) { + MDT->runOnMachineFunction(MF); + MPDT->runOnMachineFunction(MF); + } + for (MachineBasicBlock &MBB : MF) { DEBUG(dbgs() << "Look into: " << MBB.getNumber() << ' ' << MBB.getName() << '\n'); Index: test/CodeGen/AArch64/post-rs-copy-sink.mir =================================================================== --- /dev/null +++ test/CodeGen/AArch64/post-rs-copy-sink.mir @@ -0,0 +1,110 @@ +# RUN: llc -mtriple=aarch64-none-linux-gnu -run-pass shrink-wrap -verify-machineinstrs -o - %s | FileCheck %s +--- +# Sink w19 to the new predecessor of bb.2. +# CHECK-LABEL: name: sinkcopy1 +# CHECK-LABEL: bb.0: +# CHECK-NOT: %w19 = COPY %w0 +# CHECK: Bcc 11, %bb.3 +# CHECK-LABEL: bb.3: +# CHECK: %w19 = COPY %w0 + +name: sinkcopy1 +tracksRegLiveness: true +body: | + bb.0: + liveins: %w0, %w1 + %w1 = SUBSWri %w1, 1, 0, implicit-def %nzcv + %w19 = COPY %w0 + Bcc 11, %bb.1, implicit %nzcv + B %bb.2 + + bb.1: + liveins: %w0, %w19 + %w0 = ADDWrr %w0, %w19 + RET %x0 + + bb.2: + %w0 = COPY %wzr + RET %x0 +... + +--- +# Sink w19 to the new predecessor of bb.2 and then it should branch from bb.1 to +# bb.2 as bb.3 is placed between bb.1 and bb.2. +# CHECK-LABEL: name: sinkcopy2 +# CHECK-LABEL: bb.0: +# CHECK-NOT: %w19 = COPY %w0 +# CHECK: Bcc 11, %bb.3 +# CHECK-LABEL: bb.1: +# CHECK: B %bb.2 +# CHECK-LABEL: bb.3: +# CHECK: %w19 = COPY %w0 +# CHECK-LABEL: bb.2: +name: sinkcopy2 +tracksRegLiveness: true +body: | + bb.0: + liveins: %w0, %w1 + %w1 = SUBSWri %w1, 1, 0, implicit-def %nzcv + %w19 = COPY %w0 + Bcc 11, %bb.2, implicit %nzcv + B %bb.1 + + bb.1: + %w19 = COPY %wzr + + bb.2: + liveins: %w0, %w19 + %w0 = ADDWrr %w0, %w19 + RET %x0 +... + +--- +# Don't sink w19 as w0 is defined in bb.0 +# CHECK-LABEL: name: sinkcopy3 +# CHECK-LABEL: bb.0: +# CHECK: %w19 = COPY %w0 +name: sinkcopy3 +tracksRegLiveness: true +body: | + bb.0: + liveins: %w0, %w1 + %w1 = SUBSWri %w1, 1, 0, implicit-def %nzcv + %w19 = COPY %w0 + %w0 = LDRWui %sp, 0 :: (load 4) + Bcc 11, %bb.2, implicit %nzcv + B %bb.1 + + bb.1: + %w19 = COPY %wzr + + bb.2: + liveins: %w0, %w19 + %w0 = ADDWrr %w0, %w19 + RET %x0 +... + +--- +# Don't sink w19 as w19 is used in bb.0 +# CHECK-LABEL: name: sinkcopy4 +# CHECK-LABEL: bb.0: +# CHECK: %w19 = COPY %w0 +name: sinkcopy4 +tracksRegLiveness: true +body: | + bb.0: + liveins: %w0, %w1 + %w1 = SUBSWri %w1, 1, 0, implicit-def %nzcv + %w19 = COPY %w0 + STRWui %w1, %x19, 0 :: (store 4) + Bcc 11, %bb.2, implicit %nzcv + B %bb.1 + + bb.1: + %w19 = COPY %wzr + + bb.2: + liveins: %w0, %w19 + %w0 = ADDWrr %w0, %w19 + RET %x0 +... Index: test/CodeGen/AArch64/sink-copy-for-shrink-wrap.ll =================================================================== --- /dev/null +++ test/CodeGen/AArch64/sink-copy-for-shrink-wrap.ll @@ -0,0 +1,22 @@ +; RUN: llc -mtriple=aarch64-linux-gnu -o - %s | FileCheck %s + +; CHECK-LABEL: BB#0 +; CHECK-NOT: stp +; CHECK-NOT: mov w{{[0-9]+}}, w0 +; CHECK-LABEL: BB#1 +; CHECK: stp x19 +; CHECK: mov w{{[0-9]+}}, w0 + +define i32 @shrinkwrapme(i32 %paramAcrossCall, i32 %paramNotAcrossCall) { +entry: + %cmp5 = icmp sgt i32 %paramNotAcrossCall, 0 + br i1 %cmp5, label %CallBB, label %Exit +CallBB: + %call = call i32 @fun() + %add = add i32 %call, %paramAcrossCall + ret i32 %add +Exit: + ret i32 0 +} + +declare i32 @fun()