Index: llvm/lib/CodeGen/PrologEpilogInserter.cpp =================================================================== --- llvm/lib/CodeGen/PrologEpilogInserter.cpp +++ llvm/lib/CodeGen/PrologEpilogInserter.cpp @@ -129,6 +129,15 @@ int &SPAdj); void insertPrologEpilogCode(MachineFunction &MF); void insertZeroCallUsedRegs(MachineFunction &MF); + + void removeRedundantAddrAnchors(MachineFunction &MF); + // Data structures to map regs to definitions per MBB. + typedef std::map Reg2DefMap; + typedef std::map MBB2RegDefsMap; + void visitBlock(MachineBasicBlock *MBB, + std::set &Visited, + std::list &Worklist, + MBB2RegDefsMap &RegDefs); }; } // end anonymous namespace @@ -269,6 +278,10 @@ if (TRI->requiresRegisterScavenging(MF) && FrameIndexVirtualScavenging) scavengeFrameVirtualRegs(MF, *RS); + // Remove redundant frame addressing anchor points created during Frame + // Indices elimination. + removeRedundantAddrAnchors(MF); + // Warn on stack size when we exceeds the given limit. MachineFrameInfo &MFI = MF.getFrameInfo(); uint64_t StackSize = MFI.getStackSize(); @@ -1443,3 +1456,201 @@ RS->forward(MI); } } + +#ifndef NDEBUG +// (Experimental) Only used to verify that all reachable MBBs were processed. +static void addSuccessors(MachineBasicBlock *MBB, + std::set &Visited) { + Visited.insert(MBB); + for (MachineBasicBlock *SuccMBB : MBB->successors()) + if (!Visited.count(SuccMBB)) + addSuccessors(SuccMBB, Visited); +} +static unsigned getNumReachableBlocks(MachineFunction &MF) { + std::set Visited; + addSuccessors(&MF.front(), Visited); + return Visited.size(); +} +#endif + +void PEI::removeRedundantAddrAnchors(MachineFunction &MF) { + // TODO: BreadthFirstIterator..? + std::set Visited; + std::list Worklist; + Worklist.push_back(&MF.front()); + MBB2RegDefsMap RegDefs; + auto allPredsVisited = [&Visited](MachineBasicBlock *MBB) { + for (MachineBasicBlock *Pred : MBB->predecessors()) + if (!Visited.count(Pred)) + return false; + return true; + }; + + // Try to visit all blocks in an order so that all predecessors of an MBB + // were visited before, in order to reuse definitions from them. + bool Change = true; + while (Change) { + Change = false; + for (auto &MBB : Worklist) + if (!Visited.count(MBB) && allPredsVisited(MBB)) { + visitBlock(MBB, Visited, Worklist, RegDefs); + Change = true; + } + if (!Change) + // TODO: Probably better to prioritize a loop header here. + for (auto &MBB : Worklist) + if (!Visited.count(MBB)) { + visitBlock(MBB, Visited, Worklist, RegDefs); + Change = true; + break; + } + } + + assert(Visited.size() == getNumReachableBlocks(MF) && + "Failed to visit all MBBs."); +} + +// Slow version for now: Update live-in lists and clear kill flags after each +// removal of a redundant definition. Not sure how important kill-flags are +// after PEI, but this clears them carefully on each user. TODO: This should +// probably be done after all else in a single pass instead to avoid +// unnecessary compile-time. +static void clearKillsForDef(MachineBasicBlock *MBB, + MachineBasicBlock::iterator I, + Register Reg, + MachineInstr *MI, + std::set &Visited, + const TargetRegisterInfo *TRI) { + Visited.insert(MBB); + // Clear kill flags on instructions before I in MBB, stopping at a + // definition of Reg, which should be an identical instruction. + while (I != MBB->begin()) { + I--; + if (I->modifiesRegister(Reg, TRI)) { + assert (I->isIdenticalTo(*MI) && "Broken redundancy assumption."); + return; + } + I->clearRegisterKills(Reg, TRI); + } + + // If earlier def is not in MBB, continue in predecessors. + if (!MBB->isLiveIn(Reg)) + MBB->addLiveIn(Reg); + assert(MBB->pred_size() && "Predecessor def not found!"); + for (MachineBasicBlock *Pred : MBB->predecessors()) + if (!Visited.count(Pred)) + clearKillsForDef(Pred, Pred->end(), Reg, MI, Visited, TRI); +} + +static void removeRedundantDef(MachineInstr *MI, + const TargetRegisterInfo *TRI) { + Register Reg = MI->getOperand(0).getReg(); + std::set Visited; + clearKillsForDef(MI->getParent(), MI->getIterator(), Reg, MI, Visited, TRI); + MI->eraseFromParent(); +} + +// EXPERIMENTAL +// -pei-localonly causes only earlier defs in same MBB to be reused. +static cl::opt PEI_LOCALONLY("pei-localonly", cl::init(false), cl::Hidden); + +void PEI::visitBlock(MachineBasicBlock *MBB, + std::set &Visited, + std::list &Worklist, + MBB2RegDefsMap &RegDefs) { + Visited.insert(MBB); + + // Reuse definition in predecessor(s). + std::set PredRegs; + if (!PEI_LOCALONLY) { + // Find set of all regs reusable from any predecessor. + for (MachineBasicBlock *Pred : MBB->predecessors()) + for (auto I : RegDefs[Pred]) + PredRegs.insert(I.first); + } + Reg2DefMap &MBBDefs = RegDefs[MBB]; + // Fill MBBDefs with reusable definitions if all predecessor definitions + // are present and identical. + for(auto Reg : PredRegs) { + MachineInstr *IdenticalDefMI = nullptr; + for (MachineBasicBlock *Pred : MBB->predecessors()) { + MachineInstr *PredDefMI = nullptr; + if (RegDefs[Pred].find(Reg) != RegDefs[Pred].end()) + PredDefMI = RegDefs[Pred][Reg]; + if (PredDefMI == nullptr || + (IdenticalDefMI != nullptr && + !IdenticalDefMI->isIdenticalTo(*PredDefMI))) { + IdenticalDefMI = nullptr; + break; + } + IdenticalDefMI = PredDefMI; + } + if (IdenticalDefMI != nullptr) { + MBBDefs[Reg] = IdenticalDefMI; + LLVM_DEBUG(dbgs() << "Reusable instruction from pred(s): in MBB#" + << MBB->getNumber() << ":"; IdenticalDefMI->dump();); + } + } + + // Process MBB. + MachineFunction *MF = MBB->getParent(); + const TargetRegisterInfo *TRI =MF->getSubtarget().getRegisterInfo(); + Register FrameReg = TRI->getFrameRegister(*MF); + for (MachineBasicBlock::iterator I = MBB->begin(); I != MBB->end();) { + MachineInstr *MI = &*(I++); + + // If the FrameReg is modified, no previous definitions (using it) can be + // reused. TODO: There seems to be many load immediate instructions that + // do not use FrameReg that could still be reused. + if (MI->modifiesRegister(FrameReg, TRI)) { + MBBDefs.clear(); + continue; + } + + // A candidate is a simple instruction that does not touch memory, has + // only one register definition and the only reg it may use is FrameReg. + Register DefedReg = 0; + bool Candidate = !(MI->mayStore() || MI->mayLoad() || MI->isCall() || + MI->isInlineAsm() || MI->hasUnmodeledSideEffects()); + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (MO.isReg()) { + if (MO.isDef()) { + if (i == 0 && !MO.isImplicit()) + DefedReg = MO.getReg(); + else + Candidate = false; + } + else if (MO.getReg() && MO.getReg() != FrameReg) + Candidate = false; + } + else if (!MO.isImm()) + Candidate = false; + } + + if (Candidate && DefedReg && MBBDefs[DefedReg] != nullptr && + MBBDefs[DefedReg]->isIdenticalTo(*MI)) { + LLVM_DEBUG(dbgs() << "Removing redundant instruction in MBB#" + << MBB->getNumber() << ":"; MI->dump();); + removeRedundantDef(MI, TRI); + continue; + } + + // Clear any defs that MI clobbers. + for (auto DefI : MBBDefs) { + Register Reg = DefI.first; + if (MI->modifiesRegister(Reg, TRI)) + MBBDefs[Reg] = nullptr; + } + + if (Candidate && DefedReg) { + LLVM_DEBUG(dbgs() << "Found interesting instruction in MBB#" + << MBB->getNumber() << ":"; MI->dump();); + MBBDefs[DefedReg] = MI; + } + } + + // Add successors to worklist. + for (MachineBasicBlock *SuccMBB : MBB->successors()) + Worklist.push_back(SuccMBB); +} Index: llvm/test/CodeGen/SystemZ/frame-28.mir =================================================================== --- /dev/null +++ llvm/test/CodeGen/SystemZ/frame-28.mir @@ -0,0 +1,214 @@ +# RUN: llc -mtriple=s390x-linux-gnu -start-before=prologepilog %s -o - -mcpu=z14 \ +# RUN: -print-after=prologepilog -verify-machineinstrs 2>&1 | FileCheck %s +# REQUIRES: asserts +# +# Test that redundant frame addressing anchor points are removed by PEI. + +--- | + define void @fun1() { ret void } + define void @fun2() { ret void } + define void @fun3() { ret void } + define void @fun4() { ret void } + define void @fun5() { ret void } + + declare i32 @foo() +--- + +# Test elimination of redundant LAYs in successor blocks. +# CHECK: # *** IR Dump After Prologue/Epilogue Insertion & Frame Finalization +# CHECK-NEXT: # Machine code for function fun1 +# CHECK: bb.0 (%ir-block.0): +# CHECK: $r1d = LAY $r15d, 4096, $noreg +# CHECK: bb.1: +# CHECK-NOT: LAY +# CHECK: bb.2: +# CHECK-NOT: LAY +--- +name: fun1 +tracksRegLiveness: true +stack: + - { id: 0, size: 5000 } + - { id: 1, size: 2500 } + - { id: 2, size: 2500 } + +machineFunctionInfo: {} +body: | + bb.0 (%ir-block.0): + liveins: $f0d + successors: %bb.2(0x00000001), %bb.1(0x7fffffff) + + VST64 renamable $f0d, %stack.0, 0, $noreg + VST64 renamable $f0d, %stack.0, 0, $noreg + VST64 renamable $f0d, %stack.0, 0, $noreg + VST64 renamable $f0d, %stack.0, 0, $noreg + VST64 renamable $f0d, %stack.0, 0, $noreg + VST64 renamable $f0d, %stack.1, 0, $noreg + CHIMux undef $r0l, 3, implicit-def $cc + BRC 14, 8, %bb.2, implicit killed $cc + J %bb.1 + + bb.1: + liveins: $f0d + VST64 renamable $f0d, %stack.2, 0, $noreg + J %bb.2 + + bb.2: + liveins: $f0d + VST64 renamable $f0d, %stack.1, 0, $noreg + Return +... + +# In this function the LAY in bb.1 will have a different offset, so the first +# LAY in bb.2 must remain. +# CHECK: # *** IR Dump After Prologue/Epilogue Insertion & Frame Finalization +# CHECK-NEXT: # Machine code for function fun2 +# CHECK: bb.0 (%ir-block.0): +# CHECK: $r1d = LAY $r15d, 4096, $noreg +# CHECK: bb.1: +# CHECK: $r1d = LAY $r15d, 8192, $noreg +# CHECK: bb.2: +# CHECK: $r1d = LAY $r15d, 4096, $noreg +# CHECK-NOT: LAY +--- +name: fun2 +tracksRegLiveness: true +stack: + - { id: 0, size: 5000 } + - { id: 1, size: 5000 } + - { id: 2, size: 2500 } + +machineFunctionInfo: {} +body: | + bb.0 (%ir-block.0): + liveins: $f0d + successors: %bb.2(0x00000001), %bb.1(0x7fffffff) + + VST64 renamable $f0d, %stack.0, 0, $noreg + VST64 renamable $f0d, %stack.0, 0, $noreg + VST64 renamable $f0d, %stack.0, 0, $noreg + VST64 renamable $f0d, %stack.0, 0, $noreg + VST64 renamable $f0d, %stack.0, 0, $noreg + VST64 renamable $f0d, %stack.1, 0, $noreg + CHIMux undef $r0l, 3, implicit-def $cc + BRC 14, 8, %bb.2, implicit killed $cc + J %bb.1 + + bb.1: + liveins: $f0d + VST64 renamable $f0d, %stack.2, 0, $noreg + J %bb.2 + + bb.2: + liveins: $f0d + VST64 renamable $f0d, %stack.1, 0, $noreg + VST64 renamable $f0d, %stack.1, 0, $noreg + Return +... + +# Test case with a loop (with room for improvement). +# CHECK: # *** IR Dump After Prologue/Epilogue Insertion & Frame Finalization +# CHECK-NEXT: # Machine code for function fun3 +# CHECK: bb.0 (%ir-block.0): +# CHECK: $r1d = LAY $r15d, 4096, $noreg +# CHECK: bb.1: +# CHECK: $r1d = LAY $r15d, 4096, $noreg +# CHECK: bb.2: +# CHECK: $r1d = LAY $r15d, 4096, $noreg +--- +name: fun3 +tracksRegLiveness: true +stack: + - { id: 0, size: 5000 } + - { id: 1, size: 2500 } + - { id: 2, size: 2500 } + +machineFunctionInfo: {} +body: | + bb.0 (%ir-block.0): + liveins: $f0d + successors: %bb.2(0x00000001), %bb.1(0x7fffffff) + + VST64 renamable $f0d, %stack.0, 0, $noreg + VST64 renamable $f0d, %stack.0, 0, $noreg + VST64 renamable $f0d, %stack.0, 0, $noreg + VST64 renamable $f0d, %stack.0, 0, $noreg + VST64 renamable $f0d, %stack.0, 0, $noreg + VST64 renamable $f0d, %stack.1, 0, $noreg + CHIMux undef $r0l, 3, implicit-def $cc + BRC 14, 8, %bb.2, implicit killed $cc + J %bb.1 + + bb.1: + liveins: $f0d + successors: %bb.2(0x00000001), %bb.1(0x7fffffff) + + VST64 renamable $f0d, %stack.2, 0, $noreg + CHIMux undef $r0l, 3, implicit-def $cc + BRC 14, 8, %bb.1, implicit killed $cc + J %bb.2 + + bb.2: + liveins: $f0d + VST64 renamable $f0d, %stack.1, 0, $noreg + Return +... + +# Test case with a call which clobbers r1. +# CHECK: # *** IR Dump After Prologue/Epilogue Insertion & Frame Finalization +# CHECK-NEXT: # Machine code for function fun4 +# CHECK: bb.0 (%ir-block.0): +# CHECK: $r1d = LAY $r15d, 4096, $noreg +# CHECK: CallBRASL +# CHECK: $r1d = LAY $r15d, 4096, $noreg +--- +name: fun4 +tracksRegLiveness: true +stack: + - { id: 0, size: 5000 } + - { id: 1, size: 2500 } + +machineFunctionInfo: {} +body: | + bb.0 (%ir-block.0): + liveins: $f0d, $f12d + + VST64 renamable $f0d, %stack.0, 0, $noreg + VST64 renamable $f0d, %stack.0, 0, $noreg + VST64 renamable $f0d, %stack.0, 0, $noreg + VST64 renamable $f0d, %stack.0, 0, $noreg + VST64 renamable $f0d, %stack.0, 0, $noreg + VST64 renamable $f0d, %stack.1, 0, $noreg + ADJCALLSTACKDOWN 0, 0 + CallBRASL @foo, csr_systemz_elf, implicit-def dead $r14d, implicit-def dead $cc, implicit $fpc, implicit-def $r2l + ADJCALLSTACKUP 0, 0 + VST64 renamable $f12d, %stack.1, 0, $noreg + Return +... + +# Test case where index reg is loaded instead of using an LAY. +# CHECK: # *** IR Dump After Prologue/Epilogue Insertion & Frame Finalization +# CHECK-NEXT: # Machine code for function fun5 +# CHECK: bb.0 (%ir-block.0): +# CHECK: $r1d = LGHI 4096 +# CHECK-NOT: LGHI +--- +name: fun5 +tracksRegLiveness: true +stack: + - { id: 0, size: 5000 } + - { id: 1, size: 2500 } + +machineFunctionInfo: {} +body: | + bb.0 (%ir-block.0): + liveins: $f0d, $f12d + + VST64 renamable $f0d, %stack.0, 0, $noreg + VST64 renamable $f0d, %stack.0, 0, $noreg + VST64 renamable $f0d, %stack.0, 0, $noreg + VST64 renamable $f0d, %stack.0, 0, $noreg + VST64 renamable $f0d, %stack.0, 0, $noreg + $f0q = nofpexcept LXEB %stack.1, 0, $noreg, implicit $fpc + $f1q = nofpexcept LXEB %stack.1, 0, $noreg, implicit $fpc + Return +...