Index: lib/Target/PowerPC/PPCRegCopyElim.cpp =================================================================== --- lib/Target/PowerPC/PPCRegCopyElim.cpp +++ lib/Target/PowerPC/PPCRegCopyElim.cpp @@ -35,6 +35,8 @@ // //===---------------------------------------------------------------------===// +#include "llvm/ADT/DenseMap.h" +#include "MCTargetDesc/PPCPredicates.h" #include "PPCInstrBuilder.h" #include "PPCTargetMachine.h" @@ -224,19 +226,114 @@ return nullptr; } +// This method eliminates the specified BB if it includes only +// an unconditional branch and IMPLICIT_DEFs. +// We assume the following CFG. +// +// PredMBB +// / | +// SideMBB EmptyMBB +// / | +// NextMBB +// +// - EmptyMBB has only one successor and only one predecessor. +// - PredMBB has two successors (i.e. multi-target branch is not supported). + +static bool eraseBranchOnlyBB(MachineBasicBlock *EmptyMBB, + SmallVector &BBToErase) { + if (EmptyMBB->pred_size() != 1) + return false; + + MachineBasicBlock *PredMBB = *(EmptyMBB->pred_begin()); + if (PredMBB->succ_size() != 2) + return false; + + // We allow IMPLICIT_DEF to be included in an empty BB + // since IMPLICIT_DEF does not generate a machine instruction. + MachineBasicBlock::iterator MII = EmptyMBB->getFirstNonDebugInstr(); + while (MII != EmptyMBB->end() && MII->isImplicitDef()) + MII++; + if (MII == EmptyMBB->end() || MII->getOpcode() != PPC::B) + return false; + MachineBasicBlock* NextMBB = MII->getOperand(0).getMBB(); + + auto getBranchTargetOpIdx = [](MachineInstr &I) { + if (I.getOpcode() == PPC::BCC) + return 2; + if (I.getOpcode() == PPC::BC || I.getOpcode() == PPC::BCn) + return 1; + if (I.getOpcode() == PPC::B) + return 0; + return -1; + }; + + // We iterate terniators in PredMBB and replace the target of the branch + // to EmptyMBB with NextMBB. + MachineBasicBlock::iterator TII = PredMBB->getFirstTerminator(); + for(; TII != PredMBB->end(); TII++) { + int Idx = getBranchTargetOpIdx(*TII); + if (Idx != -1 && TII->getOperand(Idx).getMBB() == EmptyMBB) { + // To opt: If NextMBB is the layout successor of PredMBB, we can further + // erase one branch instruction. But it seems a rare case. + TII->getOperand(Idx).setMBB(NextMBB); + PredMBB->replaceSuccessor(EmptyMBB, NextMBB); + EmptyMBB->removeSuccessor(NextMBB); + BBToErase.push_back(EmptyMBB); + return true; + } + } + + auto FirstSuccBB = PredMBB->succ_begin(); + MachineBasicBlock *SideMBB = (EmptyMBB == *FirstSuccBB) ? *(FirstSuccBB + 1): + * FirstSuccBB; + // If EmptyMBB is the current fall through from PredMBB and + // SideMBB becomes the fall through after eliminating EmptyMBB, + // we rewrite the conditional branch to SideMBB with a branch to NextMBB. + if (PredMBB->isLayoutSuccessor(EmptyMBB) && + EmptyMBB->isLayoutSuccessor(SideMBB)) { + TII = PredMBB->getFirstTerminator(); + MachineInstr &BrToSide = *TII; + if (BrToSide.getOpcode() == PPC::BCC || BrToSide.getOpcode() == PPC::BC) { + int Idx = getBranchTargetOpIdx(BrToSide); + if (BrToSide.getOperand(Idx).getMBB() == SideMBB && + ++TII == PredMBB->end()) { + BrToSide.getOperand(Idx).setMBB(NextMBB); + PPC::Predicate Pred = (PPC::Predicate)BrToSide.getOperand(0).getImm(); + BrToSide.getOperand(0).setImm(PPC::InvertPredicate(Pred)); + PredMBB->replaceSuccessor(EmptyMBB, NextMBB); + BBToErase.push_back(EmptyMBB); + return true; + } + } + } + return false; +} + bool PPCRegCopyElim::eliminateRedundantRegCopy(void) { const PPCSubtarget *PPCSubTarget = &MF->getSubtarget(); const PPCFrameLowering *TFI = PPCSubTarget->getFrameLowering(); const PPCRegisterInfo *TRI = PPCSubTarget->getRegisterInfo(); bool Simplified = false; + // Utility lambdas auto ResetKillFlags = [](MachineInstr *MI, unsigned Reg) { for (auto &MO : MI->uses()) if (MO.isReg() && MO.getReg() == Reg) MO.setIsKill(false); }; + auto isSameRegCopy = [](MachineInstr *I1, MachineInstr *I2) { + MachineOperand *Src1 = nullptr, *Dst1 = nullptr; + MachineOperand *Src2 = nullptr, *Dst2 = nullptr; + if (!isRegCopy(*I1, Src1, Dst1) || !isRegCopy(*I2, Src2, Dst2)) + return false; + return I1->getOpcode() == I2->getOpcode() && + Src1->getReg() == Src2->getReg() && + Dst1->getReg() == Dst2->getReg(); + }; + SmallPtrSet WorkSet; + DenseMap> InterBBRegCopys; for (MachineBasicBlock &MBB : *MF) WorkSet.insert(&MBB); @@ -301,6 +398,7 @@ DefMI->eraseFromParent(); else ResetKillFlags(DefMI, DstReg); + InterBBRegCopys[&MBB].erase(DefMI); } else { // If the SrcReg of reg copy will not be used, @@ -316,12 +414,71 @@ CopyMI.eraseFromParent(); for (auto &MO : OperandsToRewrite1) MO->setReg(DstReg); + InterBBRegCopys[&MBB].erase(&CopyMI); CurrentInstrErased = true; break; } continue; } + if (MBB.pred_size() == 1 && !IsDstUsed) { + // We found a potentially mergeable register copy. + InterBBRegCopys[&MBB].insert(&CopyMI); + + for (auto &PredMBB : MBB.predecessors()) { + // If all the successors of PredMBB have the same register copy, + // we try to merge them into the end of PredMBB. + // If this optimization makes an empty BB, it will be eliminated + // later. + MachineBasicBlock::iterator TII = PredMBB->getFirstTerminator(); + if (PredMBB->succ_size() == 1 || TII == PredMBB->end()) + continue; + + // We can optimize if all the successors: + // - have only one predecessor (PredMBB), and + // - have the same register copy. + auto canOptimize = [&](MachineBasicBlock *MBB) { + if (MBB->pred_size() > 1) + return false; + for (auto I : InterBBRegCopys[MBB]) + if (isSameRegCopy(I, &CopyMI)) + return true; + return false; + }; + + if (llvm::all_of(PredMBB->successors(), canOptimize)) { + DEBUG(dbgs() << "Merging multiple register copies\n"); + + for (auto &SuccMBB : PredMBB->successors()) + SuccMBB->addLiveIn(DstReg); + + ResetKillFlags(&CopyMI, SrcReg); + PredMBB->splice(TII, &MBB, CopyMI); + InterBBRegCopys[&MBB].erase(&CopyMI); + + for (auto &SuccMBB : PredMBB->successors()) { + if (SuccMBB == &MBB) + continue; + for (auto I : InterBBRegCopys[SuccMBB]) + if (isSameRegCopy(I, &CopyMI)) { + I->eraseFromParent(); + InterBBRegCopys[SuccMBB].erase(I); + break; + } + } + + // The copied register copy may be further optimized. + // So we will optimize this basic block later again. + WorkSet.insert(PredMBB); + + CurrentInstrErased = true; + break; + } + } + if (CurrentInstrErased) + break; + } + // If no def and no conflicting instruction is found in this BB, // from here, we handle redundancy among multiple BBs. // We currently support following CFGs. @@ -414,6 +571,7 @@ } else DefMI->eraseFromParent(); + InterBBRegCopys[Pred1MBB].erase(DefMI); } // We move the second reg copy to Pred2MBB by inserting before @@ -431,6 +589,7 @@ } else CopyMI.eraseFromParent(); + InterBBRegCopys[&MBB].erase(&CopyMI); if (IsKill) MBB.removeLiveIn(SrcReg); @@ -458,6 +617,16 @@ } } while(CurrentInstrErased); } + + // We eliminate empty BBs (i.e. including only unconditional branch). + SmallVector BBToErase; + for (MachineBasicBlock &MBB : *MF) + eraseBranchOnlyBB(&MBB, BBToErase); + + for (auto BB : BBToErase) { + DEBUG(dbgs() << "Eliminating an empty BB#" << BB->getNumber() << "\n"); + BB->eraseFromParent(); + } return Simplified; } Index: test/CodeGen/PowerPC/redundant_regcopy_3.mir =================================================================== --- /dev/null +++ test/CodeGen/PowerPC/redundant_regcopy_3.mir @@ -0,0 +1,86 @@ +# test for BB-local register copy elimination +# RUN: llc -run-pass ppc-regcopy-elim -verify-machineinstrs -o - %s | FileCheck %s + +--- | + target datalayout = "E-m:e-i64:64-n32:64" + target triple = "powerpc64le-unknown-linux-gnu" + define i8* @func(i8* %a, i1 %b) { + entry: + br i1 %b, label %exit, label %foo + + foo: ; preds = %entry + br label %exit + + bar: ; preds = %entry + br label %exit + + exit: ; preds = %foo, %bar + ret i8* %a + } + +... +--- +name: func +alignment: 2 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +tracksRegLiveness: true +liveins: + - { reg: '%x3' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 0 + adjustsStack: false + hasCalls: false + maxCallFrameSize: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false +body: | + bb.0.entry: + successors: %bb.2.bar(0x40000000), %bb.1.foo(0x40000000) + liveins: %x3 + + ; Register copys (%x4 = OR8 %x3, %x3) in foo and bar are hoisted and merged into this BB. + ; CHECK-LABEL: bb.0.entry: + ; CHECK: %cr0 = CMPLDI %x3, 0 + ; CHECK: %x4 = OR8 %x3, %x3 + %cr0 = CMPLDI %x3, 0 + BCC 76, killed %cr0, %bb.2.bar + B %bb.1.foo + + bb.1.foo: + liveins: %x3 + + ; CHECK-LABEL: bb.1.foo: + ; CHECK-NOT: OR8 + ; CHECK: %x4 = ADDI8 %x4, 123 + %x4 = OR8 %x3, %x3 + %x4 = ADDI8 %x4, 123 + B %bb.3.exit + + bb.2.bar: + liveins: %x3 + + ; This BB becomes empty and be eliminated + ; CHECK-NOT: bb.2.bar: + %x4 = OR8 %x3, %x3 + B %bb.3.exit + + bb.3.exit: + liveins: %x4 + ; CHECK-LABEL: bb.3.exit: + ; CHECK-NOT: OR8 + ; CHECK: BLR8 + %x3 = ADDI8 %x4, 456 + BLR8 implicit %lr8, implicit %rm + +... +