Index: llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
===================================================================
--- llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
+++ llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
@@ -35,6 +35,7 @@
           "Number of instructions deleted in pre-emit peephole");
 STATISTIC(NumberOfSelfCopies,
           "Number of self copy instructions eliminated");
+STATISTIC(NumCopyOfImmediateFolded, "Number of copies of immediates folded");
 
 static cl::opt<bool>
 RunPreEmitPeephole("ppc-late-peephole", cl::Hidden, cl::init(true),
@@ -57,6 +58,62 @@
         MachineFunctionProperties::Property::NoVRegs);
   }
 
+  /// Fold the following pattern, which is generated after tail duplication:
+  /// ```
+  /// li r6, 0
+  /// mr r3, r6
+  /// blr
+  /// ```
+  /// into
+  /// ```
+  /// li r3, 0
+  /// blr
+  /// ```
+  /// \p MI is the copy (an OR/OR8 with identical source operands, i.e. `mr`);
+  /// instructions made dead by the fold are appended to \p InstrsToErase for
+  /// the caller to delete. Returns true if the fold was performed.
+  bool foldCopyOfImmediate(MachineInstr &MI,
+                           SmallVectorImpl<MachineInstr *> &InstrsToErase) {
+    MachineBasicBlock *BB = MI.getParent();
+    MachineRegisterInfo *MRI = &BB->getParent()->getRegInfo();
+    // FIXME: Since we only find such patterns in termination BB after
+    // TailDuplication, no need to bother ourselves to seek a general
+    // implementation currently. We can extend it gradually if such patterns
+    // are found in non-termination BB.
+    if (MRI->isSSA() || !BB->succ_empty())
+      return false;
+    // Only handle a register copy, i.e. OR/OR8 with both source operands the
+    // same register.
+    unsigned Opc = MI.getOpcode();
+    if (!((Opc == PPC::OR || Opc == PPC::OR8) &&
+          MI.getOperand(1).getReg() == MI.getOperand(2).getReg()))
+      return false;
+    unsigned Reg = MI.getOperand(1).getReg();
+    bool SeenIntermediateUse = false;
+    const PPCInstrInfo *TII =
+        BB->getParent()->getSubtarget<PPCSubtarget>().getInstrInfo();
+    MachineInstr *DefMI = TII->getDefMIPostRA(Reg, MI, SeenIntermediateUse);
+    if (DefMI == nullptr)
+      return false;
+    // Only fold definitions that are load-immediate instructions.
+    unsigned DefOpc = DefMI->getOpcode();
+    if (DefOpc != PPC::LI && DefOpc != PPC::LI8 && DefOpc != PPC::LIS &&
+        DefOpc != PPC::LIS8)
+      return false;
+    assert(DefMI->getOperand(1).isImm() && "Should be an immediate");
+    // Materialize the immediate directly into the copy's destination.
+    MachineInstr *NewInstr =
+        BuildMI(*MI.getParent(), &MI, MI.getDebugLoc(),
+                TII->get(DefMI->getOpcode()), MI.getOperand(0).getReg())
+            .add(DefMI->getOperand(1));
+    (void)NewInstr;
+
+    NumCopyOfImmediateFolded++;
+    LLVM_DEBUG(dbgs() << "Fold instructions: ");
+    LLVM_DEBUG(DefMI->dump());
+    LLVM_DEBUG(MI.dump());
+    LLVM_DEBUG(dbgs() << "To: ");
+    LLVM_DEBUG(NewInstr->dump());
+
+    // The copy is always dead; the original load-immediate is dead only when
+    // the copy killed its last use and no use appeared in between.
+    InstrsToErase.push_back(&MI);
+    if (!SeenIntermediateUse && MI.getOperand(1).isKill())
+      InstrsToErase.push_back(DefMI);
+    return true;
+  }
+
   // This function removes any redundant load immediates. It has two level
   // loops - The outer loop finds the load immediates BBI that could be used
   // to replace following redundancy. The inner loop scans instructions that
@@ -201,6 +258,7 @@
           InstrsToErase.push_back(DefMIToErase);
         }
       }
+      Changed |= foldCopyOfImmediate(MI, InstrsToErase);
     }
 
     // Eliminate conditional branch based on a constant CR bit by
Index: llvm/test/CodeGen/PowerPC/redundant-copy-after-tail-dup.ll
===================================================================
--- llvm/test/CodeGen/PowerPC/redundant-copy-after-tail-dup.ll
+++ llvm/test/CodeGen/PowerPC/redundant-copy-after-tail-dup.ll
@@ -26,8 +26,7 @@
 ; CHECK-P9-NEXT:    cmplwi r3, 2
 ; CHECK-P9-NEXT:    bge- cr0, .LBB0_6
 ; CHECK-P9-NEXT:  # %bb.3: # %land.lhs.true.1
-; CHECK-P9-NEXT:    li r5, 0
-; CHECK-P9-NEXT:    mr r3, r5
+; CHECK-P9-NEXT:    li r3, 0
 ; CHECK-P9-NEXT:    blr
 ; CHECK-P9-NEXT:  .LBB0_4: # %lor.lhs.false
 ; CHECK-P9-NEXT:    cmplwi cr0, r4, 0