Index: lib/Target/PowerPC/PPCMIPeephole.cpp
===================================================================
--- lib/Target/PowerPC/PPCMIPeephole.cpp
+++ lib/Target/PowerPC/PPCMIPeephole.cpp
@@ -23,6 +23,8 @@
 #include "PPCInstrBuilder.h"
 #include "PPCInstrInfo.h"
 #include "PPCTargetMachine.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -32,6 +34,8 @@
 
 #define DEBUG_TYPE "ppc-mi-peepholes"
 
+STATISTIC(NumOptADDLIs, "Number of optimized ADD instruction fed by LI");
+
 namespace llvm {
   void initializePPCMIPeepholePass(PassRegistry&);
 }
@@ -50,6 +54,10 @@
   }
 
 private:
+  // Dominator tree of the function being processed; assigned at the top of
+  // runOnMachineFunction and queried by the ADD4/ADD8 peephole.
+  MachineDominatorTree *MDT;
+
   // Initialize class variables.
   void initialize(MachineFunction &MFParm);
 
@@ -61,8 +69,16 @@
   unsigned lookThruCopyLike(unsigned SrcReg);
 
 public:
+
+  // Require the machine dominator tree, which runOnMachineFunction fetches.
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addRequired<MachineDominatorTree>();
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+
   // Main entry point for this pass.
   bool runOnMachineFunction(MachineFunction &MF) override {
+    MDT = &getAnalysis<MachineDominatorTree>();
     if (skipFunction(*MF.getFunction()))
       return false;
     initialize(MF);
@@ -336,6 +352,119 @@
         }
         break;
       }
+
+      // Peephole: for an ADD of a PHI result and another vreg (in either
+      // operand order), where every PHI input is a single-use LI/LI8 whose
+      // definition is dominated by the other vreg's definition, rewrite each
+      // LI into an ADDI of that vreg and turn the ADD into a COPY of the PHI.
+      case PPC::ADD4:
+      case PPC::ADD8: {
+        // Return the instruction defining Op's register, or nullptr when Op
+        // is not a register operand or its register is not virtual.
+        auto getVRegDefMI = [&](MachineOperand *Op, MachineRegisterInfo *MRI) {
+          assert(Op && "Invalid Operand!");
+          if (!Op->isReg())
+            return (MachineInstr *)nullptr;
+
+          unsigned Reg = Op->getReg();
+          if (!TargetRegisterInfo::isVirtualRegister(Reg))
+            return (MachineInstr *)nullptr;
+
+          return MRI->getVRegDef(Reg);
+        };
+
+        // Return true after rewriting every LI feeding PhiOp's PHI into an
+        // ADDI of DominatorOp's register. All legality checks run before the
+        // first modification, so nothing is changed when this returns false.
+        auto replaceLiWithAddi = [&](MachineOperand *DominatorOp,
+                                     MachineOperand *PhiOp) {
+          assert(PhiOp && DominatorOp && "Invalid Operand!");
+          MachineInstr *DefPhiMI = getVRegDefMI(PhiOp, MRI);
+          MachineInstr *DefDomMI = getVRegDefMI(DominatorOp, MRI);
+          if (!DefPhiMI || !DefDomMI)
+            return false;
+
+          if (DefPhiMI->getOpcode() != PPC::PHI)
+            return false;
+
+          if (!MRI->hasOneNonDBGUse(DefPhiMI->getOperand(0).getReg()))
+            return false;
+
+          // Note: the incoming vregs sit at the odd operand indices of the
+          // PHI node; the even indices hold the BB info.
+          for (unsigned i = 1; i < DefPhiMI->getNumOperands(); i += 2) {
+            MachineInstr *LiMI = getVRegDefMI(&DefPhiMI->getOperand(i), MRI);
+            // Test the opcode before reading operand 0, so that operand is
+            // only queried as a register once LiMI is known to be an LI/LI8.
+            if (!LiMI ||
+                (LiMI->getOpcode() != PPC::LI &&
+                 LiMI->getOpcode() != PPC::LI8) ||
+                !MRI->hasOneNonDBGUse(LiMI->getOperand(0).getReg()) ||
+                !MDT->dominates(DefDomMI, LiMI))
+              return false;
+          }
+
+          // Note: DominatorOp is already known to be a virtual register,
+          // since getVRegDefMI returned a definition for it above.
+          unsigned DominatorReg = DominatorOp->getReg();
+
+          // Restrict DominatorReg to the NOX0/NOR0 class (presumably because
+          // it is about to become the register operand of ADDI/ADDI8).
+          const TargetRegisterClass *TRC =
+              MI.getOpcode() == PPC::ADD8 ? &PPC::G8RC_and_G8RC_NOX0RegClass
+                                          : &PPC::GPRC_and_GPRC_NOR0RegClass;
+          MRI->setRegClass(DominatorReg, TRC);
+
+          // Replace LIs with ADDIs.
+          for (unsigned i = 1; i < DefPhiMI->getNumOperands(); i += 2) {
+            MachineInstr *LiMI = getVRegDefMI(&DefPhiMI->getOperand(i), MRI);
+            DEBUG(dbgs() << "Optimizing LI to ADDI: ");
+            DEBUG(LiMI->dump());
+
+            // There could be repeated registers in the PHI, e.g.:
+            //   %vreg1 = PHI %vreg6, <BB#1>, %vreg8, <BB#2>, %vreg8, <BB#3>
+            // In that case only the first occurrence is rewritten; a def that
+            // has already been turned into an ADDI is skipped.
+            if (LiMI->getOpcode() == PPC::ADDI ||
+                LiMI->getOpcode() == PPC::ADDI8)
+              continue;
+
+            assert((LiMI->getOpcode() == PPC::LI ||
+                    LiMI->getOpcode() == PPC::LI8) && "Invalid Opcode!");
+            auto LiImm = LiMI->getOperand(1).getImm(); // save the imm of LI
+            LiMI->RemoveOperand(1); // remove the imm of LI
+            LiMI->setDesc(TII->get(LiMI->getOpcode() == PPC::LI ? PPC::ADDI
+                                                                : PPC::ADDI8));
+            MachineInstrBuilder(*LiMI->getParent()->getParent(), *LiMI)
+                .addReg(DominatorReg)
+                .addImm(LiImm); // restore the imm of LI
+            DEBUG(LiMI->dump()); // LiMI is actually AddiMI now
+          }
+
+          return true;
+        };
+
+        // Insert a COPY of PhiOp into MI's result register ahead of MI, then
+        // schedule MI for erasure and bump the statistic.
+        auto replaceAddWithCopy = [&](MachineOperand &PhiOp) {
+          DEBUG(dbgs() << "Optimizing ADD to COPY: ");
+          DEBUG(MI.dump());
+          BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
+                  MI.getOperand(0).getReg())
+              .add(PhiOp);
+          ToErase = &MI;
+          Simplified = true;
+          NumOptADDLIs++;
+        };
+
+        MachineOperand Op1 = MI.getOperand(1);
+        MachineOperand Op2 = MI.getOperand(2);
+        if (replaceLiWithAddi(&Op2, &Op1))
+          replaceAddWithCopy(Op1);
+        else if (replaceLiWithAddi(&Op1, &Op2))
+          replaceAddWithCopy(Op2);
+        break;
+      }
       }
     }
     // If the last instruction was marked for elimination,
Index: test/CodeGen/PowerPC/opt-li-add-to-addi.ll
===================================================================
--- /dev/null
+++ test/CodeGen/PowerPC/opt-li-add-to-addi.ll
@@ -0,0 +1,28 @@
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s
+
+define i64 @testOptimizeLiAddToAddi(i64 %a) {
+; CHECK-LABEL: testOptimizeLiAddToAddi:
+; CHECK: addi 3, 30, 2444
+; CHECK: bl callv
+; CHECK: addi 3, 30, 234
+; CHECK: bl call
+; CHECK: blr
+entry:
+  %cmp = icmp sgt i64 %a, 33
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+  tail call void bitcast (void (...)* @callv to void ()*)()
+  br label %if.end
+
+if.end:
+  %add.0 = phi i64 [ 234, %if.then ], [ 2444, %entry ]
+  %add2 = add nsw i64 %add.0, %a
+  %call = tail call i64 @call(i64 %add2)
+  ret i64 %call
+}
+
+declare void @callv(...)
+
+declare i64 @call(i64)