Index: lib/Target/PowerPC/PPCMIPeephole.cpp =================================================================== --- lib/Target/PowerPC/PPCMIPeephole.cpp +++ lib/Target/PowerPC/PPCMIPeephole.cpp @@ -23,6 +23,8 @@ #include "PPCInstrBuilder.h" #include "PPCInstrInfo.h" #include "PPCTargetMachine.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -32,6 +34,8 @@ #define DEBUG_TYPE "ppc-mi-peepholes" +STATISTIC(NumOptADDLIs, "Number of optimized ADD instruction fed by LI"); + namespace llvm { void initializePPCMIPeepholePass(PassRegistry&); } @@ -50,6 +54,8 @@ } private: + MachineDominatorTree *MDT; + // Initialize class variables. void initialize(MachineFunction &MFParm); @@ -61,6 +67,13 @@ unsigned lookThruCopyLike(unsigned SrcReg); public: + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + AU.addPreserved(); + MachineFunctionPass::getAnalysisUsage(AU); + } + // Main entry point for this pass. bool runOnMachineFunction(MachineFunction &MF) override { if (skipFunction(*MF.getFunction())) @@ -74,11 +87,24 @@ void PPCMIPeephole::initialize(MachineFunction &MFParm) { MF = &MFParm; MRI = &MF->getRegInfo(); + MDT = &getAnalysis(); TII = MF->getSubtarget().getInstrInfo(); DEBUG(dbgs() << "*** PowerPC MI peephole pass ***\n\n"); DEBUG(MF->dump()); } +MachineInstr *getVRegDefOrNull(MachineOperand *Op, MachineRegisterInfo *MRI) { + assert(Op && "Invalid Operand!"); + if (!Op->isReg()) + return nullptr; + + unsigned Reg = Op->getReg(); + if (!TargetRegisterInfo::isVirtualRegister(Reg)) + return nullptr; + + return MRI->getVRegDef(Reg); +} + // Perform peephole optimizations. bool PPCMIPeephole::simplifyCode(void) { bool Simplified = false; @@ -336,6 +362,92 @@ } break; } + + case PPC::ADD4: + case PPC::ADD8: { + auto replaceLiWithAddi = [&](MachineOperand *DominatorOp, + MachineOperand *PhiOp) { + assert(PhiOp && "Invalid Operand!"); + assert(DominatorOp && "Invalid Operand!"); + MachineInstr *DefPhiMI = getVRegDefOrNull(PhiOp, MRI); + MachineInstr *DefDomMI = getVRegDefOrNull(DominatorOp, MRI); + if (!DefPhiMI || !DefDomMI) + return false; + + if (DefPhiMI->getOpcode() != PPC::PHI) + return false; + + if (!MRI->hasOneNonDBGUse(DefPhiMI->getOperand(0).getReg())) + return false; + + // Note: the vregs only show up at odd indices position of PHI Node, + // the even indices position save the BB info. + for (unsigned i = 1; i < DefPhiMI->getNumOperands(); i += 2) { + MachineInstr *LiMI = + getVRegDefOrNull(&DefPhiMI->getOperand(i), MRI); + if (!LiMI || !MRI->hasOneNonDBGUse(LiMI->getOperand(0).getReg()) || + !MDT->dominates(DefDomMI, LiMI) || + (LiMI->getOpcode() != PPC::LI && LiMI->getOpcode() != PPC::LI8)) + return false; + } + + // Note: we already known DominatorOp is virtual register above + unsigned DominatorReg = DominatorOp->getReg(); + + const TargetRegisterClass *TRC = + MI.getOpcode() == PPC::ADD8 ? &PPC::G8RC_and_G8RC_NOX0RegClass + : &PPC::GPRC_and_GPRC_NOR0RegClass; + MRI->setRegClass(DominatorReg, TRC); + + // replace LIs with ADDIs + for (unsigned i = 1; i < DefPhiMI->getNumOperands(); i += 2) { + MachineInstr *LiMI = + getVRegDefOrNull(&DefPhiMI->getOperand(i), MRI); + DEBUG(dbgs() << "Optimizing LI to ADDI: "); + DEBUG(LiMI->dump()); + + // There could be repeated registers in the PHI, e.g: %vreg1 = + // PHI %vreg6, , %vreg8, , %vreg8, ; So if we've + // already replaced the def instruction, skip. + if (LiMI->getOpcode() == PPC::ADDI || + LiMI->getOpcode() == PPC::ADDI8) + continue; + + assert((LiMI->getOpcode() == PPC::LI || + LiMI->getOpcode() == PPC::LI8) && + "Invalid Opcode!"); + auto LiImm = LiMI->getOperand(1).getImm(); // save the imm of LI + LiMI->RemoveOperand(1); // remove the imm of LI + LiMI->setDesc(TII->get(LiMI->getOpcode() == PPC::LI ? PPC::ADDI + : PPC::ADDI8)); + MachineInstrBuilder(*LiMI->getParent()->getParent(), *LiMI) + .addReg(DominatorReg) + .addImm(LiImm); // restore the imm of LI + DEBUG(LiMI->dump()); + } + + return true; + }; + + auto replaceAddWithCopy = [&](MachineOperand &PhiOp) { + DEBUG(dbgs() << "Optimizing ADD to COPY: "); + DEBUG(MI.dump()); + BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY), + MI.getOperand(0).getReg()) + .add(PhiOp); + ToErase = &MI; + Simplified = true; + NumOptADDLIs++; + }; + + MachineOperand Op1 = MI.getOperand(1); + MachineOperand Op2 = MI.getOperand(2); + if (replaceLiWithAddi(&Op2, &Op1)) + replaceAddWithCopy(Op1); + else if (replaceLiWithAddi(&Op1, &Op2)) + replaceAddWithCopy(Op2); + break; + } } } // If the last instruction was marked for elimination, Index: test/CodeGen/PowerPC/opt-li-add-to-addi.ll =================================================================== --- /dev/null +++ test/CodeGen/PowerPC/opt-li-add-to-addi.ll @@ -0,0 +1,60 @@ +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s + +define i64 @testOptimizeLiAddToAddi(i64 %a) { +; CHECK-LABEL: testOptimizeLiAddToAddi: +; CHECK: addi 3, 30, 2444 +; CHECK: bl callv +; CHECK: addi 3, 30, 234 +; CHECK: bl call +; CHECK: blr +entry: + %cmp = icmp sgt i64 %a, 33 + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void bitcast (void (...)* @callv to void ()*)() + br label %if.end + +if.end: + %add.0 = phi i64 [ 234, %if.then ], [ 2444, %entry ] + %add2 = add nsw i64 %add.0, %a + %call = tail call i64 @call(i64 %add2) + ret i64 %call +} + +define i64 @testThreePhiIncomings(i64 %a) { +; CHECK-LABEL: testThreePhiIncomings: +; CHECK: bl callv +; CHECK: addi 3, 30, 234 +; CHECK: addi 3, 30, 2444 +; CHECK: bl callv +; CHECK: addi 3, 30, 345 +; CHECK: bl call +; CHECK: blr +entry: + %cmp = icmp slt i64 %a, 33 + br i1 %cmp, label %if.then, label %if.else + +if.then: + tail call void bitcast (void (...)* @callv to void ()*)() + br label %if.end4 + +if.else: + %cmp1 = icmp slt i64 %a, 55 + br i1 %cmp1, label %if.then2, label %if.end4 + +if.then2: + tail call void bitcast (void (...)* @callv to void ()*)() + br label %if.end4 + +if.end4: + %add.0 = phi i64 [ 234, %if.then ], [ 345, %if.then2 ], [ 2444, %if.else ] + %add5 = add nsw i64 %add.0, %a + %call = tail call i64 @call(i64 %add5) + ret i64 %call +} + +declare void @callv(...) + +declare i64 @call(i64)