Index: llvm/trunk/lib/Target/PowerPC/PPCMIPeephole.cpp =================================================================== --- llvm/trunk/lib/Target/PowerPC/PPCMIPeephole.cpp +++ llvm/trunk/lib/Target/PowerPC/PPCMIPeephole.cpp @@ -23,6 +23,8 @@ #include "PPCInstrBuilder.h" #include "PPCInstrInfo.h" #include "PPCTargetMachine.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -33,6 +35,8 @@ #define DEBUG_TYPE "ppc-mi-peepholes" +STATISTIC(NumOptADDLIs, "Number of optimized ADD instruction fed by LI"); + namespace llvm { void initializePPCMIPeepholePass(PassRegistry&); } @@ -51,6 +55,8 @@ } private: + MachineDominatorTree *MDT; + // Initialize class variables. void initialize(MachineFunction &MFParm); @@ -65,6 +71,13 @@ unsigned lookThruCopyLike(unsigned SrcReg); public: + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + AU.addPreserved(); + MachineFunctionPass::getAnalysisUsage(AU); + } + // Main entry point for this pass. bool runOnMachineFunction(MachineFunction &MF) override { if (skipFunction(*MF.getFunction())) @@ -78,11 +91,25 @@ void PPCMIPeephole::initialize(MachineFunction &MFParm) { MF = &MFParm; MRI = &MF->getRegInfo(); + MDT = &getAnalysis(); TII = MF->getSubtarget().getInstrInfo(); DEBUG(dbgs() << "*** PowerPC MI peephole pass ***\n\n"); DEBUG(MF->dump()); } +static MachineInstr *getVRegDefOrNull(MachineOperand *Op, + MachineRegisterInfo *MRI) { + assert(Op && "Invalid Operand!"); + if (!Op->isReg()) + return nullptr; + + unsigned Reg = Op->getReg(); + if (!TargetRegisterInfo::isVirtualRegister(Reg)) + return nullptr; + + return MRI->getVRegDef(Reg); +} + // Perform peephole optimizations. bool PPCMIPeephole::simplifyCode(void) { bool Simplified = false; @@ -340,8 +367,97 @@ } break; } + + // TODO: Any instruction that has an immediate form fed only by a PHI + // whose operands are all load immediate can be folded away. We currently + // do this for ADD instructions, but should expand it to arithmetic and + // binary instructions with immediate forms in the future. + case PPC::ADD4: + case PPC::ADD8: { + auto isSingleUsePHI = [&](MachineOperand *PhiOp) { + assert(PhiOp && "Invalid Operand!"); + MachineInstr *DefPhiMI = getVRegDefOrNull(PhiOp, MRI); + + return DefPhiMI && (DefPhiMI->getOpcode() == PPC::PHI) && + MRI->hasOneNonDBGUse(DefPhiMI->getOperand(0).getReg()); + }; + + auto dominatesAllSingleUseLIs = [&](MachineOperand *DominatorOp, + MachineOperand *PhiOp) { + assert(PhiOp && "Invalid Operand!"); + assert(DominatorOp && "Invalid Operand!"); + MachineInstr *DefPhiMI = getVRegDefOrNull(PhiOp, MRI); + MachineInstr *DefDomMI = getVRegDefOrNull(DominatorOp, MRI); + + // Note: the vregs only show up at odd indices position of PHI Node, + // the even indices position save the BB info. + for (unsigned i = 1; i < DefPhiMI->getNumOperands(); i += 2) { + MachineInstr *LiMI = + getVRegDefOrNull(&DefPhiMI->getOperand(i), MRI); + if (!LiMI || !MRI->hasOneNonDBGUse(LiMI->getOperand(0).getReg()) || + !MDT->dominates(DefDomMI, LiMI) || + (LiMI->getOpcode() != PPC::LI && LiMI->getOpcode() != PPC::LI8)) + return false; + } + + return true; + }; + + MachineOperand Op1 = MI.getOperand(1); + MachineOperand Op2 = MI.getOperand(2); + if (isSingleUsePHI(&Op2) && dominatesAllSingleUseLIs(&Op1, &Op2)) + std::swap(Op1, Op2); + else if (!isSingleUsePHI(&Op1) || !dominatesAllSingleUseLIs(&Op2, &Op1)) + break; // We don't have an ADD fed by LI's that can be transformed + + // Now we know that Op1 is the PHI node and Op2 is the dominator + unsigned DominatorReg = Op2.getReg(); + + const TargetRegisterClass *TRC = MI.getOpcode() == PPC::ADD8 + ? &PPC::G8RC_and_G8RC_NOX0RegClass + : &PPC::GPRC_and_GPRC_NOR0RegClass; + MRI->setRegClass(DominatorReg, TRC); + + // replace LIs with ADDIs + MachineInstr *DefPhiMI = getVRegDefOrNull(&Op1, MRI); + for (unsigned i = 1; i < DefPhiMI->getNumOperands(); i += 2) { + MachineInstr *LiMI = getVRegDefOrNull(&DefPhiMI->getOperand(i), MRI); + DEBUG(dbgs() << "Optimizing LI to ADDI: "); + DEBUG(LiMI->dump()); + + // There could be repeated registers in the PHI, e.g: %vreg1 = + // PHI %vreg6, , %vreg8, , %vreg8, ; So if we've + // already replaced the def instruction, skip. + if (LiMI->getOpcode() == PPC::ADDI || LiMI->getOpcode() == PPC::ADDI8) + continue; + + assert((LiMI->getOpcode() == PPC::LI || + LiMI->getOpcode() == PPC::LI8) && + "Invalid Opcode!"); + auto LiImm = LiMI->getOperand(1).getImm(); // save the imm of LI + LiMI->RemoveOperand(1); // remove the imm of LI + LiMI->setDesc(TII->get(LiMI->getOpcode() == PPC::LI ? PPC::ADDI + : PPC::ADDI8)); + MachineInstrBuilder(*LiMI->getParent()->getParent(), *LiMI) + .addReg(DominatorReg) + .addImm(LiImm); // restore the imm of LI + DEBUG(LiMI->dump()); + } + + // Replace ADD with COPY + DEBUG(dbgs() << "Optimizing ADD to COPY: "); + DEBUG(MI.dump()); + BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY), + MI.getOperand(0).getReg()) + .add(Op1); + ToErase = &MI; + Simplified = true; + NumOptADDLIs++; + break; + } } } + // If the last instruction was marked for elimination, // remove it now. if (ToErase) { Index: llvm/trunk/test/CodeGen/PowerPC/opt-li-add-to-addi.ll =================================================================== --- llvm/trunk/test/CodeGen/PowerPC/opt-li-add-to-addi.ll +++ llvm/trunk/test/CodeGen/PowerPC/opt-li-add-to-addi.ll @@ -0,0 +1,60 @@ +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s + +define i64 @testOptimizeLiAddToAddi(i64 %a) { +; CHECK-LABEL: testOptimizeLiAddToAddi: +; CHECK: addi 3, 30, 2444 +; CHECK: bl callv +; CHECK: addi 3, 30, 234 +; CHECK: bl call +; CHECK: blr +entry: + %cmp = icmp sgt i64 %a, 33 + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void bitcast (void (...)* @callv to void ()*)() + br label %if.end + +if.end: + %add.0 = phi i64 [ 234, %if.then ], [ 2444, %entry ] + %add2 = add nsw i64 %add.0, %a + %call = tail call i64 @call(i64 %add2) + ret i64 %call +} + +define i64 @testThreePhiIncomings(i64 %a) { +; CHECK-LABEL: testThreePhiIncomings: +; CHECK: bl callv +; CHECK: addi 3, 30, 234 +; CHECK: addi 3, 30, 2444 +; CHECK: bl callv +; CHECK: addi 3, 30, 345 +; CHECK: bl call +; CHECK: blr +entry: + %cmp = icmp slt i64 %a, 33 + br i1 %cmp, label %if.then, label %if.else + +if.then: + tail call void bitcast (void (...)* @callv to void ()*)() + br label %if.end4 + +if.else: + %cmp1 = icmp slt i64 %a, 55 + br i1 %cmp1, label %if.then2, label %if.end4 + +if.then2: + tail call void bitcast (void (...)* @callv to void ()*)() + br label %if.end4 + +if.end4: + %add.0 = phi i64 [ 234, %if.then ], [ 345, %if.then2 ], [ 2444, %if.else ] + %add5 = add nsw i64 %add.0, %a + %call = tail call i64 @call(i64 %add5) + ret i64 %call +} + +declare void @callv(...) + +declare i64 @call(i64)