diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/llvm/lib/Target/PowerPC/PPCInstrInfo.h
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.h
@@ -307,6 +307,11 @@
   bool FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
                      unsigned Reg, MachineRegisterInfo *MRI) const override;
 
+  // Like FoldImmediate, but leaves DefMI in place; the caller decides whether
+  // to delete it.
+  bool FoldImmediateWithoutDelete(MachineInstr &UseMI, MachineInstr &DefMI,
+                                  unsigned Reg, MachineRegisterInfo *MRI) const;
+
   // If conversion by predication (only supported by some branch instructions).
   // All of the profitability checks always return true; it is always
   // profitable to use the predicated branches.
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -1389,6 +1389,72 @@
   return true;
 }
 
+// This is a modified version of FoldImmediate that does not delete DefMI.
+// It only performs the fold and defers the decision to delete DefMI to the
+// caller. Returns true if Reg was replaced by ZERO/ZERO8 in UseMI.
+bool PPCInstrInfo::FoldImmediateWithoutDelete(MachineInstr &UseMI,
+                                              MachineInstr &DefMI, unsigned Reg,
+                                              MachineRegisterInfo *MRI) const {
+  // For some instructions, it is legal to fold ZERO into the RA register field.
+  // A zero immediate should always be loaded with a single li.
+  unsigned DefOpc = DefMI.getOpcode();
+  if (DefOpc != PPC::LI && DefOpc != PPC::LI8)
+    return false;
+  if (!DefMI.getOperand(1).isImm())
+    return false;
+  if (DefMI.getOperand(1).getImm() != 0)
+    return false;
+
+  // Note that we cannot here invert the arguments of an isel in order to fold
+  // a ZERO into what is presented as the second argument. All we have here
+  // is the condition bit, and that might come from a CR-logical bit operation.
+
+  const MCInstrDesc &UseMCID = UseMI.getDesc();
+
+  // Only fold into real machine instructions.
+  if (UseMCID.isPseudo())
+    return false;
+
+  unsigned UseIdx;
+  for (UseIdx = 0; UseIdx < UseMI.getNumOperands(); ++UseIdx)
+    if (UseMI.getOperand(UseIdx).isReg() &&
+        UseMI.getOperand(UseIdx).getReg() == Reg)
+      break;
+
+  assert(UseIdx < UseMI.getNumOperands() && "Cannot find Reg in UseMI");
+  assert(UseIdx < UseMCID.getNumOperands() && "No operand description for Reg");
+
+  const MCOperandInfo *UseInfo = &UseMCID.OpInfo[UseIdx];
+
+  // We can fold the zero if this register requires a GPRC_NOR0/G8RC_NOX0
+  // register (which might also be specified as a pointer class kind).
+  if (UseInfo->isLookupPtrRegClass()) {
+    if (UseInfo->RegClass /* Kind */ != 1)
+      return false;
+  } else {
+    if (UseInfo->RegClass != PPC::GPRC_NOR0RegClassID &&
+        UseInfo->RegClass != PPC::G8RC_NOX0RegClassID)
+      return false;
+  }
+
+  // Make sure this is not tied to an output register (or otherwise
+  // constrained). This is true for ST?UX registers, for example, which
+  // are tied to their output registers.
+  if (UseInfo->Constraints != 0)
+    return false;
+
+  unsigned ZeroReg;
+  if (UseInfo->isLookupPtrRegClass()) {
+    bool isPPC64 = Subtarget.isPPC64();
+    ZeroReg = isPPC64 ? PPC::ZERO8 : PPC::ZERO;
+  } else {
+    ZeroReg = UseInfo->RegClass == PPC::G8RC_NOX0RegClassID ?
+              PPC::ZERO8 : PPC::ZERO;
+  }
+  UseMI.getOperand(UseIdx).setReg(ZeroReg);
+  return true;
+}
+
 static bool MBBDefinesCTR(MachineBasicBlock &MBB) {
   for (MachineBasicBlock::iterator I = MBB.begin(), IE = MBB.end();
        I != IE; ++I)
diff --git a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
--- a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
+++ b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
@@ -55,6 +55,8 @@
           "Number of pairs of rotate left, clear left/right collapsed");
 STATISTIC(NumEXTSWAndSLDICombined,
           "Number of pairs of EXTSW and SLDI combined as EXTSWSLI");
+STATISTIC(NumLIFoldedAndRemoved,
+          "Number of LI(8) reg, 0 that are folded to r0 and removed");
 
 static cl::opt<bool>
 FixedPointRegToImm("ppc-reg-to-imm-fixed-point", cl::Hidden, cl::init(true),
@@ -312,7 +314,34 @@
 
       default:
         break;
-
+      case PPC::LI:
+      case PPC::LI8: {
+        // If this instruction materializes zero, visit every user and, where
+        // the user's operand treats register 0 as the literal value zero,
+        // replace the operand with PPC::ZERO(8). The load immediate is
+        // removed once no non-debug use of its result remains.
+        if (!MI.getOperand(1).isImm() || MI.getOperand(1).getImm() != 0)
+          break;
+        unsigned MIDestReg = MI.getOperand(0).getReg();
+        bool FoldedAny = false;
+        // A successful fold rewrites the operand's register, which unlinks it
+        // from MIDestReg's use list, so step the iterator before folding.
+        for (auto UI = MRI->use_instr_begin(MIDestReg),
+                  UE = MRI->use_instr_end();
+             UI != UE;) {
+          MachineInstr &UseMI = *UI++;
+          FoldedAny |=
+              TII->FoldImmediateWithoutDelete(UseMI, MI, MIDestReg, MRI);
+        }
+        // Decide on deletion only after every user has been visited, so that
+        // a zero with several foldable users is removed as well.
+        if (FoldedAny && MRI->use_nodbg_empty(MIDestReg)) {
+          ++NumLIFoldedAndRemoved;
+          ToErase = &MI;
+        }
+        Simplified |= FoldedAny;
+        break;
+      }
       case PPC::STD: {
         MachineFrameInfo &MFI = MF->getFrameInfo();
         if (MFI.hasVarSizedObjects() ||
diff --git a/llvm/test/CodeGen/PowerPC/fold-remove-li.ll b/llvm/test/CodeGen/PowerPC/fold-remove-li.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/fold-remove-li.ll
@@ -0,0 +1,37 @@
+; NOTE: This test verifies that a redundant load immediate of zero is folded
+; NOTE: into its use in an isel and deleted once it is no longer in use.
+; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -ppc-asm-full-reg-names \
+; RUN:   -verify-machineinstrs < %s | FileCheck %s
+
+%struct.node = type { i32, i16 }
+
+@val = common dso_local local_unnamed_addr global %struct.node* null, align 8
+
+define dso_local signext i32 @redunLoadImm(%struct.node* %arg) {
+; CHECK-LABEL: redunLoadImm:
+; Verify that the load immediate has been folded into the isel and deleted.
+; CHECK-NOT:   li r{{[0-9]+}}, 0
+; CHECK:       isel r{{[0-9]+}}, 0, r{{[0-9]+}}, eq
+entry:
+  %tobool = icmp eq %struct.node* %arg, null
+  br i1 %tobool, label %while.end, label %while.body.lr.ph
+
+while.body.lr.ph:                                 ; preds = %entry
+  %next = getelementptr inbounds %struct.node, %struct.node* %arg, i64 0, i32 1
+  br label %while.body
+
+while.body:                                       ; preds = %while.body, %while.body.lr.ph
+  %0 = load i16, i16* %next, align 4
+  %idx.ext = sext i16 %0 to i64
+  %add.ptr = getelementptr inbounds %struct.node, %struct.node* %arg, i64 %idx.ext
+  %cmp = icmp eq i16 %0, 0
+  %spec.store.select = select i1 %cmp, %struct.node* null, %struct.node* %add.ptr
+  store %struct.node* %spec.store.select, %struct.node** @val, align 8
+  br label %while.body
+
+while.end:                                        ; preds = %entry
+  %1 = load %struct.node*, %struct.node** @val, align 8
+  %value = getelementptr inbounds %struct.node, %struct.node* %1, i64 0, i32 0
+  %2 = load i32, i32* %value, align 4
+  ret i32 %2
+}
diff --git a/llvm/test/CodeGen/PowerPC/save-crbp-ppc32svr4.ll b/llvm/test/CodeGen/PowerPC/save-crbp-ppc32svr4.ll
--- a/llvm/test/CodeGen/PowerPC/save-crbp-ppc32svr4.ll
+++ b/llvm/test/CodeGen/PowerPC/save-crbp-ppc32svr4.ll
@@ -13,8 +13,7 @@
 ; CHECK: addic 29, 0, 20
 ; Save CR through R12 using R29 as the stack pointer (aligned base pointer).
 ; CHECK: mfcr 12
-; CHECK: stw 28, -24(29)
-; CHECK: stw 12, -28(29)
+; CHECK: stw 12, -24(29)
 
 target datalayout = "E-m:e-p:32:32-i64:64-n32"
 target triple = "powerpc-unknown-freebsd"