diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/llvm/lib/Target/PowerPC/PPCInstrInfo.h
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.h
@@ -307,6 +307,12 @@
   bool FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, unsigned Reg,
                      MachineRegisterInfo *MRI) const override;
 
+  // Rewrites the operand of UseMI that uses Reg to ZERO/ZERO8 when DefMI is a
+  // load immediate (LI/LI8) that can be folded. Unlike FoldImmediate, DefMI
+  // is never erased here. Returns true iff the operand was rewritten.
+  bool onlyFoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
+                         unsigned Reg) const;
+
   // If conversion by predication (only supported by some branch instructions).
   // All of the profitability checks always return true; it is always
   // profitable to use the predicated branches.
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -1321,9 +1321,11 @@
   return false;
 }
 
-bool PPCInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
-                                 unsigned Reg, MachineRegisterInfo *MRI) const {
-  // For some instructions, it is legal to fold ZERO into the RA register field.
+// For some instructions, it is legal to fold ZERO into the RA register field.
+// This function performs that fold by replacing the operand with PPC::ZERO,
+// it does not consider whether the load immediate zero is no longer in use.
+bool PPCInstrInfo::onlyFoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
+                                     unsigned Reg) const {
   // A zero immediate should always be loaded with a single li.
   unsigned DefOpc = DefMI.getOpcode();
   if (DefOpc != PPC::LI && DefOpc != PPC::LI8)
@@ -1343,6 +1345,8 @@
   if (UseMCID.isPseudo())
     return false;
 
+  // We need to find which of the User's operands is to be folded, that will be
+  // the operand that matches the given register ID.
   unsigned UseIdx;
   for (UseIdx = 0; UseIdx < UseMI.getNumOperands(); ++UseIdx)
     if (UseMI.getOperand(UseIdx).isReg() &&
@@ -1379,14 +1383,21 @@
     ZeroReg = UseInfo->RegClass == PPC::G8RC_NOX0RegClassID ?
               PPC::ZERO8 : PPC::ZERO;
   }
 
-
-  bool DeleteDef = MRI->hasOneNonDBGUse(Reg);
+  UseMI.getOperand(UseIdx).setReg(ZeroReg);
+  return true;
+}
 
-  if (DeleteDef)
+// Folds zero into instructions which have a load immediate zero as an operand
+// but also recognize zero as immediate zero. If the definition of the load
+// has no more users it is deleted.
+bool PPCInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
+                                 unsigned Reg, MachineRegisterInfo *MRI) const {
+  bool DeleteDef = MRI->hasOneNonDBGUse(Reg);
+  bool Changed = onlyFoldImmediate(UseMI, DefMI, Reg);
+  if (Changed && DeleteDef)
     DefMI.eraseFromParent();
-
-  return true;
+  return Changed;
 }
 
 static bool MBBDefinesCTR(MachineBasicBlock &MBB) {
diff --git a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
--- a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
+++ b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
@@ -55,6 +55,8 @@
           "Number of pairs of rotate left, clear left/right collapsed");
 STATISTIC(NumEXTSWAndSLDICombined,
           "Number of pairs of EXTSW and SLDI combined as EXTSWSLI");
+STATISTIC(NumLoadImmZeroFoldedAndRemoved,
+          "Number of LI(8) reg, 0 that are folded to r0 and removed");
 
 static cl::opt<bool>
 FixedPointRegToImm("ppc-reg-to-imm-fixed-point", cl::Hidden, cl::init(true),
@@ -312,7 +314,30 @@
 
       default:
         break;
-
+      case PPC::LI:
+      case PPC::LI8: {
+        // If we are materializing a zero, look for any use operands for which
+        // zero means immediate zero. All such operands can be replaced with
+        // PPC::ZERO.
+        if (!MI.getOperand(1).isImm() || MI.getOperand(1).getImm() != 0)
+          break;
+        unsigned MIDestReg = MI.getOperand(0).getReg();
+        // Accumulate with |= so that a fold into an earlier user is still
+        // reported even when a later user cannot be folded.
+        // NOTE(review): onlyFoldImmediate rewrites the matching operand of
+        // UseMI while MIDestReg's use list is being walked; confirm that the
+        // iterator tolerates this.
+        bool Folded = false;
+        for (MachineInstr &UseMI : MRI->use_instructions(MIDestReg))
+          Folded |= TII->onlyFoldImmediate(UseMI, MI, MIDestReg);
+        Simplified |= Folded;
+        // Only erase the li once no non-debug user of its result remains.
+        if (MRI->use_nodbg_empty(MIDestReg) && Folded) {
+          ++NumLoadImmZeroFoldedAndRemoved;
+          ToErase = &MI;
+        }
+        break;
+      }
       case PPC::STD: {
         MachineFrameInfo &MFI = MF->getFrameInfo();
         if (MFI.hasVarSizedObjects() ||
diff --git a/llvm/test/CodeGen/PowerPC/fold-remove-li.ll b/llvm/test/CodeGen/PowerPC/fold-remove-li.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/fold-remove-li.ll
@@ -0,0 +1,44 @@
+; NOTE: This test verifies that a redundant load immediate of zero is folded
+; NOTE: from its use in an isel and deleted as it is no longer in use.
+; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -ppc-asm-full-reg-names \
+; RUN:   -verify-machineinstrs < %s | FileCheck %s -check-prefix=CHECK-LE
+; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -ppc-asm-full-reg-names \
+; RUN:   -verify-machineinstrs < %s | FileCheck %s -check-prefix=CHECK-BE
+
+%0 = type { i32, i16 }
+
+@val = common dso_local local_unnamed_addr global %0* null, align 8
+
+define dso_local signext i32 @redunLoadImm(%0* %arg) {
+; CHECK-LE-LABEL: redunLoadImm:
+; verify that the load immediate has been folded into the isel and deleted
+; CHECK-LE-NOT:   li r[[REG1:[0-9]+]], 0
+; CHECK-LE:       isel r[[REG2:[0-9]+]], 0, r[[REG3:[0-9]+]], eq
+
+; CHECK-BE-LABEL: redunLoadImm:
+; verify that the load immediate has been folded into the addi and deleted
+; CHECK-BE-NOT:   li r[[REG1:[0-9]+]], 0
+; CHECK-BE:       addi r[[REG2:[0-9]+]], 0, 0
+bb:
+  %tmp = icmp eq %0* %arg, null
+  br i1 %tmp, label %bb9, label %bb1
+
+bb1:                                              ; preds = %bb
+  %tmp2 = getelementptr inbounds %0, %0* %arg, i64 0, i32 1
+  br label %bb3
+
+bb3:                                              ; preds = %bb3, %bb1
+  %tmp4 = load i16, i16* %tmp2, align 4
+  %tmp5 = sext i16 %tmp4 to i64
+  %tmp6 = getelementptr inbounds %0, %0* %arg, i64 %tmp5
+  %tmp7 = icmp eq i16 %tmp4, 0
+  %tmp8 = select i1 %tmp7, %0* null, %0* %tmp6
+  store %0* %tmp8, %0** @val, align 8
+  br label %bb3
+
+bb9:                                              ; preds = %bb
+  %tmp10 = load %0*, %0** @val, align 8
+  %tmp11 = getelementptr inbounds %0, %0* %tmp10, i64 0, i32 0
+  %tmp12 = load i32, i32* %tmp11, align 4
+  ret i32 %tmp12
+}
diff --git a/llvm/test/CodeGen/PowerPC/save-crbp-ppc32svr4.ll b/llvm/test/CodeGen/PowerPC/save-crbp-ppc32svr4.ll
--- a/llvm/test/CodeGen/PowerPC/save-crbp-ppc32svr4.ll
+++ b/llvm/test/CodeGen/PowerPC/save-crbp-ppc32svr4.ll
@@ -13,8 +13,7 @@
 ; CHECK: addic 29, 0, 20
 ; Save CR through R12 using R29 as the stack pointer (aligned base pointer).
 ; CHECK: mfcr 12
-; CHECK: stw 28, -24(29)
-; CHECK: stw 12, -28(29)
+; CHECK: stw 12, -24(29)
 
 target datalayout = "E-m:e-p:32:32-i64:64-n32"
 target triple = "powerpc-unknown-freebsd"