diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/llvm/lib/Target/PowerPC/PPCInstrInfo.h --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.h +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.h @@ -336,6 +336,9 @@ bool FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Register Reg, MachineRegisterInfo *MRI) const override; + bool onlyFoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, + Register Reg) const; + // If conversion by predication (only supported by some branch instructions). // All of the profitability checks always return true; it is always // profitable to use the predicated branches. diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -1350,9 +1350,11 @@ return false; } -bool PPCInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, - Register Reg, MachineRegisterInfo *MRI) const { - // For some instructions, it is legal to fold ZERO into the RA register field. +// For some instructions, it is legal to fold ZERO into the RA register field. +// This function performs that fold by replacing the operand with PPC::ZERO, +// it does not consider whether the load immediate zero is no longer in use. +bool PPCInstrInfo::onlyFoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, + Register Reg) const { // A zero immediate should always be loaded with a single li. unsigned DefOpc = DefMI.getOpcode(); if (DefOpc != PPC::LI && DefOpc != PPC::LI8) @@ -1372,6 +1374,8 @@ if (UseMCID.isPseudo()) return false; + // We need to find which of the User's operands is to be folded, that will be + // the operand that matches the given register ID. 
unsigned UseIdx; for (UseIdx = 0; UseIdx < UseMI.getNumOperands(); ++UseIdx) if (UseMI.getOperand(UseIdx).isReg() && @@ -1409,13 +1413,22 @@ PPC::ZERO8 : PPC::ZERO; } - bool DeleteDef = MRI->hasOneNonDBGUse(Reg); UseMI.getOperand(UseIdx).setReg(ZeroReg); + return true; +} - if (DeleteDef) +// Folds zero into instructions which have a load immediate zero as an operand +// but also recognize zero as immediate zero. If the fold succeeds and the +// load immediate is left without non-debug users, it is deleted. Returns true +// only if UseMI was changed. +bool PPCInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, + Register Reg, MachineRegisterInfo *MRI) const { + bool Changed = onlyFoldImmediate(UseMI, DefMI, Reg); + // Only erase DefMI after a successful fold, so that a false return never + // leaves the caller holding a deleted instruction. + if (Changed && MRI->use_nodbg_empty(Reg)) DefMI.eraseFromParent(); - - return true; + return Changed; } static bool MBBDefinesCTR(MachineBasicBlock &MBB) { diff --git a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp --- a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp +++ b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp @@ -57,6 +57,8 @@ "Number of pairs of rotate left, clear left/right collapsed"); STATISTIC(NumEXTSWAndSLDICombined, "Number of pairs of EXTSW and SLDI combined as EXTSWSLI"); +STATISTIC(NumLoadImmZeroFoldedAndRemoved, + "Number of LI(8) reg, 0 that are folded to r0 and removed"); static cl::opt FixedPointRegToImm("ppc-reg-to-imm-fixed-point", cl::Hidden, cl::init(true), @@ -319,7 +321,26 @@ default: break; - + case PPC::LI: + case PPC::LI8: { + // If we are materializing a zero, look for any use operands for which + // zero means immediate zero. All such operands can be replaced with + // PPC::ZERO.
+ if (!MI.getOperand(1).isImm() || MI.getOperand(1).getImm() != 0) + break; + Register MIDestReg = MI.getOperand(0).getReg(); + // A successful fold rewrites the use operand to PPC::ZERO, which unlinks + // it from MIDestReg's use list, so step the iterator past each user + // before folding it to keep visiting the remaining users. + for (MachineInstr &UseMI : + llvm::make_early_inc_range(MRI->use_instructions(MIDestReg))) + Simplified |= TII->onlyFoldImmediate(UseMI, MI, MIDestReg); + if (MRI->use_nodbg_empty(MIDestReg)) { + ++NumLoadImmZeroFoldedAndRemoved; + ToErase = &MI; + } + break; + } case PPC::STD: { MachineFrameInfo &MFI = MF->getFrameInfo(); if (MFI.hasVarSizedObjects() || diff --git a/llvm/test/CodeGen/PowerPC/convert-rr-to-ri-instrs.mir b/llvm/test/CodeGen/PowerPC/convert-rr-to-ri-instrs.mir --- a/llvm/test/CodeGen/PowerPC/convert-rr-to-ri-instrs.mir +++ b/llvm/test/CodeGen/PowerPC/convert-rr-to-ri-instrs.mir @@ -1615,7 +1615,7 @@ %0 = LI8 89 %2 = CMPDI %0, 87 %4 = ISEL8 $zero8, %0, %2.sub_gt - ; CHECK: LI8 0 + ; CHECK: ADDI8 %1, 0 %5 = ADD8 killed %4, %1 $x3 = COPY %5 BLR8 implicit $lr8, implicit $rm, implicit $x3 @@ -2017,7 +2017,7 @@ %3 = LI -3 %4 = CMPLWI %3, 87 %6 = ISEL $zero, %3, %4.sub_gt - ; CHECK: LI 0 + ; CHECK: ADDI killed %2, 0 %7 = ADD4 killed %6, killed %2 %9 = IMPLICIT_DEF %8 = INSERT_SUBREG %9, killed %7, 1 diff --git a/llvm/test/CodeGen/PowerPC/fold-remove-li.ll b/llvm/test/CodeGen/PowerPC/fold-remove-li.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/fold-remove-li.ll @@ -0,0 +1,40 @@ +; NOTE: This test verifies that a redundant load immediate of zero is folded +; NOTE: from its use in an isel and deleted as it is no longer in use.
+; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -ppc-asm-full-reg-names -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mcpu=pwr9 -mtriple=powerpc64-unknown-linux-gnu \ +; RUN: -ppc-asm-full-reg-names -verify-machineinstrs < %s | FileCheck %s + +%0 = type { i32, i16 } + +@val = common dso_local local_unnamed_addr global %0* null, align 8 + +define dso_local signext i32 @redunLoadImm(%0* %arg) { +; CHECK-LABEL: redunLoadImm: +; verify that the load immediate has been folded into the isel and deleted +; CHECK-NOT: li r[[REG1:[0-9]+]], 0 +; CHECK: isel r[[REG2:[0-9]+]], 0, r[[REG3:[0-9]+]], eq + +bb: + %tmp = icmp eq %0* %arg, null + br i1 %tmp, label %bb9, label %bb1 + +bb1: ; preds = %bb + %tmp2 = getelementptr inbounds %0, %0* %arg, i64 0, i32 1 + br label %bb3 + +bb3: ; preds = %bb3, %bb1 + %tmp4 = load i16, i16* %tmp2, align 4 + %tmp5 = sext i16 %tmp4 to i64 + %tmp6 = getelementptr inbounds %0, %0* %arg, i64 %tmp5 + %tmp7 = icmp eq i16 %tmp4, 0 + %tmp8 = select i1 %tmp7, %0* null, %0* %tmp6 + store %0* %tmp8, %0** @val, align 8 + br label %bb3 + +bb9: ; preds = %bb + %tmp10 = load %0*, %0** @val, align 8 + %tmp11 = getelementptr inbounds %0, %0* %tmp10, i64 0, i32 0 + %tmp12 = load i32, i32* %tmp11, align 4 + ret i32 %tmp12 +} diff --git a/llvm/test/CodeGen/PowerPC/save-crbp-ppc32svr4.ll b/llvm/test/CodeGen/PowerPC/save-crbp-ppc32svr4.ll --- a/llvm/test/CodeGen/PowerPC/save-crbp-ppc32svr4.ll +++ b/llvm/test/CodeGen/PowerPC/save-crbp-ppc32svr4.ll @@ -13,8 +13,7 @@ ; CHECK: addic 29, 0, 20 ; Save CR through R12 using R29 as the stack pointer (aligned base pointer). ; CHECK: mfcr 12 -; CHECK: stw 28, -24(29) -; CHECK: stw 12, -28(29) +; CHECK: stw 12, -24(29) target datalayout = "E-m:e-p:32:32-i64:64-n32" target triple = "powerpc-unknown-freebsd"