Index: lib/Target/PowerPC/CMakeLists.txt =================================================================== --- lib/Target/PowerPC/CMakeLists.txt +++ lib/Target/PowerPC/CMakeLists.txt @@ -29,6 +29,7 @@ PPCMCInstLower.cpp PPCMachineFunctionInfo.cpp PPCMIPeephole.cpp + PPCPostRAPeephole.cpp PPCRegisterInfo.cpp PPCQPXLoadSplat.cpp PPCSubtarget.cpp Index: lib/Target/PowerPC/PPC.h =================================================================== --- lib/Target/PowerPC/PPC.h +++ lib/Target/PowerPC/PPC.h @@ -40,6 +40,7 @@ FunctionPass *createPPCVSXFMAMutatePass(); FunctionPass *createPPCVSXSwapRemovalPass(); FunctionPass *createPPCMIPeepholePass(); + FunctionPass *createPPCPostRAPeepholePass(); FunctionPass *createPPCBranchSelectionPass(); FunctionPass *createPPCQPXLoadSplatPass(); FunctionPass *createPPCISelDag(PPCTargetMachine &TM); Index: lib/Target/PowerPC/PPCPostRAPeephole.cpp =================================================================== --- /dev/null +++ lib/Target/PowerPC/PPCPostRAPeephole.cpp @@ -0,0 +1,279 @@ +//===-------------- PPCPostRAPeephole.cpp - MI Peephole Cleanups -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===---------------------------------------------------------------------===// +// +// This pass modifies register operands of instructions after a register copy. +// For example, in code sequences like +// mr X, Y +// (no update in X or Y) +// addi Z, X, 1 +// this pass updates `addi Z, X, 1` to `addi Z, Y, 1` to increase ILP. 
+// +//===---------------------------------------------------------------------===// + +#include "PPCInstrInfo.h" +#include "PPC.h" +#include "PPCInstrBuilder.h" +#include "PPCTargetMachine.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/Debug.h" + +using namespace llvm; + +#define DEBUG_TYPE "ppc-postra-peepholes" + +namespace llvm { + void initializePPCPostRAPeepholePass(PassRegistry&); +} + +namespace { + +struct PPCPostRAPeephole : public MachineFunctionPass { + + static char ID; + const PPCInstrInfo *TII; + MachineFunction *MF; + + PPCPostRAPeephole() : MachineFunctionPass(ID) { + initializePPCPostRAPeepholePass(*PassRegistry::getPassRegistry()); + } + +private: + // Initialize class variables. + void initialize(MachineFunction &MFParm); + + // Perform peepholes. + bool optUseAfterRegCopy(void); + +public: + // Main entry point for this pass. + bool runOnMachineFunction(MachineFunction &MF) override { + if (skipFunction(*MF.getFunction())) + return false; + initialize(MF); + bool changed = false; + + // Forward copy sources into later uses of the copied register. + changed |= optUseAfterRegCopy(); + + return changed; + } +}; + +// A class representing two physical registers having the same value. +class PPCEquiRegPairInfo { +public: + PPCEquiRegPairInfo(unsigned From, unsigned To): + SrcReg(From), DstReg(To), SrcRegKilled(false) {} + + unsigned getSrcReg() { return SrcReg; } + unsigned getDstReg() { return DstReg; } + bool needSrcRegKillFlag() { return SrcRegKilled; } + + // It returns true if the specified register conflicts with SrcReg or DstReg. + bool isConflict(const unsigned Reg, const TargetRegisterInfo *TRI) const { + if (Reg == SrcReg || Reg == DstReg || + TRI->isSuperOrSubRegisterEq(Reg, SrcReg) || + TRI->isSuperOrSubRegisterEq(Reg, DstReg)) + return true; + return false; + } + + // It returns true if the specified instruction updates SrcReg or DstReg. 
+ bool isConflict(const MachineInstr &MI, const TargetRegisterInfo *TRI, + const PPCInstrInfo *TII) const { + const MCInstrDesc &MCID = TII->get(MI.getOpcode()); + for (unsigned I = 0; I < MCID.getNumDefs(); I++) + if (isConflict(MI.getOperand(I).getReg(), TRI)) + return true; + + if (MCID.ImplicitDefs) + for (const MCPhysReg *ImpDef = MCID.getImplicitDefs(); *ImpDef; ImpDef++) + if (isConflict(*ImpDef, TRI)) + return true; + + return false; + } + + // If we see a kill flag for SrcReg, we remember it + // to maintain kill flag later. + void checkKillFlags(const MachineOperand &MO, + const TargetRegisterInfo *TRI) { + assert(MO.isKill()); + if (TRI->isSuperOrSubRegisterEq(MO.getReg(), SrcReg)) + SrcRegKilled = true; + } + +protected: + unsigned SrcReg; + unsigned DstReg; + bool SrcRegKilled; +}; + +// Initialize class variables. +void PPCPostRAPeephole::initialize(MachineFunction &MFParm) { + MF = &MFParm; + TII = MF->getSubtarget<PPCSubtarget>().getInstrInfo(); + DEBUG(dbgs() << "*** PowerPC Post RA peephole pass ***\n\n"); + DEBUG(MF->dump()); +} + +// Perform peephole optimizations. +bool PPCPostRAPeephole::optUseAfterRegCopy(void) { + const PPCSubtarget *PPCSubTarget = &MF->getSubtarget<PPCSubtarget>(); + const TargetRegisterInfo *TRI = PPCSubTarget->getRegisterInfo(); + bool Changed = false; + SmallVector<PPCEquiRegPairInfo, 8> EquiPairs; + for (MachineBasicBlock &MBB : *MF) { + // This optimization is local to each BB. So we clear the information + // of equal register pairs used in the previous BB. + EquiPairs.clear(); + for (MachineInstr &MI : MBB) { + unsigned Opc = MI.getOpcode(); + if (MI.isDebugValue()) + continue; + + // We remember whether we see a kill flag for SrcReg of each pair. + for (auto &MO: MI.operands()) + if (MO.isReg() && MO.isKill()) + for (auto &RegPair: EquiPairs) + RegPair.checkKillFlags(MO, TRI); + + // Refer to PPCInstrInfo::copyPhysReg to find opcodes used for copying + // the value in a physical register for each register class. 
+ // Currently we do not optimize VSX registers (copied by xxlor), + // which may involve different register types, i.e. FPR and VRF. + bool IsRegCopy = (Opc == PPC::FMR); + if ((Opc == PPC::OR8 || Opc == PPC::OR || Opc == PPC::VOR) && + MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) + IsRegCopy = true; + + if (IsRegCopy) { + unsigned DstReg = MI.getOperand(0).getReg(); + unsigned SrcReg = MI.getOperand(1).getReg(); + + // Drop pairs clobbered by this copy, even if we bail out for gpr0 below. + SmallVector<PPCEquiRegPairInfo, 8> CurrentEquiPairs(EquiPairs); + EquiPairs.clear(); + for (auto &RegPair: CurrentEquiPairs) + if (!RegPair.isConflict(MI, TRI, TII)) + EquiPairs.push_back(RegPair); + + // We do not optimize gpr0, which may mean constant 0. + if (TRI->isSuperOrSubRegisterEq(DstReg, PPC::X0) || + TRI->isSuperOrSubRegisterEq(SrcReg, PPC::X0)) + continue; + + // We create a new register pair having the same value. + EquiPairs.push_back(PPCEquiRegPairInfo(SrcReg, DstReg)); + + for (auto &MO: MI.operands()) + if (MO.isReg() && MO.isKill()) + EquiPairs.back().checkKillFlags(MO, TRI); + + continue; + } + + if (EquiPairs.empty()) continue; + + // Currently, we just invalidate all register pairs at a call. + // For further opportunity, we can keep reg pairs + // if both src and dst regs are callee save. + if (MI.isCall()) { + EquiPairs.clear(); + continue; + } + + const MCInstrDesc &MCID = TII->get(Opc); + SmallVector<PPCEquiRegPairInfo, 8> CurrentEquiPairs(EquiPairs); + EquiPairs.clear(); + MachineBasicBlock::reverse_iterator MBBI, MBBIE; + for (auto &RegPair: CurrentEquiPairs) { + const unsigned SrcReg = RegPair.getSrcReg(); + const unsigned DstReg = RegPair.getDstReg(); + + // We check all input registers for finding optimization opportunity. 
+ for (unsigned I = MCID.getNumDefs(); I < MCID.getNumOperands(); I++) { + MachineOperand &MO = MI.getOperand(I); + + if (MO.isReg() && MO.getReg() == DstReg && + !MO.isTied()) { // we do not optimize ldux etc + // Here we identified opportunity to use SrcReg instead of DstReg. + + // If this is the last use of DstReg, we move kill flag + // by reversely iterating instructions + if (MO.isKill()) { + bool found = false; + for (MBBI = MI, MBBIE = MBB.rend(); + MBBI != MBBIE && !found; MBBI++) { + for (int J = MBBI->getNumOperands() - 1; J >= 0; J--) { + MachineOperand &MO2 = MBBI->getOperand(J); + if (MO2.isReg() && + TRI->isSuperOrSubRegisterEq(MO2.getReg(), DstReg)) { + if (!MO2.isDef()) + MO2.setIsKill(true); + else { + // TODO: Since there is no use, we can remove register copy + } + found = true; + break; + } + } + } + assert(found); + } + + Changed = true; + MO.setIsKill(false); + MO.setReg(SrcReg); + DEBUG(dbgs() << "An operand modified in\n "); + DEBUG(MI.dump()); + + // If we have already seen the use of SrcReg with a kill flag, + // we need to move the kill flag to this instruction. + if (RegPair.needSrcRegKillFlag()) { + bool found = false; + for (MBBI = MI, MBBIE = MBB.rend(); + MBBI != MBBIE && !found; MBBI++) { + for (int J = MBBI->getNumOperands() - 1; J >= 0; J--) { + MachineOperand &MO2 = MBBI->getOperand(J); + if (MO2.isReg() && !MO2.isDef() && MO2.isKill() && + TRI->isSuperOrSubRegisterEq(MO2.getReg(), SrcReg)) { + MO2.setIsKill(false); + found = true; + break; + } + } + } + assert(found); + MO.setIsKill(true); + } + } + } + + // Register pairs are eliminated if src or dst is overwritten. 
+ if (!RegPair.isConflict(MI, TRI, TII)) + EquiPairs.push_back(RegPair); + } + } + } + return Changed; +} + +} // end anonymous namespace + +INITIALIZE_PASS_BEGIN(PPCPostRAPeephole, DEBUG_TYPE, + "PowerPC Post RA Peephole Optimization", false, false) +INITIALIZE_PASS_END(PPCPostRAPeephole, DEBUG_TYPE, + "PowerPC Post RA Peephole Optimization", false, false) + +char PPCPostRAPeephole::ID = 0; +FunctionPass* +llvm::createPPCPostRAPeepholePass() { return new PPCPostRAPeephole(); } Index: lib/Target/PowerPC/PPCTargetMachine.cpp =================================================================== --- lib/Target/PowerPC/PPCTargetMachine.cpp +++ lib/Target/PowerPC/PPCTargetMachine.cpp @@ -64,6 +64,10 @@ opt<bool> DisableMIPeephole("disable-ppc-peephole", cl::Hidden, cl::desc("Disable machine peepholes for PPC")); +static cl:: +opt<bool> DisablePostRAPeephole("disable-ppc-postra-peephole", cl::Hidden, + cl::desc("Disable post RA peepholes for PPC")); + static cl::opt<bool> EnableGEPOpt("ppc-gep-opt", cl::Hidden, cl::desc("Enable optimizations on complex GEPs"), @@ -418,6 +422,10 @@ void PPCPassConfig::addPreSched2() { if (getOptLevel() != CodeGenOpt::None) { + // Target-specific peephole optimization after register allocation. 
+ if (!DisablePostRAPeephole) + addPass(createPPCPostRAPeepholePass()); + addPass(&IfConverterID); // This optimization must happen after anything that might do store-to-load Index: test/CodeGen/PowerPC/Frames-large.ll =================================================================== --- test/CodeGen/PowerPC/Frames-large.ll +++ test/CodeGen/PowerPC/Frames-large.ll @@ -27,7 +27,7 @@ ; PPC32-FP: ori r0, r0, 32736 ; PPC32-FP: stwux r1, r1, r0 ; PPC32-FP: mr r31, r1 -; PPC32-FP: addi r3, r31, 32 +; PPC32-FP: addi r3, r1, 32 ; PPC32-FP: lwz r1, 0(r1) ; PPC32-FP: lwz r31, -4(r1) ; PPC32-FP: blr @@ -48,7 +48,7 @@ ; PPC64-FP: ori r0, r0, 32704 ; PPC64-FP: stdux r1, r1, r0 ; PPC64-FP: mr r31, r1 -; PPC64-FP: addi r3, r31, 60 +; PPC64-FP: addi r3, r1, 60 ; PPC64-FP: ld r1, 0(r1) ; PPC64-FP: ld r31, -8(r1) ; PPC64-FP: blr Index: test/CodeGen/PowerPC/eh-dwarf-cfa.ll =================================================================== --- test/CodeGen/PowerPC/eh-dwarf-cfa.ll +++ test/CodeGen/PowerPC/eh-dwarf-cfa.ll @@ -13,7 +13,7 @@ ; CHECK: .cfi_def_cfa_offset [[SS]] ; CHECK: mr 31, 1 ; CHECK: .cfi_def_cfa_register r31 -; CHECK: addi 3, 31, [[SS]] +; CHECK: addi 3, 1, [[SS]] ; CHECK-NEXT: bl _Z1gPv ; CHECK: blr } Index: test/CodeGen/PowerPC/fma-mutate.ll =================================================================== --- test/CodeGen/PowerPC/fma-mutate.ll +++ test/CodeGen/PowerPC/fma-mutate.ll @@ -14,7 +14,7 @@ ret double %r ; CHECK: @foo3 -; CHECK: xsnmsubadp [[REG:[0-9]+]], {{[0-9]+}}, [[REG]] +; CHECK: xsnmsubadp [[REG:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} ; CHECK: xsmaddmdp ; CHECK: xsmaddadp } Index: test/CodeGen/PowerPC/save-cr-ppc32svr4.ll =================================================================== --- test/CodeGen/PowerPC/save-cr-ppc32svr4.ll +++ test/CodeGen/PowerPC/save-cr-ppc32svr4.ll @@ -4,11 +4,11 @@ ; CHECK-LABEL: fred: ; CHECK: stwu 1, -32(1) +; CHECK: mfcr [[CR:[0-9]+]] ; CHECK: stw 31, 28(1) -; CHECK: mr 31, 1 ; CHECK: stw 30, 24(1) -; CHECK: mfcr 
[[CR:[0-9]+]] -; CHECK: stw [[CR]], 20(31) +; CHECK: mr 31, 1 +; CHECK: stw [[CR]], 20(1) target datalayout = "E-m:e-p:32:32-i64:64-n32" target triple = "powerpc-unknown-freebsd" Index: test/CodeGen/PowerPC/sjlj.ll =================================================================== --- test/CodeGen/PowerPC/sjlj.ll +++ test/CodeGen/PowerPC/sjlj.ll @@ -69,9 +69,9 @@ ; CHECK-DAG: stxvd2x ; CHECK-DAG: addis [[REG:[0-9]+]], 2, env_sigill@toc@ha -; CHECK-DAG: std 31, env_sigill@toc@l([[REG]]) +; CHECK-DAG: std 1, env_sigill@toc@l([[REG]]) ; CHECK-DAG: addi [[REGA:[0-9]+]], [[REG]], env_sigill@toc@l -; CHECK-DAG: std [[REGA]], [[OFF:[0-9]+]](31) # 8-byte Folded Spill +; CHECK-DAG: std [[REGA]], [[OFF:[0-9]+]](1) # 8-byte Folded Spill ; CHECK-DAG: std 1, 16([[REGA]]) ; CHECK-DAG: std 2, 24([[REGA]]) ; CHECK: bcl 20, 31, .LBB1_3