Index: lib/Target/AMDGPU/AMDGPU.h =================================================================== --- lib/Target/AMDGPU/AMDGPU.h +++ lib/Target/AMDGPU/AMDGPU.h @@ -38,6 +38,7 @@ FunctionPass *createAMDGPUCFGStructurizerPass(); // SI Passes +FunctionPass *createGCNPeepholeOptimizerPass(); FunctionPass *createSITypeRewriter(); FunctionPass *createSIAnnotateControlFlowPass(); FunctionPass *createSIFoldOperandsPass(); @@ -59,6 +60,9 @@ void initializeAMDGPUAnnotateKernelFeaturesPass(PassRegistry &); extern char &AMDGPUAnnotateKernelFeaturesID; +void initializeGCNPeepholeOptimizerPass(PassRegistry &); +extern char &GCNPeepholeOptimizerID; + void initializeSIFoldOperandsPass(PassRegistry &); extern char &SIFoldOperandsID; Index: lib/Target/AMDGPU/AMDGPUTargetMachine.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -372,6 +372,7 @@ } void GCNPassConfig::addPreEmitPass() { + addPass(createGCNPeepholeOptimizerPass(), false); addPass(createSIInsertWaitsPass(), false); addPass(createSILowerControlFlowPass(), false); if (InsertNops) { Index: lib/Target/AMDGPU/CMakeLists.txt =================================================================== --- lib/Target/AMDGPU/CMakeLists.txt +++ lib/Target/AMDGPU/CMakeLists.txt @@ -33,6 +33,7 @@ AMDGPUInstrInfo.cpp AMDGPUPromoteAlloca.cpp AMDGPURegisterInfo.cpp + GCNPeepholeOptimizer.cpp R600ClauseMergePass.cpp R600ControlFlowFinalizer.cpp R600EmitClauseMarkers.cpp Index: lib/Target/AMDGPU/GCNPeepholeOptimizer.cpp =================================================================== --- /dev/null +++ lib/Target/AMDGPU/GCNPeepholeOptimizer.cpp @@ -0,0 +1,120 @@ +//===-- GCNPeepholeOptimizer.cpp - Peephole optimizations for GCN ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+///
+/// This contains some peephole optimizations for GCN targets.  Once
+/// GlobalISel is finished, this pass may be used as a replacement for our
+/// target dag combines.
+///
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "AMDGPUSubtarget.h"
+#include "SIInstrInfo.h"
+#include "SIMachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineDominanceFrontier.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/Constants.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "gcn-peephole"
+
+namespace {
+
+/// Machine function pass that hosts the GCN peephole optimizations.  The only
+/// peephole implemented so far is optimizeS_NOP().
+class GCNPeepholeOptimizer : public MachineFunctionPass {
+private:
+  /// Target instruction info.  Assigned at the top of runOnMachineFunction()
+  /// and only meaningful while that function is running.
+  const SIInstrInfo *TII = nullptr;
+
+  MachineBasicBlock::iterator
+  optimizeS_NOP(MachineBasicBlock::iterator I) const;
+
+public:
+  static char ID;
+
+  GCNPeepholeOptimizer() : MachineFunctionPass(ID) {
+    // Make sure the pass is registered before it is first used.
+    initializeGCNPeepholeOptimizerPass(*PassRegistry::getPassRegistry());
+  }
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+
+  const char *getPassName() const override {
+    return "GCN Peephole optimizer";
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    // The pass only erases and inserts instructions inside a block.
+    AU.setPreservesCFG();
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+};
+
+} // End anonymous namespace
+
+char GCNPeepholeOptimizer::ID = 0;
+
+// The pass has no analysis dependencies, so the single-macro form suffices.
+INITIALIZE_PASS(GCNPeepholeOptimizer, DEBUG_TYPE,
+                "GCN Peephole Optimizer", false, false)
+
+char &llvm::GCNPeepholeOptimizerID = GCNPeepholeOptimizer::ID;
+
+/// Factory used by the target pass pipeline; the caller owns the result.
+FunctionPass *llvm::createGCNPeepholeOptimizerPass() {
+  return new GCNPeepholeOptimizer;
+}
+
+/// Combine consecutive S_NOP instructions into as few as possible.
+///
+/// \param I  Iterator to an S_NOP instruction.
+/// \returns  Iterator to the instruction at which the caller should resume
+///           scanning: the instruction after \p I when \p I is a lone S_NOP,
+///           otherwise the first instruction following the merged run (or
+///           the block's end()).
+MachineBasicBlock::iterator
+GCNPeepholeOptimizer::optimizeS_NOP(MachineBasicBlock::iterator I) const {
+  MachineBasicBlock *MBB = I->getParent();
+  const MachineBasicBlock::iterator End = MBB->end();
+
+  // Bail early if we don't have consecutive S_NOPs.
+  MachineBasicBlock::iterator Nop = std::next(I);
+  if (Nop == End || Nop->getOpcode() != AMDGPU::S_NOP)
+    return Nop;
+
+  // Each S_NOP is counted as (immediate + 1) wait states.  Sum the whole run
+  // while erasing it.
+  unsigned NopCount = 0;
+
+  // The end-of-block test must come first: the run may extend to the very
+  // end of the block, and end() must never be dereferenced.
+  MachineBasicBlock::iterator Next;
+  for (Nop = I; Nop != End && Nop->getOpcode() == AMDGPU::S_NOP; Nop = Next) {
+    // Grab the successor before the current instruction is destroyed.
+    Next = std::next(Nop);
+
+    NopCount += 1 + Nop->getOperand(0).getImm();
+    Nop->eraseFromParent();
+  }
+
+  // Re-materialize the accumulated wait states in front of the first
+  // instruction that was not part of the run.
+  TII->insertWaitStates(*MBB, Nop, NopCount);
+  return Nop;
+}
+
+/// Run every peephole over each basic block of \p MF.
+///
+/// \returns true if at least one run of S_NOPs was rewritten, false if the
+///          function was left untouched.
+bool GCNPeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
+
+  TII = static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo());
+
+  bool Changed = false;
+
+  for (MachineBasicBlock &MBB : MF) {
+    for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end();) {
+      if (I->getOpcode() != AMDGPU::S_NOP) {
+        ++I;
+        continue;
+      }
+
+      // optimizeS_NOP() only rewrites code when the S_NOP is directly
+      // followed by another one; record that here because its return value
+      // is the resume position rather than a "changed" flag.
+      MachineBasicBlock::iterator Following = std::next(I);
+      if (Following != MBB.end() && Following->getOpcode() == AMDGPU::S_NOP)
+        Changed = true;
+
+      I = optimizeS_NOP(I);
+    }
+  }
+
+  return Changed;
+}