Index: include/llvm/ADT/SetVector.h =================================================================== --- include/llvm/ADT/SetVector.h +++ include/llvm/ADT/SetVector.h @@ -113,6 +113,12 @@ } /// \brief Return the last element of the SetVector. + const T &front() const { + assert(!empty() && "Cannot call front() on empty SetVector!"); + return vector_.front(); + } + + /// \brief Return the last element of the SetVector. const T &back() const { assert(!empty() && "Cannot call back() on empty SetVector!"); return vector_.back(); Index: include/llvm/CodeGen/CommandFlags.h =================================================================== --- include/llvm/CodeGen/CommandFlags.h +++ include/llvm/CodeGen/CommandFlags.h @@ -181,6 +181,18 @@ "Only fuse FP ops when the result won't be affected."), clEnumValEnd)); +cl::opt +CFGStructurizerType("cfg-structurizer", + cl::Hidden, + cl::desc("Specify early or late CFG structurization for AMDGPU target"), + cl::init(CFGStructurizer::Early), + cl::values( + clEnumValN(CFGStructurizer::Early, "early", + "Structurize IR before code generation"), + clEnumValN(CFGStructurizer::Late, "late", + "Structurize after code generation"), + clEnumValEnd)); + cl::list ReciprocalOps("recip", cl::CommaSeparated, @@ -285,6 +297,7 @@ TargetOptions Options; Options.LessPreciseFPMADOption = EnableFPMAD; Options.AllowFPOpFusion = FuseFPOps; + Options.Structurizer = CFGStructurizerType; Options.Reciprocals = TargetRecip(ReciprocalOps); Options.UnsafeFPMath = EnableUnsafeFPMath; Options.NoInfsFPMath = EnableNoInfsFPMath; Index: include/llvm/CodeGen/MachineBasicBlock.h =================================================================== --- include/llvm/CodeGen/MachineBasicBlock.h +++ include/llvm/CodeGen/MachineBasicBlock.h @@ -438,12 +438,22 @@ /// other block. bool isLayoutSuccessor(const MachineBasicBlock *MBB) const; - /// Return true if the block can implicitly transfer control to the block - /// after it by falling off the end of it. This should return false if it can - /// reach the block after it, but it uses an explicit branch to do so (e.g., a - /// table jump). True is a conservative answer. + + /// Return the fallthrough block if the block can implicitly + /// transfer control to the block after it by falling off the end of + /// it. This should return null if it can reach the block after + /// it, but it uses an explicit branch to do so (e.g., a table + /// jump). Non-null return is a conservative answer. + MachineBasicBlock *getFallThrough(); + + /// Return true if the block can implicitly transfer control to the + /// block after it by falling off the end of it. This should return + /// false if it can reach the block after it, but it uses an + /// explicit branch to do so (e.g., a table jump). True is a + /// conservative answer. bool canFallThrough(); + /// Returns a pointer to the first instruction in this block that is not a /// PHINode instruction. When adding instructions to the beginning of the /// basic block, they should be added before the returned value, not before @@ -593,6 +603,10 @@ Insts.clear(); } + /// Split a block right before 'Where' and create two blocks in sequence. + /// Return the new basic block created as a successor after the original + static MachineBasicBlock *split(iterator Where); + /// Take an instruction from MBB 'Other' at the position From, and insert it /// into this MBB right before 'Where'. /// Index: include/llvm/CodeGen/MachineRegisterInfo.h =================================================================== --- include/llvm/CodeGen/MachineRegisterInfo.h +++ include/llvm/CodeGen/MachineRegisterInfo.h @@ -443,13 +443,23 @@ /// register. bool use_empty(unsigned RegNo) const { return use_begin(RegNo) == use_end(); } + + /// hasNUses - Return true if there are exactly N instructions using the + /// specified register. + bool hasNUses(unsigned RegNo, unsigned N) const { + use_iterator UI = use_begin(RegNo); + use_iterator UE = use_end(); + while (UI != UE) { + ++UI; + --N; + } + return N == 0; + } + /// hasOneUse - Return true if there is exactly one instruction using the /// specified register. bool hasOneUse(unsigned RegNo) const { - use_iterator UI = use_begin(RegNo); - if (UI == use_end()) - return false; - return ++UI == use_end(); + return hasNUses(RegNo, 1); } /// use_nodbg_iterator/use_nodbg_begin/use_nodbg_end - Walk all uses of the Index: include/llvm/CodeGen/Passes.h =================================================================== --- include/llvm/CodeGen/Passes.h +++ include/llvm/CodeGen/Passes.h @@ -72,6 +72,9 @@ /// MachineDominanaceFrontier - This pass is a machine dominators analysis pass. extern char &MachineDominanceFrontierID; + /// MachineRegionInfo - This pass computes SESE regions for machine functions. + extern char &MachineRegionInfoPassID; + /// EdgeBundles analysis - Bundle machine CFG edges. extern char &EdgeBundlesID; @@ -131,6 +134,11 @@ /// This pass adds dead/undef flags after analyzing subregister lanes. extern char &DetectDeadLanesID; + /// MachineCFGStructurizer - This pass performs machine code structurization. + extern char &MachineCFGStructurizerID; + + FunctionPass *createMachineCFGStructurizer(); + /// FastRegisterAllocation Pass - This pass register allocates as fast as /// possible. It is best suited for debug code where live ranges are short. /// Index: include/llvm/InitializePasses.h =================================================================== --- include/llvm/InitializePasses.h +++ include/llvm/InitializePasses.h @@ -213,6 +213,7 @@ void initializeMachineBlockPlacementPass(PassRegistry&); void initializeMachineBlockPlacementStatsPass(PassRegistry&); void initializeMachineBranchProbabilityInfoPass(PassRegistry&); +void initializeMachineCFGStructurizerPass(PassRegistry&); void initializeMachineCSEPass(PassRegistry&); void initializeMachineCombinerPass(PassRegistry &); void initializeMachineCopyPropagationPass(PassRegistry&); Index: include/llvm/Target/TargetInstrInfo.h =================================================================== --- include/llvm/Target/TargetInstrInfo.h +++ include/llvm/Target/TargetInstrInfo.h @@ -569,6 +569,15 @@ llvm_unreachable("Target didn't implement ReduceLoopCount"); } + /// Insert unconditionalbranch code into the end of the specified + /// MachineBasicBlock. + unsigned InsertUnconditionalBranch(MachineBasicBlock &MBB, + MachineBasicBlock *DBB, + DebugLoc DL) const { + return InsertBranch(MBB, DBB, nullptr, SmallVector(), + DL); + } + /// Delete the instruction OldInst and everything after it, replacing it with /// an unconditional branch to NewDest. This is used by the tail merging pass. virtual void ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail, @@ -671,6 +680,14 @@ return false; } + /// Return the register class that is preferred to be used with select + /// instructions by the target, given the minimum size in bits. + virtual const TargetRegisterClass * + getPreferredSelectRegClass(unsigned Size) const { + llvm_unreachable("Target didn't implement TargetInstrInfo::getPreferred" + "SelectRegClass!"); + } + /// Return true if it is possible to insert a select /// instruction that chooses between TrueReg and FalseReg based on the /// condition code in Cond. @@ -899,6 +916,25 @@ /// Return true when a target supports MachineCombiner. virtual bool useMachineCombiner() const { return false; } + + /// Handle simple if region + virtual void convertNonUniformIfRegion(MachineBasicBlock *IfEntry, + MachineBasicBlock *IfEnd) const + { + llvm_unreachable("Target didn't implement" + " TargetInstrInfo::convertNonUniformIfRegion"); + + } + + /// Handle simple loop region + virtual void convertNonUniformLoopRegion(MachineBasicBlock *LoopEntry, + MachineBasicBlock *LoopEnd) const + { + llvm_unreachable("Target didn't implement" + " TargetInstrInfo::convertNonUniformLoopRegion"); + + } + protected: /// Target-dependent implementation for foldMemoryOperand. /// Target-independent code in foldMemoryOperand will @@ -1065,6 +1101,24 @@ return true; } + /// Insert assign + virtual void insertAssign(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + DebugLoc DL, + unsigned DstReg, + unsigned Value) const { + llvm_unreachable("Target didn't implement TargetInstrInfo::InsertAssign"); + } + + /// Insert assign + void insertClear(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + DebugLoc DL, + unsigned DstReg) const { + // The target will have to create a special case when 0 is passed. + insertAssign(MBB, MI, DL, DstReg, 0); + } + /// Insert a noop into the instruction stream at the specified point. virtual void insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const; @@ -1161,6 +1215,14 @@ /// targets may choose to honor. bool usePreRAHazardRecognizer() const; + /// Insert equality compare instruction betwen a register and a + /// constant, and return the register where the result is stored. + virtual unsigned insertNE(MachineBasicBlock *MBB, + MachineBasicBlock::iterator I, DebugLoc DL, + unsigned SrcReg, int Value) const { + llvm_unreachable("Target didn't implement TargetInstrInfo::insertEQ"); + } + /// For a comparison instruction, return the source registers /// in SrcReg and SrcReg2 if having two register operands, and the value it /// compares against in CmpValue. Return true if the comparison instruction Index: include/llvm/Target/TargetOptions.h =================================================================== --- include/llvm/Target/TargetOptions.h +++ include/llvm/Target/TargetOptions.h @@ -64,6 +64,13 @@ GNU }; + namespace CFGStructurizer { + enum Type { + Early, // IR Level + Late // Machine Level + }; + } + /// Identify a debugger for "tuning" the debug info. /// /// The "debugger tuning" concept allows us to present a more intuitive @@ -104,7 +111,8 @@ AllowFPOpFusion(FPOpFusion::Standard), Reciprocals(TargetRecip()), JTType(JumpTable::Single), ThreadModel(ThreadModel::POSIX), EABIVersion(EABI::Default), DebuggerTuning(DebuggerKind::Default), - ExceptionModel(ExceptionHandling::None) {} + ExceptionModel(ExceptionHandling::None), + Structurizer(CFGStructurizer::Early) {} /// PrintMachineCode - This flag is enabled when the -print-machineinstrs /// option is specified on the command line, and should enable debugging @@ -256,6 +264,9 @@ /// What exception model to use ExceptionHandling ExceptionModel; + /// What structurizer to use + CFGStructurizer::Type Structurizer; + /// Machine level options. MCTargetOptions MCOptions; }; @@ -280,6 +291,7 @@ ARE_EQUAL(EmulatedTLS) && ARE_EQUAL(FloatABIType) && ARE_EQUAL(AllowFPOpFusion) && + ARE_EQUAL(Structurizer) && ARE_EQUAL(Reciprocals) && ARE_EQUAL(JTType) && ARE_EQUAL(ThreadModel) && Index: lib/CodeGen/CMakeLists.txt =================================================================== --- lib/CodeGen/CMakeLists.txt +++ lib/CodeGen/CMakeLists.txt @@ -54,6 +54,7 @@ MachineBlockFrequencyInfo.cpp MachineBlockPlacement.cpp MachineBranchProbabilityInfo.cpp + MachineCFGStructurizer.cpp MachineCombiner.cpp MachineCopyPropagation.cpp MachineCSE.cpp Index: lib/CodeGen/CodeGen.cpp =================================================================== --- lib/CodeGen/CodeGen.cpp +++ lib/CodeGen/CodeGen.cpp @@ -44,6 +44,7 @@ initializeMachineBlockFrequencyInfoPass(Registry); initializeMachineBlockPlacementPass(Registry); initializeMachineBlockPlacementStatsPass(Registry); + initializeMachineCFGStructurizerPass(Registry); initializeMachineCSEPass(Registry); initializeImplicitNullChecksPass(Registry); initializeMachineCombinerPass(Registry); @@ -55,6 +56,7 @@ initializeMachineModuleInfoPass(Registry); initializeMachinePipelinerPass(Registry); initializeMachinePostDominatorTreePass(Registry); + initializeMachineRegionInfoPassPass(Registry); initializeMachineSchedulerPass(Registry); initializeMachineSinkingPass(Registry); initializeMachineVerifierPassPass(Registry); Index: lib/CodeGen/MachineBasicBlock.cpp =================================================================== --- lib/CodeGen/MachineBasicBlock.cpp +++ lib/CodeGen/MachineBasicBlock.cpp @@ -90,6 +90,13 @@ MachineBasicBlock *N) { N->getParent()->removeFromMBBNumbering(N->Number); N->Number = -1; + + // Make sure the instructions have their operands in the reginfo lists. + // MachineFunction &MF = *N->getParent(); + // MachineRegisterInfo &RegInfo = MF.getRegInfo(); + // for (MachineBasicBlock::instr_iterator + // I = N->instr_begin(), E = N->instr_end(); I != E; ++I) + // I->RemoveRegOperandsFromUseLists(RegInfo); } /// When we add an instruction to a basic block list, we update its parent @@ -671,16 +678,16 @@ return std::next(I) == MachineFunction::const_iterator(MBB); } -bool MachineBasicBlock::canFallThrough() { +MachineBasicBlock *MachineBasicBlock::getFallThrough() { MachineFunction::iterator Fallthrough = getIterator(); ++Fallthrough; // If FallthroughBlock is off the end of the function, it can't fall through. if (Fallthrough == getParent()->end()) - return false; + return nullptr; // If FallthroughBlock isn't a successor, no fallthrough is possible. if (!isSuccessor(&*Fallthrough)) - return false; + return nullptr; // Analyze the branches, if any, at the end of the block. MachineBasicBlock *TBB = nullptr, *FBB = nullptr; @@ -692,25 +699,30 @@ // is possible. The isPredicated check is needed because this code can be // called during IfConversion, where an instruction which is normally a // Barrier is predicated and thus no longer an actual control barrier. - return empty() || !back().isBarrier() || TII->isPredicated(back()); + return (empty() || !back().isBarrier() || TII->isPredicated(back())) ? + &*Fallthrough : nullptr; } // If there is no branch, control always falls through. - if (!TBB) return true; + if (!TBB) return &*Fallthrough; // If there is some explicit branch to the fallthrough block, it can obviously // reach, even though the branch should get folded to fall through implicitly. if (MachineFunction::iterator(TBB) == Fallthrough || MachineFunction::iterator(FBB) == Fallthrough) - return true; + return &*Fallthrough; // If it's an unconditional branch to some block not the fall through, it // doesn't fall through. - if (Cond.empty()) return false; + if (Cond.empty()) return nullptr; // Otherwise, if it is conditional and has no explicit false block, it falls // through. - return FBB == nullptr; + return (FBB == nullptr) ? &*Fallthrough : nullptr; +} + +bool MachineBasicBlock::canFallThrough() { + return getFallThrough() != nullptr; } MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, @@ -986,6 +998,22 @@ return true; } +MachineBasicBlock *MachineBasicBlock::split(iterator I) { + // Create the fall-through block. + MachineBasicBlock *MBB = (*I).getParent(); + MachineFunction *MF = MBB->getParent(); + MachineBasicBlock *SuccMBB = MF->CreateMachineBasicBlock(); + auto MBBIter = ++(MBB->getIterator()); + MF->insert(MBBIter, SuccMBB); + SuccMBB->transferSuccessorsAndUpdatePHIs(MBB); + MBB->addSuccessor(SuccMBB); + + // Splice the code over. + SuccMBB->splice(SuccMBB->end(), MBB, I, MBB->end()); + + return SuccMBB; +} + /// Prepare MI to be removed from its bundle. This fixes bundle flags on MI's /// neighboring instructions so the bundle won't be broken by removing MI. static void unbundleSingleMI(MachineInstr *MI) { Index: lib/CodeGen/MachineCFGStructurizer.cpp =================================================================== --- /dev/null +++ lib/CodeGen/MachineCFGStructurizer.cpp @@ -0,0 +1,2664 @@ +//===-- MachineCFGStructurizer.cpp - Machine code if conversion pass. -----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the machine instruction level CFG structurizer pass. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/Passes.h" +#include "llvm/Support/Debug.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/CFG.h" +#include "llvm/IR/DebugLoc.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegionInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetLowering.h" + +using namespace llvm; + +#define DEBUG_TYPE "machinecfgstructurizer" + +namespace { + class PHILinearizeDestIterator; + + class PHILinearize { + friend class PHILinearizeDestIterator; + + private: + typedef DenseSet PHISourcesT; + typedef struct {unsigned DestReg; DebugLoc DL; PHISourcesT Sources;} + PHIInfoElementT; + typedef SmallPtrSet< PHIInfoElementT *, 2> PHIInfoT; + PHIInfoT PHIInfo; + + static unsigned phiInfoElementGetDest(PHIInfoElementT *Info); + static void phiInfoElementSetDef(PHIInfoElementT *Info, unsigned NewDef); + static DebugLoc phiInfoElementGetDebugLoc(PHIInfoElementT *Info); + static PHISourcesT &phiInfoElementGetSources(PHIInfoElementT *Info); + static void phiInfoElementAddSource(PHIInfoElementT *Info, + unsigned SourceReg); + static void phiInfoElementRemoveSource(PHIInfoElementT *Info, + unsigned SourceReg); + PHIInfoElementT *findPHIInfoElement(unsigned DestReg); + PHIInfoElementT *findPHIInfoElementFromSource(unsigned SourceReg); + + public: + void addDest(unsigned DestReg, DebugLoc DL); + void replaceDef(unsigned OldDestReg, unsigned NewDestReg); + void deleteDef(unsigned DestReg); + DebugLoc getDebugLoc(unsigned DestReg); + void addSource(unsigned DestReg, unsigned SourceReg); + void removeSource(unsigned DestReg, unsigned SourceReg); + bool findDest(unsigned SourceReg, unsigned &DestReg); + bool isSource(unsigned Reg); + unsigned getNumSources(unsigned DestReg); + void dump(MachineRegisterInfo *MRI); + void clear(); + + typedef PHISourcesT::iterator source_iterator; + typedef PHILinearizeDestIterator dest_iterator; + + dest_iterator dests_begin(); + dest_iterator dests_end(); + + source_iterator sources_begin(unsigned Reg); + source_iterator sources_end(unsigned Reg); + }; + + class PHILinearizeDestIterator { + private: + PHILinearize::PHIInfoT::iterator Iter; + public: + unsigned operator*() { + return PHILinearize::phiInfoElementGetDest(*Iter); + } + PHILinearizeDestIterator& operator++() { + ++Iter; + return *this; + } + bool operator==(const PHILinearizeDestIterator &I) const { + return I.Iter == Iter; + } + bool operator!=(const PHILinearizeDestIterator &I) const { + return I.Iter != Iter; + } + + PHILinearizeDestIterator(PHILinearize::PHIInfoT::iterator I) : + Iter(I) {} + }; + + unsigned PHILinearize::phiInfoElementGetDest(PHIInfoElementT *Info) { + return Info->DestReg; + } + + void PHILinearize::phiInfoElementSetDef(PHIInfoElementT *Info, + unsigned NewDef) { + Info->DestReg = NewDef; + } + + DebugLoc PHILinearize::phiInfoElementGetDebugLoc(PHIInfoElementT *Info) { + return Info->DL; + } + + PHILinearize::PHISourcesT & + PHILinearize::phiInfoElementGetSources(PHIInfoElementT *Info) { + return Info->Sources; + } + + void PHILinearize::phiInfoElementAddSource(PHIInfoElementT *Info, + unsigned SourceReg) { + phiInfoElementGetSources(Info).insert(SourceReg); + } + + void PHILinearize::phiInfoElementRemoveSource(PHIInfoElementT *Info, + unsigned SourceReg) { + phiInfoElementGetSources(Info).erase(SourceReg); + } + + PHILinearize::PHIInfoElementT * + PHILinearize::findPHIInfoElement(unsigned DestReg) { + for (auto I : PHIInfo) { + if (phiInfoElementGetDest(I) == DestReg) { + return I; + } + } + return nullptr; + } + + PHILinearize::PHIInfoElementT * + PHILinearize::findPHIInfoElementFromSource(unsigned SourceReg) { + for (auto I : PHIInfo) { + for (auto SI : phiInfoElementGetSources(I)) { + if (SI == SourceReg) { + return I; + } + } + } + return nullptr; + } + + void PHILinearize::addDest(unsigned DestReg, DebugLoc DL) { + assert(findPHIInfoElement(DestReg) == nullptr && "Dest already exsists"); + PHISourcesT EmptySet; + PHIInfoElementT *NewElement = new PHIInfoElementT(); + NewElement->DestReg = DestReg; + NewElement->DL = DL; + NewElement->Sources = EmptySet; + PHIInfo.insert(NewElement); + } + + void PHILinearize::replaceDef(unsigned OldDestReg, unsigned NewDestReg) { + phiInfoElementSetDef(findPHIInfoElement(OldDestReg), NewDestReg); + } + + void PHILinearize::deleteDef(unsigned DestReg) { + PHIInfoElementT *InfoElement = findPHIInfoElement(DestReg); + PHIInfo.erase(InfoElement); + delete InfoElement; + } + + DebugLoc PHILinearize::getDebugLoc(unsigned DestReg) { + return phiInfoElementGetDebugLoc(findPHIInfoElement(DestReg)); + } + + void PHILinearize::addSource(unsigned DestReg, unsigned SourceReg) { + phiInfoElementAddSource(findPHIInfoElement(DestReg), SourceReg); + } + + void PHILinearize::removeSource(unsigned DestReg, unsigned SourceReg) { + phiInfoElementRemoveSource(findPHIInfoElement(DestReg), SourceReg); + } + + bool PHILinearize::findDest(unsigned SourceReg, unsigned &DestReg) { + PHIInfoElementT *InfoElement = findPHIInfoElementFromSource(SourceReg); + if (InfoElement != nullptr) { + DestReg = phiInfoElementGetDest(InfoElement); + return true; + } + return false; + } + + bool PHILinearize::isSource(unsigned Reg) { + unsigned DestReg; + return findDest(Reg, DestReg); + } + + unsigned PHILinearize::getNumSources(unsigned DestReg) { + return phiInfoElementGetSources(findPHIInfoElement(DestReg)).size(); + } + + void PHILinearize::dump(MachineRegisterInfo *MRI) { + const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo(); + DEBUG(dbgs() << "=PHIInfo Start=\n"); + for (auto PII : this->PHIInfo) { + PHIInfoElementT &Element = *PII; + DEBUG(dbgs() << "Dest: " << PrintReg(Element.DestReg, TRI) << + " Sources: {"); + for (auto &SI : Element.Sources) { + DEBUG(dbgs() << PrintReg(SI, TRI) << ","); + } + DEBUG(dbgs() <<"}\n"); + } + DEBUG(dbgs() << "=PHIInfo End=\n"); + } + + void PHILinearize::clear() { + PHIInfo = PHIInfoT(); + } + + PHILinearize::dest_iterator PHILinearize::dests_begin() { + return PHILinearizeDestIterator(PHIInfo.begin()); + } + + PHILinearize::dest_iterator PHILinearize::dests_end() { + return PHILinearizeDestIterator(PHIInfo.end()); + } + + PHILinearize::source_iterator PHILinearize::sources_begin(unsigned Reg) + { + auto InfoElement = findPHIInfoElement(Reg); + return phiInfoElementGetSources(InfoElement).begin(); + } + PHILinearize::source_iterator PHILinearize::sources_end(unsigned Reg) + { + auto InfoElement = findPHIInfoElement(Reg); + return phiInfoElementGetSources(InfoElement).end(); + } + + class RegionMRT; + class MBBMRT; + + static unsigned getPHINumInputs(MachineInstr &PHI) { + assert(PHI.isPHI()); + return (PHI.getNumOperands()-1)/2; + } + + static MachineBasicBlock *getPHIPred(MachineInstr &PHI, unsigned Index) { + assert(PHI.isPHI()); + return PHI.getOperand(Index*2+2).getMBB(); + } + + static unsigned getPHISourceReg(MachineInstr &PHI, unsigned Index) { + assert(PHI.isPHI()); + return PHI.getOperand(Index*2+1).getReg(); + } + + static unsigned getPHIDestReg(MachineInstr &PHI) { + assert(PHI.isPHI()); + return PHI.getOperand(0).getReg(); + } + + class LinearizedRegion { + private: + MachineBasicBlock *Entry; + // Note: This exit block is part of the region, and is the last + // merge block before exiting the region. + MachineBasicBlock *Exit; + DenseSet LiveOuts; + SmallPtrSet MBBs; + unsigned BBSelectRegIn; + unsigned BBSelectRegOut; + bool HasLoop; + LinearizedRegion *Parent; + + void storeLiveOutReg(MachineBasicBlock *MBB, + unsigned Reg, + MachineInstr *DefInstr, + const MachineRegisterInfo *MRI, + const TargetRegisterInfo *TRI, + PHILinearize &PHIInfo); + + void storeLiveOuts(MachineBasicBlock *MBB, + const MachineRegisterInfo *MRI, + const TargetRegisterInfo *TRI, + PHILinearize &PHIInfo); + + void storeLiveOuts(RegionMRT *Region, + const MachineRegisterInfo *MRI, + const TargetRegisterInfo *TRI, + PHILinearize &PHIInfo); + + public: + void setParent(LinearizedRegion *P) { Parent = P; } + + LinearizedRegion *getParent() { return Parent; } + + void print(raw_ostream &OS, const TargetRegisterInfo *TRI = nullptr); + + void setBBSelectRegIn(unsigned Reg); + + unsigned getBBSelectRegIn(); + + void setBBSelectRegOut(unsigned Reg); + + unsigned getBBSelectRegOut(); + + void setHasLoop(bool Value); + + bool getHasLoop(); + + void addLiveOut(unsigned VReg); + + void addLiveOuts(LinearizedRegion *LRegion); + + void removeLiveOut(unsigned Reg); + + void replaceLiveOut(unsigned OldReg, unsigned NewReg); + + void replaceRegister(unsigned Register, unsigned NewRegister, + MachineRegisterInfo *MRI, bool ReplaceInside, + bool ReplaceOutside, bool IncludeLoopPHIs); + + void replaceRegisterInsideRegion(unsigned Register, + unsigned NewRegister, + bool IncludeLoopPHIs, + MachineRegisterInfo *MRI); + + void replaceRegisterOutsideRegion(unsigned Register, + unsigned NewRegister, + bool IncludeLoopPHIs, + MachineRegisterInfo *MRI); + + DenseSet *getLiveOuts(); + + void setEntry(MachineBasicBlock *NewEntry); + + MachineBasicBlock *getEntry(); + + void setExit(MachineBasicBlock *NewExit); + + MachineBasicBlock *getExit(); + + void addMBB(MachineBasicBlock *MBB); + + bool contains(MachineBasicBlock *MBB); + + bool isLiveOut(unsigned Reg); + + bool hasNoDef(unsigned Reg, MachineRegisterInfo *MRI); + + bool isDefinedInRegion(unsigned Reg, MachineRegisterInfo *MRI); + + void removeFalseRegisterKills(MachineRegisterInfo *MRI); + + void initLiveOut(RegionMRT *Region, + const MachineRegisterInfo *MRI, + const TargetRegisterInfo *TRI, + PHILinearize &PHIInfo); + + LinearizedRegion(MachineBasicBlock *MBB, + const MachineRegisterInfo *MRI, + const TargetRegisterInfo *TRI, + PHILinearize &PHIInfo); + + LinearizedRegion(RegionMRT *Region, + const MachineRegisterInfo *MRI, + const TargetRegisterInfo *TRI, + PHILinearize &PHIInfo); + + LinearizedRegion(); + + ~LinearizedRegion(); + }; + + class MRT { + RegionMRT *Parent; + + public: + virtual RegionMRT *getRegionMRT() { + return nullptr; + } + + virtual MBBMRT *getMBBMRT() { + return nullptr; + } + + bool isRegion() { + return getRegionMRT() != nullptr; + } + + bool isMBB() { + return getMBBMRT() != nullptr; + } + + bool isRoot() { return Parent == nullptr; } + + void setParent(RegionMRT *Region) { + Parent = Region; + } + + RegionMRT *getParent() { + return Parent; + } + + static MachineBasicBlock *initializeMRT(MachineFunction &MF, + const MachineRegionInfo *RegionInfo, + DenseMap &RegionMap); + + static RegionMRT *buildMRT(MachineFunction &MF, + const MachineRegionInfo *RegionInfo); + + virtual void dump(const TargetRegisterInfo* TRI, int depth = 0) = 0; + + void dumpDepth(int depth) { + for (int i = depth; i > 0; --i) { + dbgs() << " "; + } + } + + virtual ~MRT() {} + }; + + class MBBMRT : public MRT { + MachineBasicBlock *MBB; + + public: + virtual MBBMRT *getMBBMRT() { + return this; + } + + MachineBasicBlock *getMBB() { + return MBB; + } + + virtual void dump(const TargetRegisterInfo *TRI, int depth = 0) { + dumpDepth(depth); + dbgs() << "MBB: " << getMBB()->getNumber() << "\n"; + } + + MBBMRT(MachineBasicBlock *BB) : MBB(BB) { + setParent(nullptr); + } + }; + + class RegionMRT : public MRT { + protected: + MachineRegion *Region; + LinearizedRegion *LRegion; + MachineBasicBlock *Succ; + SetVector Children; + + public: + virtual RegionMRT *getRegionMRT() { + return this; + } + + void setLinearizedRegion(LinearizedRegion *LinearizeRegion) { + LRegion = LinearizeRegion; + } + + LinearizedRegion *getLinearizedRegion() { + return LRegion; + } + + MachineRegion *getMachineRegion() { + return Region; + } + + void addChild(MRT *Tree) { + Children.insert(Tree); + } + + SetVector *getChildren() { + return &Children; + } + + virtual void dump(const TargetRegisterInfo *TRI, int depth = 0) { + dumpDepth(depth); + dbgs() << "Region: " << (void*)Region << "\n"; + + if (LRegion) { + dumpDepth(depth); + dbgs() << "SelectRegIn: " << + PrintReg(LRegion->getBBSelectRegIn(), TRI) << "\n"; + dumpDepth(depth); + dbgs() << "SelectRegOut: " << + PrintReg(LRegion->getBBSelectRegOut(), TRI) << "\n"; + } + + dumpDepth(depth); + if (getSucc()) + dbgs() << "Succ: " << getSucc()->getNumber() << "\n"; + else + dbgs() << "Succ: none \n"; + for (auto MRTI : Children) { + MRTI->dump(TRI, depth+1); + } + } + + MRT *getEntryTree() { + return Children.back(); + } + + MRT *getExitTree() { + return Children.front(); + } + + MachineBasicBlock *getEntry() { + MRT *Tree = Children.back(); + return (Tree->isRegion()) ? Tree->getRegionMRT()->getEntry() + : Tree->getMBBMRT()->getMBB(); + } + + MachineBasicBlock *getExit() { + MRT *Tree = Children.front(); + return (Tree->isRegion()) ? Tree->getRegionMRT()->getExit() + : Tree->getMBBMRT()->getMBB(); + } + + void setSucc(MachineBasicBlock *MBB) { + Succ = MBB; + } + + MachineBasicBlock *getSucc() { + return Succ; + } + + bool contains(MachineBasicBlock *MBB) { + for (auto CI : Children) { + if (CI->isMBB()) { + if (MBB == CI->getMBBMRT()->getMBB()) { + return true; + } + } else { + if (CI->getRegionMRT()->contains(MBB)) { + return true; + } else if (CI->getRegionMRT()->getLinearizedRegion() != nullptr && + CI->getRegionMRT()->getLinearizedRegion()->contains(MBB)) { + return true; + } + } + } + return false; + } + + void replaceLiveOutReg(unsigned Register, unsigned NewRegister) { + LinearizedRegion *LRegion = getLinearizedRegion(); + LRegion->replaceLiveOut(Register, NewRegister); + for (auto &CI : Children) { + if (CI->isRegion()) { + CI->getRegionMRT()->replaceLiveOutReg(Register, NewRegister); + } + } + } + + RegionMRT(MachineRegion *MachineRegion) : Region(MachineRegion), + LRegion(nullptr), + Succ(nullptr) + { + setParent(nullptr); + } + + virtual ~RegionMRT() { + if (LRegion) { + delete LRegion; + } + + for (auto CI : Children) { + delete &(*CI); + } + } + }; + + MachineBasicBlock* MRT::initializeMRT(MachineFunction &MF, + const MachineRegionInfo *RegionInfo, + DenseMap &RegionMap) { + for (auto &MFI : MF) { + MachineBasicBlock *ExitMBB = &MFI; + if (ExitMBB->succ_size() == 0) { + MRT *ExitMRT = new MBBMRT(ExitMBB); + RegionMap[RegionInfo->getRegionFor(ExitMBB)]->addChild(ExitMRT); + return ExitMBB; + } + } + llvm_unreachable("CFG has no exit block"); + return nullptr; + } + + // static bool isExitMBB(MachineBasicBlock *MBB) { + // return MBB->succ_size() == 0; + // } + + RegionMRT *MRT::buildMRT(MachineFunction &MF, + const MachineRegionInfo *RegionInfo) { + SmallPtrSet PlacedRegions; + DenseMap RegionMap; + MachineRegion *TopLevelRegion = RegionInfo->getTopLevelRegion(); + RegionMRT *Result = new RegionMRT(TopLevelRegion); + RegionMap[TopLevelRegion] = Result; + + // Insert the exit block first, we need it to be the merge node + // for the top level region. + //MachineBasicBlock *Exit = &(MF.back()); + //MRT *ExitMRT = new MBBMRT(Exit); + //RegionMap[RegionInfo->getRegionFor(Exit)]->addChild(ExitMRT); + MachineBasicBlock *Exit = initializeMRT(MF, RegionInfo, RegionMap); + + for (auto MBBI : post_order(&(MF.front()))) { + MachineBasicBlock *MBB = &(*MBBI); + + // Skip Exit since we already added it + if (MBB == Exit) { + continue; + } + + DEBUG(dbgs() << "Visiting BB#" << MBB->getNumber() << "\n"); + MRT *NewMBB = new MBBMRT(MBB); + + MachineRegion *Region = RegionInfo->getRegionFor(MBB); + + // Ensure we have the MRT region + if (RegionMap.count(Region) == 0) { + RegionMRT *NewMRTRegion = new RegionMRT(Region); + RegionMap[Region] = NewMRTRegion; + + // Ensure all parents are in the RegionMap + MachineRegion *Parent = Region->getParent(); + while (RegionMap.count(Parent) == 0) { + RegionMRT *NewMRTParent = new RegionMRT(Parent); + NewMRTParent->addChild(NewMRTRegion); + RegionMap[Parent] = NewMRTParent; + NewMRTRegion = NewMRTParent; + Parent = Parent->getParent(); + } + RegionMap[Parent]->addChild(NewMRTRegion); + } + + // Add MBB to Region MRT + RegionMap[Region]->addChild(NewMBB); + + // Find the successor node to this region. + if (RegionMap[Region]->getSucc() == nullptr) { + for (MachineBasicBlock::const_succ_iterator SI = MBB->succ_begin(), + E = MBB->succ_end(); SI != E; ++SI) { + if (!Region->contains(*SI)) { + RegionMap[Region]->setSucc(*SI); + } + } + } + } + return Result; + } + + void LinearizedRegion::storeLiveOutReg(MachineBasicBlock *MBB, + unsigned Reg, + MachineInstr *DefInstr, + const MachineRegisterInfo *MRI, + const TargetRegisterInfo *TRI, + PHILinearize &PHIInfo) { + if (TRI->isVirtualRegister(Reg)) { + DEBUG(dbgs() << "Considering Register: " << PrintReg(Reg, TRI) << "\n"); + // If this is a source register to a PHI we are chaining, it + // must be live out. + if (PHIInfo.isSource(Reg)) { + DEBUG(dbgs() << "Add LiveOut (PHI): " << PrintReg(Reg, TRI) << "\n"); + addLiveOut(Reg); + } else { + // If this is live out of the MBB + for (auto &UI : MRI->use_operands(Reg)) { + if (UI.getParent()->getParent() != MBB) { // || + // PHIInfo.isSource(Reg)) { + DEBUG(dbgs() << "Add LiveOut (MBB BB#" << MBB->getNumber() << "): " << PrintReg(Reg, TRI) << "\n"); + addLiveOut(Reg); + } else { + // If the use is in the same MBB we have to make sure + // it is after the def, otherwise it is live out in a loop + MachineInstr *UseInstr = UI.getParent(); + for (MachineBasicBlock::instr_iterator MII = UseInstr->getIterator(), + MIE = UseInstr->getParent()->instr_end(); MII != MIE; ++MII) { + if ((&(*MII)) == DefInstr) { + DEBUG(dbgs() << "Add LiveOut (Loop): " << PrintReg(Reg, TRI) << "\n"); + addLiveOut(Reg); + } + } + } + } + } + } + } + + void LinearizedRegion::storeLiveOuts(MachineBasicBlock *MBB, + const MachineRegisterInfo *MRI, + const TargetRegisterInfo *TRI, + PHILinearize &PHIInfo) { + DEBUG(dbgs() << "-Store Live Outs Begin (BB#" << MBB->getNumber() << ")-\n"); + for (auto &II : *MBB) { + for (auto &RI : II.defs()) { + storeLiveOutReg(MBB, RI.getReg(), RI.getParent(), MRI, TRI, PHIInfo); + } + for (auto &IRI : II.implicit_operands()) { + if (IRI.isDef()) { + storeLiveOutReg(MBB, IRI.getReg(), IRI.getParent(), MRI, TRI, PHIInfo); + } + } + } + + // If we have a successor with a PHI, source coming from this MBB we have to add + // the register as live out + for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), + E = MBB->succ_end(); SI != E; ++SI) { + for (auto &II : *(*SI)) { + if(II.isPHI()) { + MachineInstr &PHI = II; + int numPreds = getPHINumInputs(PHI); + for (int i = 0; i < numPreds; ++i) { + if (getPHIPred(PHI, i) == MBB) { + unsigned PHIReg = getPHISourceReg(PHI, i); + DEBUG(dbgs() << "Add LiveOut (PhiSource BB#" << MBB->getNumber() << + " -> BB#" << (*SI)->getNumber() << "): " << PrintReg(PHIReg, TRI) << "\n"); + addLiveOut(PHIReg); + } + } + } + } + } + + DEBUG(dbgs() << "-Store Live Outs Endn-\n"); + } + + void LinearizedRegion::storeLiveOuts(RegionMRT *Region, + const MachineRegisterInfo *MRI, + const TargetRegisterInfo *TRI, + PHILinearize &PHIInfo) { + // Fixme: Temporary imlementation, need to have real live out info + + MachineBasicBlock *Exit = Region->getSucc(); + + // Check if exit is end of function, if so, no live outs. + if (Exit == nullptr) + return; + + for (MachineBasicBlock::pred_iterator PI = Exit->pred_begin(), + E = Exit->pred_end(); PI != E; ++PI) { + if (Region->contains(*PI)) { + storeLiveOuts(*PI, MRI, TRI, PHIInfo); + } + } + } + + void LinearizedRegion::print(raw_ostream &OS, + const TargetRegisterInfo *TRI) { + OS << "Linearized Region (" << Entry->getNumber() << ", " << + (Exit == nullptr ? -1 : Exit->getNumber()) << "): In:" << + PrintReg(getBBSelectRegIn(), TRI) << + " Out:" << PrintReg(getBBSelectRegOut(), TRI) << " {"; + for (auto &LI : LiveOuts) { + OS << PrintReg(LI, TRI) << " "; + } + OS << "} \n"; + } + + void LinearizedRegion::setBBSelectRegIn(unsigned Reg) { + if (BBSelectRegIn) { + removeLiveOut(BBSelectRegIn); + } + BBSelectRegIn = Reg; + } + + unsigned LinearizedRegion::getBBSelectRegIn() { + return BBSelectRegIn; + } + + void LinearizedRegion::setBBSelectRegOut(unsigned Reg) { + if (BBSelectRegOut) { + removeLiveOut(BBSelectRegOut); + } + BBSelectRegOut = Reg; + addLiveOut(Reg); + } + + unsigned LinearizedRegion::getBBSelectRegOut() { + return BBSelectRegOut; + } + + void LinearizedRegion::setHasLoop(bool Value) { + HasLoop = Value; + } + + bool LinearizedRegion::getHasLoop() { + return HasLoop; + } + + void LinearizedRegion::addLiveOut(unsigned VReg) { + LiveOuts.insert(VReg); + } + + void LinearizedRegion::addLiveOuts(LinearizedRegion *LRegion) { + DenseSet *RegionLiveOuts = LRegion->getLiveOuts(); + for (auto R : *RegionLiveOuts) { + addLiveOut(R); + } + } + + void LinearizedRegion::removeLiveOut(unsigned Reg) { + if (isLiveOut(Reg)) + LiveOuts.erase(Reg); + } + + void LinearizedRegion::replaceLiveOut(unsigned OldReg, unsigned NewReg) { + if (isLiveOut(OldReg)) { + removeLiveOut(OldReg); + addLiveOut(NewReg); + } + } + + void LinearizedRegion::replaceRegister(unsigned Register, + unsigned NewRegister, + MachineRegisterInfo *MRI, + bool ReplaceInside, + bool ReplaceOutside, + bool IncludeLoopPHI) { + assert(Register != NewRegister && "Cannot replace a reg with itself"); + + DEBUG(dbgs() << "Pepareing to replace register (region): " << + PrintReg(Register, MRI->getTargetRegisterInfo()) << + " with " << + PrintReg(NewRegister, MRI->getTargetRegisterInfo()) << + "\n"); + + // If we are replacing outside, we also need to update the LiveOuts + if (ReplaceOutside && isLiveOut(Register)) { + LinearizedRegion *Current = this; + while (Current != nullptr) { + DEBUG(dbgs() <<"Region before register replace\n"); + DEBUG(Current->print(dbgs(), MRI->getTargetRegisterInfo())); + Current->replaceLiveOut(Register, NewRegister); + DEBUG(dbgs() << "Region after register replace\n"); + DEBUG(Current->print(dbgs(), MRI->getTargetRegisterInfo())); + Current = Current->getParent(); + } + } + + for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(Register), + E = MRI->reg_end(); I != E; ) { + MachineOperand &O = *I; + ++I; + + // We don't rewrite defs. + if (O.isDef()) + continue; + + bool IsInside = contains(O.getParent()->getParent()); + bool IsLoopPHI = IsInside && (O.getParent()->isPHI() && O.getParent()->getParent() == getEntry()); + bool ShouldReplace = (IsInside && ReplaceInside) || + (!IsInside && ReplaceOutside) || (IncludeLoopPHI && IsLoopPHI); + if (ShouldReplace) { + + if (TargetRegisterInfo::isPhysicalRegister(NewRegister)) { + DEBUG(dbgs() << "Trying to substitute physical register: " + << PrintReg(NewRegister, MRI->getTargetRegisterInfo()) << "\n"); + llvm_unreachable("Cannot substitute physical registers"); + } else { + DEBUG(dbgs() << "Replacing register (region): " << + PrintReg(Register, MRI->getTargetRegisterInfo()) << + " with " << + PrintReg(NewRegister, MRI->getTargetRegisterInfo()) << + "\n"); + O.setReg(NewRegister); + } + } + } + } + + void LinearizedRegion::replaceRegisterInsideRegion(unsigned Register, + unsigned NewRegister, + bool IncludeLoopPHIs, + MachineRegisterInfo *MRI) { + replaceRegister(Register, NewRegister, MRI, true, false, IncludeLoopPHIs); + } + + void LinearizedRegion::replaceRegisterOutsideRegion(unsigned Register, + unsigned NewRegister, + bool IncludeLoopPHIs, + MachineRegisterInfo *MRI) { + replaceRegister(Register, NewRegister, MRI, false, true, IncludeLoopPHIs); + } + + DenseSet *LinearizedRegion::getLiveOuts() { + return &LiveOuts; + } + + void LinearizedRegion::setEntry(MachineBasicBlock *NewEntry) { + Entry = NewEntry; + } + + MachineBasicBlock *LinearizedRegion::getEntry() { + return Entry; + } + + void LinearizedRegion::setExit(MachineBasicBlock *NewExit) { + Exit = NewExit; + } + + MachineBasicBlock *LinearizedRegion::getExit() { + return Exit; + } + + void LinearizedRegion::addMBB(MachineBasicBlock *MBB) { + MBBs.insert(MBB); + } + + bool LinearizedRegion::contains(MachineBasicBlock *MBB) { + return MBBs.count(MBB) == 1; + } + + bool LinearizedRegion::isLiveOut(unsigned Reg) { + return LiveOuts.count(Reg) == 1; + } + + bool LinearizedRegion::hasNoDef(unsigned Reg, MachineRegisterInfo *MRI) { + return MRI->def_begin(Reg) == MRI->def_end(); + } + + bool LinearizedRegion::isDefinedInRegion(unsigned Reg, + MachineRegisterInfo *MRI) { + bool NoDef = hasNoDef(Reg, MRI); + if (NoDef) { + return false; + } + + if (!MRI->hasOneDef(Reg)) { + DEBUG(dbgs() << "Register " << + PrintReg(Reg, MRI->getTargetRegisterInfo()) << + " has multiple defs\n"); + } + + assert(MRI->hasOneDef(Reg) && "Register has multiple definitions"); + MachineOperand *Def = &(*(MRI->def_begin(Reg))); + return contains(Def->getParent()->getParent()); + } + + // After the code has been structurized, what was flagged as kills + // before are no longer register kills. + void LinearizedRegion::removeFalseRegisterKills(MachineRegisterInfo *MRI) { + const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo(); + for (auto MBBI : MBBs) { + MachineBasicBlock *MBB = MBBI; + for (auto &II : *MBB) { + for (auto &RI : II.uses()) { + if (RI.isReg()) { + unsigned Reg = RI.getReg(); + if (TRI->isVirtualRegister(Reg)) { + if (hasNoDef(Reg, MRI)) + continue; + if (!MRI->hasOneDef(Reg)) { + DEBUG(this->getEntry()->getParent()->dump()); + DEBUG(dbgs() << PrintReg(Reg, TRI) << "\n"); + } + + if (MRI->def_begin(Reg) == MRI->def_end()) { + DEBUG(dbgs() << "Register " << + PrintReg(Reg, MRI->getTargetRegisterInfo()) << + " has NO defs\n"); + } else if (!MRI->hasOneDef(Reg)) { + DEBUG(dbgs() << "Register " << + PrintReg(Reg, MRI->getTargetRegisterInfo()) << + " has multiple defs\n"); + } + + assert(MRI->hasOneDef(Reg) && + "Register has multiple definitions"); + MachineOperand *Def = &(*(MRI->def_begin(Reg))); + MachineOperand *UseOperand = &(RI); + bool UseIsOutsideDefMBB = Def->getParent()->getParent() != MBB; + if (UseIsOutsideDefMBB && UseOperand->isKill()) { + DEBUG(dbgs() << "Removing kill flag on register: " << + PrintReg(Reg, TRI) << "\n"); + UseOperand->setIsKill(false); + } + } + } + } + } + } + } + + void LinearizedRegion::initLiveOut(RegionMRT *Region, + const MachineRegisterInfo *MRI, + const TargetRegisterInfo *TRI, + PHILinearize &PHIInfo) { + storeLiveOuts(Region, MRI, TRI, PHIInfo); + } + + LinearizedRegion::LinearizedRegion(MachineBasicBlock *MBB, + const MachineRegisterInfo *MRI, + const TargetRegisterInfo *TRI, + PHILinearize &PHIInfo) { + setEntry(MBB); + setExit(MBB); + storeLiveOuts(MBB, MRI, TRI, PHIInfo); + MBBs.insert(MBB); + BBSelectRegOut = 0; + BBSelectRegIn = 0; + Parent = nullptr; + } + + LinearizedRegion::LinearizedRegion(RegionMRT *Region, + const MachineRegisterInfo *MRI, + const TargetRegisterInfo *TRI, + PHILinearize &PHIInfo) { + setEntry(Region->getEntry()); + // We don't have a single exit block that is part of the region + // at this point. When the transform is performed this block + // will be created. + setExit(nullptr); + storeLiveOuts(Region, MRI, TRI, PHIInfo); + BBSelectRegOut = 0; + BBSelectRegIn = 0; + Parent = nullptr; + } + + LinearizedRegion::LinearizedRegion() { + setEntry(nullptr); + setExit(nullptr); + BBSelectRegOut = 0; + BBSelectRegIn = 0; + Parent = nullptr; + } + + LinearizedRegion::~LinearizedRegion() {} + + class MachineCFGStructurizer : public MachineFunctionPass { + private: + const MachineRegionInfo *Regions; + const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; + MachineRegisterInfo *MRI; + unsigned BBSelectRegister; + PHILinearize PHIInfo; + DenseMap FallthroughMap; + + void getPHIRegionIndices(RegionMRT *Region, + MachineInstr& PHI, + SmallVector &RegionIndices); + void getPHIRegionIndices(LinearizedRegion *Region, + MachineInstr& PHI, + SmallVector &RegionIndices); + void getPHINonRegionIndices( + LinearizedRegion *Region, + MachineInstr &PHI, + SmallVector &PHINonRegionIndices); + + void storePHILinearizationInfoDest( + unsigned LDestReg, + MachineInstr& PHI, + SmallVector *RegionIndices = nullptr); + + unsigned storePHILinearizationInfo( + MachineInstr& PHI, + SmallVector *RegionIndices); + + void extractKilledPHIs(MachineBasicBlock *MBB); + void extractKilledPHIs(LinearizedRegion *LRegion); + + bool shrinkPHI(MachineInstr &PHI, + SmallVector &PHIIndices, + unsigned *ReplaceReg); + + bool shrinkPHI(MachineInstr &PHI, + unsigned CombinedSourceReg, + MachineBasicBlock *SourceMBB, + SmallVector &PHIIndices, + unsigned *ReplaceReg); + + void replacePHI(MachineInstr &PHI, unsigned CombinedSourceReg, + MachineBasicBlock *LastMerge, + SmallVector &PHIRegionIndices); + void replaceEntryPHI(MachineInstr &PHI, + unsigned CombinedSourceReg, + MachineBasicBlock *IfMBB, + SmallVector &PHIRegionIndices); + void replaceLiveOutRegs(MachineInstr &PHI, + SmallVector &PHIRegionIndices, + unsigned CombinedSourceReg, + LinearizedRegion *LRegion); + void rewriteRegionExitPHI(RegionMRT *Region, + MachineBasicBlock *LastMerge, + MachineInstr &PHI, + LinearizedRegion *LRegion); + + void rewriteRegionExitPHIs(RegionMRT *Region, + MachineBasicBlock *LastMerge, + LinearizedRegion *LRegion); + void rewriteRegionEntryPHI(LinearizedRegion *Region, + MachineBasicBlock *IfMBB, + MachineInstr &PHI); + void rewriteRegionEntryPHIs(LinearizedRegion *Region, + MachineBasicBlock *IfMBB); + + bool regionIsSimpleIf(RegionMRT *Region); + + void transformSimpleIfRegion(RegionMRT *Region); + + bool regionIsSimpleLoop(RegionMRT *Region); + + void transformSimpleLoopRegion(RegionMRT *Region); + + void eliminateDeadBranchOperands(MachineBasicBlock::instr_iterator &II); + + void insertUnconditionalBranch(MachineBasicBlock *MBB, + MachineBasicBlock *Dest, + DebugLoc DL = DebugLoc()); + + MachineBasicBlock *createLinearizedExitBlock(RegionMRT *Region); + + void replaceRegisterOutsideMBB(MachineBasicBlock *MBB, + unsigned Register, + unsigned NewRegister); + + void insertMergePHI(MachineBasicBlock *IfBB, + MachineBasicBlock *CodeBB, + MachineBasicBlock *MergeBB, + unsigned DestRegister, + unsigned IfSourceRegister, + unsigned CodeSourceRegister, + bool IsUndefIfSource = false); + + MachineBasicBlock *createIfBlock(MachineBasicBlock *MergeBB, + MachineBasicBlock *CodeBBStart, + MachineBasicBlock *CodeBBEnd, + unsigned IfReg, + bool InheritPreds); + + void createEntryPHIs(LinearizedRegion *CurrentRegion); + void resolvePHIInfos(MachineBasicBlock *FunctionEntry); + + void replaceRegisterWith(unsigned Register, unsigned NewRegister); + + MachineBasicBlock *createIfRegion(MachineBasicBlock *MergeBB, + MachineBasicBlock *CodeBB, + LinearizedRegion *LRegion, + unsigned BBSelectRegIn, + unsigned BBSelectRegOut); + + MachineBasicBlock *createIfRegion(MachineBasicBlock *MergeMBB, + LinearizedRegion *InnerRegion, + LinearizedRegion *CurrentRegion, + unsigned BBSelectRegIn, + unsigned BBSelectRegOut); + void ensureCondIsNotKilled(SmallVector Cond); + + void rewriteCodeBBTerminator(MachineBasicBlock *CodeBB, + MachineBasicBlock *MergeBB, + unsigned BBSelectReg); + + MachineInstr *getDefInstr(unsigned Reg); + void insertChainedPHI(MachineBasicBlock *IfBB, + MachineBasicBlock *CodeBB, + MachineBasicBlock *MergeBB, + LinearizedRegion *InnerRegion, + unsigned DestReg, + unsigned SourceReg); + bool containsDef(MachineBasicBlock *MBB, + LinearizedRegion *InnerRegion, + unsigned Register); + void rewriteLiveOutRegs(MachineBasicBlock *IfBB, + MachineBasicBlock *CodeBB, + MachineBasicBlock *MergeBB, + LinearizedRegion *InnerRegion, + LinearizedRegion *LRegion); + + void splitLoopPHI(MachineInstr &PHI, + MachineBasicBlock *Entry, + MachineBasicBlock *EntrySucc, + LinearizedRegion *LRegion); + void splitLoopPHIs(MachineBasicBlock *Entry, + MachineBasicBlock *EntrySucc, + LinearizedRegion *LRegion); + + MachineBasicBlock *splitEntry(LinearizedRegion *LRegion); + + LinearizedRegion *createLinearizedRegion(RegionMRT *Region); + + bool structurizeComplexRegion(RegionMRT *Region); + + bool structurizeRegion(RegionMRT *Region); + + bool structurizeRegions(RegionMRT *Region, bool isTopRegion); + + public: + static char ID; + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + MachineCFGStructurizer() : MachineFunctionPass(ID) { + initializeMachineCFGStructurizerPass(*PassRegistry::getPassRegistry()); + } + + void initFallthroughMap(MachineFunction &MF); + + void initializeSelectRegisters(RegionMRT *Region, + unsigned ExistingEntryReg, + unsigned ExistingExitReg, + MachineRegisterInfo *MRI, + const TargetInstrInfo *TII); + + RegionMRT *RMRT; + void setRegionMRT(RegionMRT *RegionTree) { + RMRT = RegionTree; + } + + RegionMRT *getRegionMRT() { + return RMRT; + } + + bool runOnMachineFunction(MachineFunction &MF) override; + + }; +} + +char MachineCFGStructurizer::ID = 0; + +bool MachineCFGStructurizer::regionIsSimpleIf(RegionMRT *Region) { + MachineBasicBlock *Entry = Region->getEntry(); + MachineBasicBlock *Succ = Region->getSucc(); + bool FoundBypass = false; + bool FoundIf = false; + + if (Entry->succ_size() != 2) { + return false; + } + + for (MachineBasicBlock::const_succ_iterator SI = Entry->succ_begin(), + E = Entry->succ_end(); SI != E; ++SI) { + MachineBasicBlock *Current = *SI; + + if (Current == Succ) { + FoundBypass = true; + } else if ((Current->succ_size() == 1) && + *(Current->succ_begin()) == Succ) { + FoundIf = true; + } + } + + return FoundIf && FoundBypass; +} + +void MachineCFGStructurizer::transformSimpleIfRegion(RegionMRT *Region) { + MachineBasicBlock *Entry = Region->getEntry(); + MachineBasicBlock *Exit = Region->getExit(); + TII->convertNonUniformIfRegion(Entry, Exit); +} + +bool MachineCFGStructurizer::regionIsSimpleLoop(RegionMRT *Region) { + MachineBasicBlock *Entry = Region->getEntry(); + + if (Entry->succ_size() != 1) { + return false; + } + + int NumRegionExitEdges = 0; + MachineBasicBlock* BackBlock = NULL; + for (MachineBasicBlock::const_pred_iterator PI = Entry->succ_begin(), + PE = Entry->succ_end(); PI != PE; ++PI) { + MachineBasicBlock *CurrentSP = *PI; + if (Region->contains(CurrentSP)) { + BackBlock = CurrentSP; + } + } + + bool HasBackedge = false; + + for (MachineBasicBlock::const_succ_iterator SI = Entry->succ_begin(), + SE = Entry->succ_end(); SI != SE; ++SI) { + MachineBasicBlock *CurrentBS = *SI; + if (CurrentBS == Entry) { + HasBackedge = true; + } + } + + return NumRegionExitEdges == 1 && BackBlock->succ_size() == 2 && + HasBackedge; +} + +void MachineCFGStructurizer::transformSimpleLoopRegion(RegionMRT *Region) { + MachineBasicBlock *Entry = Region->getEntry(); + MachineBasicBlock* BackBlock = NULL; + for (MachineBasicBlock::const_pred_iterator PI = Entry->succ_begin(), + PE = Entry->succ_end(); PI != PE; ++PI) { + MachineBasicBlock *CurrentSP = *PI; + if (Region->contains(CurrentSP)) { + BackBlock = CurrentSP; + } + } + + TII->convertNonUniformLoopRegion(Entry, BackBlock); +} + +// If a region region is just a sequence of regions (and the exit +// block in the case of the top level region), we can simply skip +// linearizing it, because it is already linear/ +// +bool regionIsSequence(RegionMRT *Region) { + auto Children = Region->getChildren(); + for (auto CI : *Children) { + if (!CI->isRegion()) { + if (CI->getMBBMRT()->getMBB()->succ_size() > 1) { + return false; + } + } + } + return true; +} + +void MachineCFGStructurizer::getPHIRegionIndices( + RegionMRT *Region, + MachineInstr &PHI, + SmallVector &PHIRegionIndices) { + unsigned NumInputs = getPHINumInputs(PHI); + for (unsigned i = 0; i < NumInputs ; ++i) { + MachineBasicBlock *Pred = getPHIPred(PHI, i); + if (Region->contains(Pred)) { + PHIRegionIndices.push_back(i); + } + } +} + +void MachineCFGStructurizer::getPHIRegionIndices( + LinearizedRegion *Region, + MachineInstr &PHI, + SmallVector &PHIRegionIndices) { + unsigned NumInputs = getPHINumInputs(PHI); + for (unsigned i = 0; i < NumInputs ; ++i) { + MachineBasicBlock *Pred = getPHIPred(PHI, i); + if (Region->contains(Pred)) { + PHIRegionIndices.push_back(i); + } + } +} + +void MachineCFGStructurizer::getPHINonRegionIndices( + LinearizedRegion *Region, + MachineInstr &PHI, + SmallVector &PHINonRegionIndices) { + unsigned NumInputs = getPHINumInputs(PHI); + for (unsigned i = 0; i < NumInputs ; ++i) { + MachineBasicBlock *Pred = getPHIPred(PHI, i); + if (!Region->contains(Pred)) { + PHINonRegionIndices.push_back(i); + } + } +} + +void MachineCFGStructurizer::storePHILinearizationInfoDest( + unsigned LDestReg, + MachineInstr& PHI, + SmallVector *RegionIndices) { + if (RegionIndices) { + for (auto i : *RegionIndices) { + PHIInfo.addSource(LDestReg, getPHISourceReg(PHI, i)); + } + } else { + unsigned NumInputs = getPHINumInputs(PHI); + for (unsigned i = 0; i < NumInputs ; ++i) { + PHIInfo.addSource(LDestReg, getPHISourceReg(PHI, i)); + } + } +} + +unsigned MachineCFGStructurizer::storePHILinearizationInfo( + MachineInstr& PHI, + SmallVector *RegionIndices) { + unsigned DestReg = getPHIDestReg(PHI); + unsigned LinearizeDestReg = + MRI->createVirtualRegister(MRI->getRegClass(DestReg)); + PHIInfo.addDest(LinearizeDestReg, PHI.getDebugLoc()); + storePHILinearizationInfoDest(LinearizeDestReg, PHI, RegionIndices); + return LinearizeDestReg; +} + +void MachineCFGStructurizer::extractKilledPHIs(MachineBasicBlock *MBB) { + // We need to create a new chain for the killed phi, but there is no + // need to do the renaming outside or inside the block. + SmallPtrSet PHIs; + for (MachineBasicBlock::instr_iterator I = MBB->instr_begin(), + E = MBB->instr_end(); I != E; ++I) { + MachineInstr &Instr = *I; + if (Instr.isPHI()) { + unsigned PHIDestReg = getPHIDestReg(Instr); + DEBUG(dbgs() << "Extractking killed phi:\n"); + DEBUG(Instr.dump()); + PHIs.insert(&Instr); + PHIInfo.addDest(PHIDestReg, Instr.getDebugLoc()); + storePHILinearizationInfoDest(PHIDestReg, Instr); + } + } + + for (auto PI : PHIs) { + PI->eraseFromParent(); + } +} + +void MachineCFGStructurizer::extractKilledPHIs(LinearizedRegion *LRegion) { +// PHIs can only exist in the entry block. + extractKilledPHIs(LRegion->getEntry()); +} + +static bool isPHIRegionIndex(SmallVector PHIRegionIndices, + unsigned Index) { + for (auto i : PHIRegionIndices) { + if (i == Index) + return true; + } + return false; +} + +bool MachineCFGStructurizer::shrinkPHI( + MachineInstr &PHI, + SmallVector &PHIIndices, + unsigned *ReplaceReg) { + return shrinkPHI(PHI, 0, nullptr, PHIIndices, ReplaceReg); +} + +bool MachineCFGStructurizer::shrinkPHI( + MachineInstr &PHI, + unsigned CombinedSourceReg, + MachineBasicBlock *SourceMBB, + SmallVector &PHIIndices, + unsigned *ReplaceReg) { + DEBUG(dbgs() << "Shrink PHI: "); + DEBUG(PHI.dump()); + DEBUG(dbgs() << " to " << PrintReg(getPHIDestReg(PHI), TRI) << + " = PHI("); + + bool Replaced = false; + unsigned NumInputs = getPHINumInputs(PHI); + int SingleExternalEntryIndex = -1; + for (unsigned i = 0; i < NumInputs ; ++i) { + if (!isPHIRegionIndex(PHIIndices, i)) { + if (SingleExternalEntryIndex == -1) { + // Single entry + SingleExternalEntryIndex = i; + } else { + // Multiple entries + SingleExternalEntryIndex = -2; + } + } + } + + if (SingleExternalEntryIndex > -1) { + *ReplaceReg = getPHISourceReg(PHI, SingleExternalEntryIndex); + // We should not rewrite the code, we should only pick up the single value + // that represents the shrunk PHI. + // replaceRegisterWith(getPHIDestReg(PHI), *ReplaceReg); + Replaced = true; + } else { + MachineBasicBlock *MBB = PHI.getParent(); + MachineInstrBuilder MIB = + BuildMI(*MBB, PHI, PHI.getDebugLoc(), TII->get(TargetOpcode::PHI), + getPHIDestReg(PHI)); + if (SourceMBB) { + MIB.addReg(CombinedSourceReg); + MIB.addMBB(SourceMBB); + DEBUG(dbgs() << PrintReg(CombinedSourceReg, TRI) << ", BB#" << + SourceMBB->getNumber()); + } + + for (unsigned i = 0; i < NumInputs ; ++i) { + if (isPHIRegionIndex(PHIIndices, i)) { + continue; + } + unsigned SourceReg = getPHISourceReg(PHI, i); + MachineBasicBlock *SourcePred = getPHIPred(PHI, i); + MIB.addReg(SourceReg); + MIB.addMBB(SourcePred); + DEBUG(dbgs() << PrintReg(SourceReg, TRI) << ", BB#" << + SourcePred->getNumber()); + } + DEBUG(dbgs() << ")\n"); + } + PHI.eraseFromParent(); + return Replaced; +} + +void MachineCFGStructurizer::replacePHI( + MachineInstr &PHI, + unsigned CombinedSourceReg, + MachineBasicBlock *LastMerge, + SmallVector &PHIRegionIndices) { + DEBUG(dbgs() << "Replace PHI: "); + DEBUG(PHI.dump()); + DEBUG(dbgs() << " with " << PrintReg(getPHIDestReg(PHI), TRI) << + " = PHI("); + + bool HasExternalEdge = false; + unsigned NumInputs = getPHINumInputs(PHI); + for (unsigned i = 0; i < NumInputs ; ++i) { + if (!isPHIRegionIndex(PHIRegionIndices, i)) { + HasExternalEdge = true; + } + } + + if (HasExternalEdge) { + MachineBasicBlock *MBB = PHI.getParent(); + MachineInstrBuilder MIB = + BuildMI(*MBB, PHI, PHI.getDebugLoc(), TII->get(TargetOpcode::PHI), + getPHIDestReg(PHI)); + MIB.addReg(CombinedSourceReg); + MIB.addMBB(LastMerge); + DEBUG(dbgs() << PrintReg(CombinedSourceReg, TRI) << ", BB#" << + LastMerge->getNumber()); + for (unsigned i = 0; i < NumInputs ; ++i) { + if (isPHIRegionIndex(PHIRegionIndices, i)) { + continue; + } + unsigned SourceReg = getPHISourceReg(PHI, i); + MachineBasicBlock *SourcePred = getPHIPred(PHI, i); + MIB.addReg(SourceReg); + MIB.addMBB(SourcePred); + DEBUG(dbgs() << PrintReg(SourceReg, TRI) << ", BB#" << + SourcePred->getNumber()); + } + DEBUG(dbgs() << ")\n"); + } else { + replaceRegisterWith(getPHIDestReg(PHI), CombinedSourceReg); + } + PHI.eraseFromParent(); +} + +void MachineCFGStructurizer::replaceEntryPHI( + MachineInstr &PHI, + unsigned CombinedSourceReg, + MachineBasicBlock *IfMBB, + SmallVector &PHIRegionIndices) { + + DEBUG(dbgs() << "Replace entry PHI: "); + DEBUG(PHI.dump()); + DEBUG(dbgs() << " with " << PrintReg(getPHIDestReg(PHI), TRI) << + " = PHI("); + + MachineBasicBlock *MBB = PHI.getParent(); + MachineInstrBuilder MIB = + BuildMI(*MBB, PHI, PHI.getDebugLoc(), TII->get(TargetOpcode::PHI), + getPHIDestReg(PHI)); + MIB.addReg(CombinedSourceReg); + MIB.addMBB(IfMBB); + DEBUG(dbgs() << PrintReg(CombinedSourceReg, TRI) << ", BB#" << + IfMBB->getNumber()); + unsigned NumInputs = getPHINumInputs(PHI); + for (unsigned i = 0; i < NumInputs ; ++i) { + if (isPHIRegionIndex(PHIRegionIndices, i)) { + continue; + } + unsigned SourceReg = getPHISourceReg(PHI, i); + MachineBasicBlock *SourcePred = getPHIPred(PHI, i); + MIB.addReg(SourceReg); + MIB.addMBB(SourcePred); + DEBUG(dbgs() << PrintReg(SourceReg, TRI) << ", BB#" << + SourcePred->getNumber()); + } + DEBUG(dbgs() << ")\n"); + PHI.eraseFromParent(); +} + +void MachineCFGStructurizer::replaceLiveOutRegs( + MachineInstr &PHI, + SmallVector &PHIRegionIndices, + unsigned CombinedSourceReg, + LinearizedRegion *LRegion) +{ + bool WasLiveOut = false; + for (auto PII : PHIRegionIndices) { + unsigned Reg = getPHISourceReg(PHI, PII); + if (LRegion->isLiveOut(Reg)) { + LRegion->removeLiveOut(Reg); + WasLiveOut = true; + } + } + + if (WasLiveOut) + LRegion->addLiveOut(CombinedSourceReg); +} + +void MachineCFGStructurizer::rewriteRegionExitPHI(RegionMRT *Region, + MachineBasicBlock *LastMerge, + MachineInstr &PHI, + LinearizedRegion *LRegion) { + SmallVector PHIRegionIndices; + getPHIRegionIndices(Region, PHI, PHIRegionIndices); + unsigned LinearizedSourceReg = + storePHILinearizationInfo(PHI, &PHIRegionIndices); + + replacePHI(PHI, LinearizedSourceReg, LastMerge, PHIRegionIndices); + replaceLiveOutRegs(PHI, PHIRegionIndices, LinearizedSourceReg, LRegion); +} + +void MachineCFGStructurizer::rewriteRegionEntryPHI(LinearizedRegion *Region, + MachineBasicBlock *IfMBB, + MachineInstr &PHI) { + SmallVector PHINonRegionIndices; + getPHINonRegionIndices(Region, PHI, PHINonRegionIndices); + unsigned LinearizedSourceReg = + storePHILinearizationInfo(PHI, &PHINonRegionIndices); + replaceEntryPHI(PHI, LinearizedSourceReg, IfMBB, PHINonRegionIndices); +} + +static void collectPHIs(MachineBasicBlock *MBB, + SmallVector &PHIs) { + for (auto &BBI : *MBB) { + if (BBI.isPHI()) { + PHIs.push_back(&BBI); + } + } +} + +void MachineCFGStructurizer::rewriteRegionExitPHIs( + RegionMRT *Region, + MachineBasicBlock *LastMerge, + LinearizedRegion *LRegion) { + SmallVector PHIs; + auto Exit = Region->getSucc(); + if (Exit == nullptr) + return; + + collectPHIs(Exit, PHIs); + + for (auto PHII : PHIs) { + rewriteRegionExitPHI(Region, LastMerge, *PHII, LRegion); + } +} + +void MachineCFGStructurizer::rewriteRegionEntryPHIs( + LinearizedRegion *Region, + MachineBasicBlock *IfMBB) { + SmallVector PHIs; + auto Entry = Region->getEntry(); + + collectPHIs(Entry, PHIs); + + for (auto PHII : PHIs) { + rewriteRegionEntryPHI(Region, IfMBB, *PHII); + } +} + +void MachineCFGStructurizer::insertUnconditionalBranch(MachineBasicBlock *MBB, + MachineBasicBlock *Dest, + DebugLoc DL) { + MachineBasicBlock::instr_iterator Terminator = + MBB->getFirstInstrTerminator(); + bool HasTerminator = Terminator != MBB->instr_end(); + if (HasTerminator) { + TII->ReplaceTailWithBranchTo(Terminator, Dest); + } if (++MachineFunction::iterator(MBB) != MachineFunction::iterator(Dest)) { + TII->InsertUnconditionalBranch(*MBB, Dest, DL); + } +} + +static MachineBasicBlock *getSingleExitNode(MachineFunction &MF) { + MachineBasicBlock *result = nullptr; + for (auto &MFI : MF) { + if (MFI.succ_size() == 0) { + if (result == nullptr) { + result = &MFI; + } else { + return nullptr; + } + } + } + + return result; +} + +static bool hasOneExitNode(MachineFunction &MF) { + return getSingleExitNode(MF) != nullptr; +} + +MachineBasicBlock *MachineCFGStructurizer::createLinearizedExitBlock( + RegionMRT *Region) { + auto Exit = Region->getSucc(); + + // If the exit is the end of the function, we just use the existing + MachineFunction *MF = Region->getEntry()->getParent(); + if (Exit == nullptr && hasOneExitNode(*MF)) { + return &(*(--(Region->getEntry()->getParent()->end()))); + } + + MachineBasicBlock *LastMerge = MF->CreateMachineBasicBlock(); + if (Exit == nullptr) { + MachineFunction::iterator ExitIter = MF->end(); + MF->insert(ExitIter, LastMerge); + } else { + MachineFunction::iterator ExitIter = Exit->getIterator(); + MF->insert(ExitIter, LastMerge); + LastMerge->addSuccessor(Exit); + insertUnconditionalBranch(LastMerge, Exit); + DEBUG(dbgs() << "Created exit block: " << LastMerge->getNumber() << "\n"); + } + return LastMerge; +} + +void MachineCFGStructurizer::replaceRegisterOutsideMBB(MachineBasicBlock *MBB, + unsigned Register, + unsigned NewRegister) { + assert(Register != NewRegister && "Cannot replace a reg with itself"); + + for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(Register), + E = MRI->reg_end(); I != E; ) { + MachineOperand &O = *I; + ++I; + if (O.getParent()->getParent() != MBB) { + if (TargetRegisterInfo::isPhysicalRegister(NewRegister)) { + llvm_unreachable("Cannot substitute physical registers"); + // We don't handle physical registers, but if we need to in the future + // This is how we do it: O.substPhysReg(NewRegister, *TRI); + } else { + O.setReg(NewRegister); + } + } + } +} + +void MachineCFGStructurizer::insertMergePHI(MachineBasicBlock *IfBB, + MachineBasicBlock *CodeBB, + MachineBasicBlock *MergeBB, + unsigned DestRegister, + unsigned IfSourceRegister, + unsigned CodeSourceRegister, + bool IsUndefIfSource) { + // If this is the function exit block, we don't need a phi. + if (MergeBB->succ_begin() == MergeBB->succ_end()) { + return; + } + DEBUG(dbgs() << "Merge PHI (BB#" << MergeBB->getNumber() << "): " << + PrintReg(DestRegister, TRI) << " = PHI(" << + PrintReg(IfSourceRegister, TRI) << ", BB#" << IfBB->getNumber() << + PrintReg(CodeSourceRegister, TRI) << ", BB#" << CodeBB->getNumber() << + ")\n"); + DebugLoc DL = MergeBB->findDebugLoc(MergeBB->begin()); + MachineInstrBuilder MIB = + BuildMI(*MergeBB, MergeBB->instr_begin(), DL, + TII->get(TargetOpcode::PHI), DestRegister); + if (IsUndefIfSource && false) { + MIB.addReg(IfSourceRegister, RegState::Undef); + } else { + MIB.addReg(IfSourceRegister); + } + MIB.addMBB(IfBB); + MIB.addReg(CodeSourceRegister); + MIB.addMBB(CodeBB); +} + +static void removeExternalCFGSuccessors(MachineBasicBlock *MBB) { + for (MachineBasicBlock::succ_iterator PI = MBB->succ_begin(), + E = MBB->succ_end(); PI != E; ++PI) { + if ((*PI) != MBB) { + (MBB)->removeSuccessor(*PI); + } + } +} + +static void removeExternalCFGEdges(MachineBasicBlock *StartMBB, + MachineBasicBlock *EndMBB) { + + // We have to check against the StartMBB successor becasuse a + // structurized region with a loop will have the entry block split, + // and the backedge will go to the entry successor. + unsigned SuccSize = StartMBB->succ_size(); + if (SuccSize > 0) { + MachineBasicBlock *StartMBBSucc = *(StartMBB->succ_begin()); + for (MachineBasicBlock::succ_iterator PI = EndMBB->succ_begin(), + E = EndMBB->succ_end(); PI != E; ++PI) { + // Either we have a back-edge to the entry block, or a back-edge to the + // succesor of the entry block since the block may be split. + if ((*PI) != StartMBB && !((*PI) == StartMBBSucc && StartMBB != EndMBB && SuccSize == 1)) { + DEBUG(dbgs() << "Removing edge: BB#" << EndMBB->getNumber() << + " -> BB#" << (*PI)->getNumber() << "\n"); + (EndMBB)->removeSuccessor(*PI); + } + } + } + + for (MachineBasicBlock::pred_iterator PI = StartMBB->pred_begin(), + E = StartMBB->pred_end(); PI != E; ++PI) { + if ((*PI) != EndMBB) { + DEBUG(dbgs() << "Removing edge: BB#" << (*PI)->getNumber() << + " -> BB#" << StartMBB->getNumber() << "\n"); + (*PI)->removeSuccessor(StartMBB); + } + } +} + +MachineBasicBlock *MachineCFGStructurizer::createIfBlock( + MachineBasicBlock *MergeBB, + MachineBasicBlock *CodeBBStart, + MachineBasicBlock *CodeBBEnd, + unsigned IfReg, + bool InheritPreds) { + MachineFunction *MF = MergeBB->getParent(); + MachineBasicBlock *IfBB = MF->CreateMachineBasicBlock(); + + if (InheritPreds) { + for (MachineBasicBlock::pred_iterator PI = CodeBBStart->pred_begin(), + E = CodeBBStart->pred_end(); PI != E; ++PI) { + if ((*PI) != CodeBBEnd) { + MachineBasicBlock *Pred = (*PI); + Pred->addSuccessor(IfBB); + for (auto &TI : Pred->terminators()) { + for (auto &UI : TI.uses()) { + if (UI.isMBB() && UI.getMBB() == CodeBBStart) { + UI.setMBB(IfBB); + } + } + } + } + } + } + + removeExternalCFGEdges(CodeBBStart, CodeBBEnd); + + auto CodeBBStartI = CodeBBStart->getIterator(); + auto CodeBBEndI = CodeBBEnd->getIterator(); + auto MergeIter = MergeBB->getIterator(); + MF->insert(MergeIter, IfBB); + MF->splice(MergeIter, CodeBBStartI, ++CodeBBEndI); + IfBB->addSuccessor(MergeBB); + IfBB->addSuccessor(CodeBBStart); + + DEBUG(dbgs() << "Created If block: " << IfBB->getNumber() << "\n"); + // Ensure that the MergeBB is a succesor of the CodeEndBB. + if (!CodeBBEnd->isSuccessor(MergeBB)) + CodeBBEnd->addSuccessor(MergeBB); + + DEBUG(dbgs() << "Moved MBB#" << CodeBBStart->getNumber() << " through MBB#" + << CodeBBEnd->getNumber() << "\n"); + + // If we have a single predecessor we can find a reasonable debug location + MachineBasicBlock *SinglePred = CodeBBStart->pred_size() == 1 ? + *(CodeBBStart->pred_begin()) : nullptr; + DebugLoc DL = SinglePred ? + SinglePred->findDebugLoc(SinglePred->getFirstTerminator()) : + DebugLoc(); + + unsigned Reg = + TII->insertNE(IfBB, IfBB->begin(), DL, IfReg, CodeBBStart->getNumber()); + if (&(*(IfBB->getParent()->begin())) == IfBB) { + TII->insertAssign(*IfBB, IfBB->begin(), DL, IfReg, + CodeBBStart->getNumber()); + } + MachineOperand RegOp = MachineOperand::CreateReg(Reg, false, false, true); + ArrayRef Cond(RegOp); + TII->InsertBranch(*IfBB, MergeBB, CodeBBStart, Cond, DL); + + return IfBB; +} + +void MachineCFGStructurizer::ensureCondIsNotKilled( + SmallVector Cond) { + assert(Cond.size() == 1 && "Can only handle one cond"); + if (!Cond[0].isReg()) + return; + + unsigned CondReg = Cond[0].getReg(); + for (auto UI = MRI->use_begin(CondReg), E = MRI->use_end(); UI != E; ++UI) { + (*UI).setIsKill(false); + } +} + +void MachineCFGStructurizer::rewriteCodeBBTerminator( + MachineBasicBlock *CodeBB, + MachineBasicBlock *MergeBB, + unsigned BBSelectReg) { + MachineBasicBlock *TrueBB = nullptr; + MachineBasicBlock *FalseBB = nullptr; + SmallVector Cond; + MachineBasicBlock *FallthroughBB = FallthroughMap[CodeBB]; + TII->analyzeBranch(*CodeBB, TrueBB, FalseBB, Cond); + + DebugLoc DL = CodeBB->findDebugLoc(CodeBB->getFirstTerminator()); + + if (FalseBB == nullptr && TrueBB == nullptr && FallthroughBB == nullptr) { + // This is an exit block, hence no successors. We will assign the bb select register + // to the entry block. + TII->insertAssign(*CodeBB, CodeBB->getFirstTerminator(), DL, + BBSelectReg, CodeBB->getParent()->begin()->getNumber()); + insertUnconditionalBranch(CodeBB, MergeBB, DL); + return; + } + + if (FalseBB == nullptr && TrueBB == nullptr) { + TrueBB = FallthroughBB; + } else if (TrueBB != nullptr && FalseBB == nullptr) { + FalseBB = FallthroughBB; + } + + if (TrueBB != nullptr && FalseBB == nullptr) { + TII->insertAssign(*CodeBB, CodeBB->getFirstTerminator(), DL, + BBSelectReg, TrueBB->getNumber()); + } else { + const TargetRegisterClass *RegClass = MRI->getRegClass(BBSelectReg); + unsigned TrueBBReg = MRI->createVirtualRegister(RegClass); + unsigned FalseBBReg = MRI->createVirtualRegister(RegClass); + TII->insertAssign(*CodeBB, CodeBB->getFirstTerminator(), DL, + TrueBBReg, TrueBB->getNumber()); + TII->insertAssign(*CodeBB, CodeBB->getFirstTerminator(), DL, + FalseBBReg, FalseBB->getNumber()); + ensureCondIsNotKilled(Cond); + TII->insertSelect(*CodeBB, CodeBB->getFirstTerminator(), DL, + BBSelectReg, Cond, TrueBBReg, FalseBBReg); + } + + insertUnconditionalBranch(CodeBB, MergeBB, DL); +} + +MachineInstr *MachineCFGStructurizer::getDefInstr(unsigned Reg) { + if (MRI->def_begin(Reg) == MRI->def_end()) { + DEBUG(dbgs() << "Register " << PrintReg(Reg, MRI->getTargetRegisterInfo()) + << " has NO defs\n"); + } else if (!MRI->hasOneDef(Reg)) { + DEBUG(dbgs() << "Register " << PrintReg(Reg, MRI->getTargetRegisterInfo()) + << " has multiple defs\n"); + DEBUG(dbgs() << "DEFS BEGIN:\n"); + for (auto DI = MRI->def_begin(Reg), DE = MRI->def_end(); DI != DE; ++DI) { + DEBUG(DI->getParent()->dump()); + } + DEBUG(dbgs() << "DEFS END\n"); + } + + assert(MRI->hasOneDef(Reg) && "Register has multiple definitions"); + return (*(MRI->def_begin(Reg))).getParent(); +} + +void MachineCFGStructurizer::insertChainedPHI(MachineBasicBlock *IfBB, + MachineBasicBlock *CodeBB, + MachineBasicBlock *MergeBB, + LinearizedRegion *InnerRegion, + unsigned DestReg, + unsigned SourceReg) { + // In this function we know we are part of a chain already, so we need + // to add the registers to the existing chain, and rename the register + // inside the region. + MachineInstr *DefInstr = getDefInstr(SourceReg); + if (DefInstr->isPHI() && DefInstr->getParent() == CodeBB) { + // Handle the case where the def is a PHI-def, then we only + // need to do renaming. Special care needs to be taken if the + // PHI-def is part of an existing chain, or if a new one needs to be + // created. + InnerRegion->replaceRegisterInsideRegion(SourceReg, DestReg, true, MRI); + + // We collect all PHI Information, and if we are at the region entry, + // all PHIs will be removed, and then re-introduced if needed. + storePHILinearizationInfoDest(DestReg, *DefInstr); + // We have picked up all the information we need now and can remove + // the PHI + PHIInfo.removeSource(DestReg, SourceReg); + DefInstr->eraseFromParent(); + } else { + InnerRegion->replaceRegisterOutsideRegion(SourceReg, DestReg, false, MRI); + + const TargetRegisterClass *RegClass = MRI->getRegClass(DestReg); + unsigned NextDestReg = MRI->createVirtualRegister(RegClass); + bool IsLastDef = PHIInfo.getNumSources(DestReg) == 1; + DEBUG(dbgs() << "Insert Chained PHI\n"); + insertMergePHI(IfBB, InnerRegion->getExit(), MergeBB, DestReg, NextDestReg, + SourceReg, IsLastDef); + + PHIInfo.removeSource(DestReg, SourceReg); + if (IsLastDef) { + DebugLoc DL = IfBB->findDebugLoc(IfBB->getFirstTerminator()); + TII->insertAssign(*IfBB, IfBB->getFirstTerminator(), DL, + NextDestReg, 0); + PHIInfo.deleteDef(DestReg); + } else { + PHIInfo.replaceDef(DestReg, NextDestReg); + } + } +} + +bool MachineCFGStructurizer::containsDef(MachineBasicBlock *MBB, + LinearizedRegion *InnerRegion, + unsigned Register) { + return getDefInstr(Register)->getParent() == MBB || + InnerRegion->contains(getDefInstr(Register)->getParent()); +} + +void MachineCFGStructurizer::rewriteLiveOutRegs(MachineBasicBlock *IfBB, + MachineBasicBlock *CodeBB, + MachineBasicBlock *MergeBB, + LinearizedRegion *InnerRegion, + LinearizedRegion *LRegion) { + DenseSet *LiveOuts = InnerRegion->getLiveOuts(); + SmallVector OldLiveOuts; + for (auto OLI : *LiveOuts) { + OldLiveOuts.push_back(OLI); + } + + for (auto LI : OldLiveOuts) { + DEBUG(dbgs() << "LiveOut: " << PrintReg(LI, TRI)); + if (!containsDef(CodeBB, InnerRegion, LI)) { + // If the register simly lives through the CodeBB, we don't have + // to rewrite anything since the register is not defined in this + // part of the code. + DEBUG(dbgs() << "- through"); + continue; + } + DEBUG(dbgs() << "\n"); + unsigned Reg = LI; + if (!PHIInfo.isSource(Reg)){ + // If the register is just a live out def and not part of a phi + // chain, we need to create a PHI node to handle the if region, + // and replace all uses outside of the region with the new dest + // register. + const TargetRegisterClass *RegClass = MRI->getRegClass(Reg); + unsigned PHIDestReg = MRI->createVirtualRegister(RegClass); + unsigned IfSourceReg = MRI->createVirtualRegister(RegClass); + // Create initializer, this value is never used, but is needed + // to satisfy SSA. + DEBUG(dbgs() << "Initializer for reg: " << PrintReg(Reg) << "\n"); + TII->insertAssign(*IfBB, IfBB->getFirstTerminator(), DebugLoc(), + IfSourceReg, 0); + + InnerRegion->replaceRegisterOutsideRegion(Reg, PHIDestReg, true, MRI); + DEBUG(dbgs() << "Insert Non-Chained Live out PHI\n"); + insertMergePHI(IfBB, InnerRegion->getExit(), MergeBB, PHIDestReg, + IfSourceReg, Reg, true); + } + + unsigned DestReg; + while (PHIInfo.findDest(LI, DestReg)) { + // If this register is a PHI Source, we need to update the PHI + // chain. + insertChainedPHI(IfBB, CodeBB, MergeBB, InnerRegion, DestReg, LI); + } + } + PHIInfo.dump(MRI); +} + +void MachineCFGStructurizer::createEntryPHIs(LinearizedRegion *CurrentRegion) { + MachineBasicBlock *Entry = CurrentRegion->getEntry(); + MachineBasicBlock *Exit = CurrentRegion->getExit(); + MachineBasicBlock *Pred = *(Entry->pred_begin()); + + DEBUG(dbgs() << "RegionExit: " << Exit->getNumber() << " Pred: " << + Pred->getNumber() << "\n"); + + PHIInfo.dump(MRI); + + for (auto DRI = PHIInfo.dests_begin(), DE = PHIInfo.dests_end(); + DRI != DE; ++DRI) { + + int NumSources = 0; + unsigned DestReg = *DRI; + auto SE = PHIInfo.sources_end(DestReg); + + for (auto SRI = PHIInfo.sources_begin(DestReg) ; SRI != SE; ++SRI) { + NumSources++; + } + + if (NumSources == 1) { + auto SRI = PHIInfo.sources_begin(DestReg); + unsigned SourceReg = *SRI; + replaceRegisterWith(DestReg, SourceReg); + } else { + DebugLoc DL = Entry->findDebugLoc(Entry->begin()); + MachineInstrBuilder MIB = + BuildMI(*Entry, Entry->instr_begin(), DL, + TII->get(TargetOpcode::PHI), DestReg); + DEBUG(dbgs() << "Entry PHI " << PrintReg(*DRI, TRI) << " = PHI("); + + for (auto SRI = PHIInfo.sources_begin(DestReg); SRI != SE; ++SRI) { + unsigned SourceReg = *SRI; + MIB.addReg(SourceReg); + DEBUG(dbgs() << PrintReg(SourceReg, TRI) << ", BB#"); + + if (CurrentRegion->isDefinedInRegion(SourceReg, MRI)) { + MIB.addMBB(Exit); + DEBUG(dbgs() << Exit->getNumber()); + } else { + MIB.addMBB(Pred); + DEBUG(dbgs() << Pred->getNumber()); + } + } + DEBUG(dbgs() << ")\n"); + } + } + PHIInfo.clear(); +} + +void MachineCFGStructurizer::replaceRegisterWith(unsigned Register, unsigned NewRegister) { + assert(Register != NewRegister && "Cannot replace a reg with itself"); + + for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(Register), + E = MRI->reg_end(); I != E; ) { + MachineOperand &O = *I; + ++I; + if (TargetRegisterInfo::isPhysicalRegister(NewRegister)) { + DEBUG(dbgs() << "Trying to substitute physical register: " << + PrintReg(NewRegister, MRI->getTargetRegisterInfo()) << "\n"); + llvm_unreachable("Cannot substitute physical registers"); + // We don't handle physical registers, but if we need to + // in the future This is how we do it: + // O.substPhysReg(NewRegister, *TRI); + } else { + DEBUG(dbgs() << "Replacing register: " << + PrintReg(Register, MRI->getTargetRegisterInfo()) << + " with " << PrintReg(NewRegister, MRI->getTargetRegisterInfo()) + << "\n"); + O.setReg(NewRegister); + } + } + PHIInfo.deleteDef(Register); + + getRegionMRT()->replaceLiveOutReg(Register, NewRegister); + + DEBUG(PHIInfo.dump(MRI)); +} + +void MachineCFGStructurizer::resolvePHIInfos(MachineBasicBlock *FunctionEntry) +{ + DEBUG(dbgs() << "Resolve PHI Infos\n"); + DEBUG(PHIInfo.dump(MRI)); + for (auto DRI = PHIInfo.dests_begin(), DE = PHIInfo.dests_end(); + DRI != DE; ++DRI) { + unsigned DestReg = *DRI; + DEBUG(dbgs() << "DestReg: " << PrintReg(DestReg, TRI) << "\n"); + auto SRI = PHIInfo.sources_begin(DestReg); + unsigned SourceReg = *SRI; + DEBUG(dbgs() << "DestReg: " << PrintReg(DestReg, TRI) << " SourceReg: " << + PrintReg(SourceReg, TRI) << "\n"); + + assert(PHIInfo.sources_end(DestReg) == ++SRI && + "More than one phi source in entry node"); + replaceRegisterWith(DestReg, SourceReg); + } +} + +static bool isFunctionEntryBlock(MachineBasicBlock *MBB) { + return ((&(*(MBB->getParent()->begin()))) == MBB); +} + +MachineBasicBlock *MachineCFGStructurizer::createIfRegion( + MachineBasicBlock *MergeBB, + MachineBasicBlock *CodeBB, + LinearizedRegion *CurrentRegion, + unsigned BBSelectRegIn, + unsigned BBSelectRegOut) { + if (isFunctionEntryBlock(CodeBB) && !CurrentRegion->getHasLoop()) { + // Handle non-loop function entry block. + // We need to allow loops to the entry block and then + rewriteCodeBBTerminator(CodeBB, MergeBB, BBSelectRegOut); + resolvePHIInfos(CodeBB); + removeExternalCFGSuccessors(CodeBB); + CodeBB->addSuccessor(MergeBB); + CurrentRegion->addMBB(CodeBB); + return nullptr; + } if (CurrentRegion->getEntry() == CodeBB && !CurrentRegion->getHasLoop()) { + // Handle non-loop region entry block. + rewriteCodeBBTerminator(CodeBB, MergeBB, BBSelectRegOut); + createEntryPHIs(CurrentRegion); + removeExternalCFGSuccessors(CodeBB); + CodeBB->addSuccessor(MergeBB); + CurrentRegion->addMBB(CodeBB); + return nullptr; + } else { + // Handle internal block. + const TargetRegisterClass *RegClass = MRI->getRegClass(BBSelectRegIn); + unsigned CodeBBSelectReg = MRI->createVirtualRegister(RegClass); + rewriteCodeBBTerminator(CodeBB, MergeBB, CodeBBSelectReg); + bool IsRegionEntryBB = CurrentRegion->getEntry() == CodeBB; + MachineBasicBlock *IfBB = createIfBlock(MergeBB, CodeBB, CodeBB, + BBSelectRegIn, IsRegionEntryBB); + CurrentRegion->addMBB(IfBB); + // If this is the entry block we need to make the If block the new + // linearized region entry. + if (IsRegionEntryBB) { + CurrentRegion->setEntry(IfBB); + + if (CurrentRegion->getHasLoop()) { + MachineBasicBlock *RegionExit = CurrentRegion->getExit(); + MachineBasicBlock *ETrueBB = nullptr; + MachineBasicBlock *EFalseBB = nullptr; + SmallVector ECond; + + DebugLoc DL = DebugLoc(); // RegionExit->getFirstTerminator()->getDebugLoc(); + TII->analyzeBranch(*RegionExit, ETrueBB, EFalseBB, ECond); + TII->RemoveBranch(*RegionExit); + + // We need to create a backedge if there is a loop + unsigned Reg = TII->insertNE(RegionExit, RegionExit->instr_end(), + DL, BBSelectRegOut, + RegionExit->getNumber()); + MachineOperand RegOp = MachineOperand::CreateReg(Reg, false, false, + true); + ArrayRef Cond(RegOp); + DEBUG(dbgs() << "RegionExitReg: "); + DEBUG(Cond[0].print(dbgs(), TRI)); + DEBUG(dbgs() << "\n"); + TII->InsertBranch(*RegionExit, CurrentRegion->getEntry(), RegionExit, + Cond, DebugLoc()); + RegionExit->addSuccessor(CurrentRegion->getEntry()); + } + } + CurrentRegion->addMBB(CodeBB); + LinearizedRegion InnerRegion(CodeBB, MRI, TRI, PHIInfo); + // InnerRegion.setParent(CurrentRegion); + // if (CurrentRegion->getExit() == MergeBB) { + // for (auto &LiveOutReg : *(CurrentRegion->getLiveOuts())) { + // InnerRegion.addLiveOut(LiveOutReg); + // } + // } + + InnerRegion.setParent(CurrentRegion); + DEBUG(dbgs() << "Insert BB Select PHI (BB)\n"); + insertMergePHI(IfBB, CodeBB, MergeBB, BBSelectRegOut, BBSelectRegIn, + CodeBBSelectReg); + InnerRegion.addMBB(MergeBB); + + DEBUG(InnerRegion.print(dbgs(), TRI)); + rewriteLiveOutRegs(IfBB, CodeBB, MergeBB, &InnerRegion, CurrentRegion); + extractKilledPHIs(CodeBB); + if (IsRegionEntryBB) { + createEntryPHIs(CurrentRegion); + } + return IfBB; + } +} + +MachineBasicBlock *MachineCFGStructurizer::createIfRegion( + MachineBasicBlock *MergeBB, + LinearizedRegion *InnerRegion, + LinearizedRegion *CurrentRegion, + unsigned BBSelectRegIn, + unsigned BBSelectRegOut) { + unsigned CodeBBSelectReg = InnerRegion->getBBSelectRegOut(); + MachineBasicBlock *CodeEntryBB = InnerRegion->getEntry(); + MachineBasicBlock *CodeExitBB = InnerRegion->getExit(); + MachineBasicBlock *IfBB = createIfBlock(MergeBB, CodeEntryBB, CodeExitBB, + BBSelectRegIn, true); + CurrentRegion->addMBB(IfBB); + rewriteLiveOutRegs(IfBB, CodeEntryBB, MergeBB, InnerRegion, CurrentRegion); + DEBUG(dbgs() << "Insert BB Select PHI (region)\n"); + insertMergePHI(IfBB, CodeExitBB, MergeBB, BBSelectRegOut, BBSelectRegIn, + CodeBBSelectReg); + + rewriteRegionEntryPHIs(InnerRegion, IfBB); + return IfBB; +} + +void MachineCFGStructurizer::splitLoopPHI(MachineInstr &PHI, + MachineBasicBlock *Entry, + MachineBasicBlock *EntrySucc, + LinearizedRegion *LRegion) { + SmallVector PHIRegionIndices; + getPHIRegionIndices(LRegion, PHI, PHIRegionIndices); + + assert(PHIRegionIndices.size() == 1); + + unsigned RegionIndex = PHIRegionIndices[0]; + unsigned RegionSourceReg = getPHISourceReg(PHI, RegionIndex); + MachineBasicBlock *RegionSourceMBB = getPHIPred(PHI, RegionIndex); + unsigned PHIDest = getPHIDestReg(PHI); + unsigned PHISource = PHIDest; + unsigned ReplaceReg; + + if (shrinkPHI(PHI, PHIRegionIndices, &ReplaceReg)) { + PHISource = ReplaceReg; + } + + const TargetRegisterClass *RegClass = MRI->getRegClass(PHIDest); + unsigned NewDestReg = MRI->createVirtualRegister(RegClass); + LRegion->replaceRegisterInsideRegion(PHIDest, NewDestReg, false, MRI); + MachineInstrBuilder MIB = + BuildMI(*EntrySucc, EntrySucc->instr_begin(), PHI.getDebugLoc(), + TII->get(TargetOpcode::PHI), NewDestReg); + DEBUG(dbgs() << "Split Entry PHI " << PrintReg(NewDestReg, TRI) << " = PHI("); + MIB.addReg(PHISource); + MIB.addMBB(Entry); + DEBUG(dbgs() << PrintReg(PHISource, TRI) << ", BB#" << Entry->getNumber()); + MIB.addReg(RegionSourceReg); + MIB.addMBB(RegionSourceMBB); + DEBUG(dbgs() << " ," << PrintReg(RegionSourceReg, TRI) << + ", BB#" << RegionSourceMBB->getNumber() << ")\n"); +} + +void MachineCFGStructurizer::splitLoopPHIs(MachineBasicBlock *Entry, + MachineBasicBlock *EntrySucc, + LinearizedRegion *LRegion) { + SmallVector PHIs; + collectPHIs(Entry, PHIs); + + for (auto PHII : PHIs) { + splitLoopPHI(*PHII, Entry, EntrySucc, LRegion); + } +} + +// Split the entry block separating PHI-nodes and the rest of the code +// This is needed to insert an initializer for the bb select register +// inloop regions. + +MachineBasicBlock * +MachineCFGStructurizer::splitEntry(LinearizedRegion *LRegion) { + MachineBasicBlock *Entry = LRegion->getEntry(); + MachineBasicBlock *EntrySucc = Entry->split(Entry->getFirstNonPHI()); + MachineBasicBlock *Exit = LRegion->getExit(); + + DEBUG(dbgs() << "Split BB#" << Entry->getNumber() << " to BB#" << + Entry->getNumber() << " -> BB#" << EntrySucc->getNumber() << "\n"); + LRegion->addMBB(EntrySucc); + + // Make the backedge go to Entry Succ + if (Exit->isSuccessor(Entry)) { + Exit->removeSuccessor(Entry); + } + Exit->addSuccessor(EntrySucc); + MachineInstr &Branch = *(Exit->instr_rbegin()); + for (auto &UI : Branch.uses()) { + if (UI.isMBB() && UI.getMBB() == Entry) { + UI.setMBB(EntrySucc); + } + } + + splitLoopPHIs(Entry, EntrySucc, LRegion); + + return EntrySucc; +} + +static unsigned createBBSelectReg(const TargetInstrInfo *TII, + MachineRegisterInfo *MRI) { + return MRI->createVirtualRegister(TII->getPreferredSelectRegClass(32)); +} + +LinearizedRegion *MachineCFGStructurizer::createLinearizedRegion( + RegionMRT *Region) { + LinearizedRegion *LRegion = Region->getLinearizedRegion(); + LRegion->initLiveOut(Region, MRI, TRI, PHIInfo); + LRegion->setEntry(Region->getEntry()); + return LRegion; +} + +static void removeOldExitPreds(RegionMRT *Region) { + MachineBasicBlock *Exit = Region->getSucc(); + if (Exit == nullptr) { + return; + } + for (MachineBasicBlock::pred_iterator PI = Exit->pred_begin(), + E = Exit->pred_end(); PI != E; ++PI) { + if (Region->contains(*PI)) { + (*PI)->removeSuccessor(Exit); + } + } +} + +static bool containsNewBackedge(RegionMRT *Region) { + SmallPtrSet MBBs; + + SetVector *Children = Region->getChildren(); + // Need to traverse this in reverse since it is in post order. + for (auto CI = Children->rbegin(), CE = Children->rend(); CI != CE; ++CI) { + if ((*CI)->isMBB()) { + MachineBasicBlock *MBB = (*CI)->getMBBMRT()->getMBB(); + MBBs.insert(MBB); + for (auto SI = MBB->succ_begin(), SE = MBB->succ_end(); SI != SE; ++SI) { + if (MBBs.count(*SI) != 0) { + return true; + } + } + } + } + return false; +} + +bool MachineCFGStructurizer::structurizeComplexRegion(RegionMRT *Region) { + auto *LRegion = createLinearizedRegion(Region); + LRegion->setHasLoop(containsNewBackedge(Region)); + MachineBasicBlock *LastMerge = createLinearizedExitBlock(Region); + MachineBasicBlock *CurrentMerge = LastMerge; + LRegion->addMBB(LastMerge); + LRegion->setExit(LastMerge); + + rewriteRegionExitPHIs(Region, LastMerge, LRegion); + removeOldExitPreds(Region); + + DEBUG(PHIInfo.dump(MRI)); + + auto Entry = Region->getEntry(); + + SetVector *Children = Region->getChildren(); + DEBUG(dbgs() << "===========If Region Start===============\n"); + if (LRegion->getHasLoop()) { + DEBUG(dbgs() << "Has Backedge: Yes\n"); + } else { + DEBUG(dbgs() << "Has Backedge: No\n"); + } + + RegionMRT *Succ = nullptr; + RegionMRT *Pred = nullptr; + + unsigned BBSelectRegIn; + unsigned BBSelectRegOut; + for (auto CI = Children->begin(), CE = Children->end(); CI != CE; ++CI) { + DEBUG(LRegion->print(dbgs(), TRI)); + + auto CNI = CI; + ++CNI; + Pred = CNI != CE ? (*CNI)->getRegionMRT() : nullptr; + + MRT *Child = (*CI); + + if (Child->isRegion()) { + + LinearizedRegion *InnerLRegion = + Child->getRegionMRT()->getLinearizedRegion(); + // We found the block is the exit of an inner region, we need + // to put it in the current linearized region. + + DEBUG(dbgs() << "Linearizing region: "); + DEBUG(InnerLRegion->print(dbgs(), TRI)); + DEBUG(dbgs() << "\n"); + + MachineBasicBlock *InnerEntry = InnerLRegion->getEntry(); + if ((&(*(InnerEntry->getParent()->begin()))) == InnerEntry) { + // Entry has already been linearized, no need to do this region. + resolvePHIInfos(InnerEntry); + continue; + } + bool IsRegionExit = Child == Region->getExitTree(); + BBSelectRegOut = + IsRegionExit ? LRegion->getBBSelectRegOut() : + Succ ? Succ->getLinearizedRegion()->getBBSelectRegIn() : + BBSelectRegIn; + BBSelectRegIn = Pred ? Pred->getLinearizedRegion()->getBBSelectRegOut() : + InnerLRegion->getBBSelectRegIn(); + + DEBUG(dbgs() << "BBSelectRegIn: " << PrintReg(BBSelectRegIn, TRI) << + "\n"); + DEBUG(dbgs() << "BBSelectRegOut: " << PrintReg(BBSelectRegOut, TRI) << + "\n"); + + MachineBasicBlock *IfEnd = CurrentMerge; + CurrentMerge = createIfRegion(CurrentMerge, InnerLRegion, LRegion, + BBSelectRegIn, BBSelectRegOut); + TII->convertNonUniformIfRegion(CurrentMerge, IfEnd); + } else { + MachineBasicBlock *MBB = Child->getMBBMRT()->getMBB(); + DEBUG(dbgs() << "Linearizing block: " << MBB->getNumber() << "\n"); + + bool IsRegionEntryBlock = MBB == Region->getEntry(); + bool IsRegionExitBlock = MBB == Region->getExit(); + // bool isFunctionExitBlock = MBB == &(MBB->getParent()->back()); + if (MBB == getSingleExitNode(*(MBB->getParent()))) { + // If this is the exit block then we need to skip to the next. + // The "in" register will be transferred to "out" in the next + // iteration. + BBSelectRegIn = LRegion->getBBSelectRegOut(); + continue; + } + + BBSelectRegOut = + IsRegionExitBlock ? LRegion->getBBSelectRegOut() : + Succ ? Succ->getLinearizedRegion()->getBBSelectRegIn() : + BBSelectRegIn; + + BBSelectRegIn = + IsRegionEntryBlock ? LRegion->getBBSelectRegIn() + // : Pred ? Pred->getLinearizedRegion()->getBBSelectRegOut() + : createBBSelectReg(TII, MRI); + + DEBUG(dbgs() << "BBSelectRegIn: " << PrintReg(BBSelectRegIn, TRI) << + "\n"); + DEBUG(dbgs() << "BBSelectRegOut: " << PrintReg(BBSelectRegOut, TRI) << + "\n"); + MachineBasicBlock *IfEnd = CurrentMerge; + // This is a basic block that is not part of an inner region, we + // need to put it in the current linearized region. + CurrentMerge = createIfRegion(CurrentMerge, MBB, LRegion, BBSelectRegIn, + BBSelectRegOut); + if (CurrentMerge) { + TII->convertNonUniformIfRegion(CurrentMerge, IfEnd); + } + + DEBUG(PHIInfo.dump(MRI)); + } + Succ = Child->getRegionMRT(); + } + + LRegion->removeFalseRegisterKills(MRI); + + if (LRegion->getHasLoop()) { + MachineBasicBlock *NewSucc = splitEntry(LRegion); + if (isFunctionEntryBlock(LRegion->getEntry())) { + resolvePHIInfos(LRegion->getEntry()); + } + DebugLoc DL = NewSucc->findDebugLoc(NewSucc->getFirstNonPHI()); + // @@@@ + unsigned InReg = LRegion->getBBSelectRegIn(); + unsigned NewInReg = + MRI->createVirtualRegister(MRI->getRegClass(InReg)); + TII->insertAssign(*(LRegion->getEntry()), + LRegion->getEntry()->getFirstTerminator(), DL, + NewInReg, + //LRegion->getBBSelectRegIn(), + Region->getEntry()->getNumber()); + LRegion->replaceRegisterInsideRegion(InReg, NewInReg, true, MRI); + + TII->convertNonUniformLoopRegion(NewSucc, LastMerge); + } + + DEBUG(Entry->getParent()->dump()); + DEBUG(LRegion->print(dbgs(), TRI)); + DEBUG(PHIInfo.dump(MRI)); + + DEBUG(dbgs() << "===========If Region End===============\n"); + + Region->setLinearizedRegion(LRegion); + return true; +} + +bool MachineCFGStructurizer::structurizeRegion(RegionMRT *Region) { + if (false && regionIsSimpleIf(Region)) { + transformSimpleIfRegion(Region); + return true; + } else if (regionIsSequence(Region)) { + return false; + } else { + structurizeComplexRegion(Region); + } + return false; +} + +static int structurize_once = 0; + +bool MachineCFGStructurizer::structurizeRegions(RegionMRT *Region, + bool isTopRegion) { + bool Changed = false; + + auto Children = Region->getChildren(); + for (auto CI : *Children) { + if (CI->isRegion()) { + Changed |= structurizeRegions(CI->getRegionMRT(), false); + } + } + + if (structurize_once < 2 || true) { + Changed |= structurizeRegion(Region); + structurize_once++; + } + return Changed; +} + +void MachineCFGStructurizer::initFallthroughMap(MachineFunction &MF) { + for (auto &MBBI : MF) { + FallthroughMap[&MBBI] = MBBI.getFallThrough(); + } +} + +void +MachineCFGStructurizer::initializeSelectRegisters(RegionMRT *Region, + unsigned ExistingEntryReg, + unsigned ExistingExitReg, + MachineRegisterInfo *MRI, + const TargetInstrInfo *TII) { + unsigned EntryReg = + ExistingEntryReg != 0 ? ExistingEntryReg : createBBSelectReg(TII, MRI); + unsigned ExitReg = + /* ExistingExitReg != 0 ? ExistingExitReg :*/ createBBSelectReg(TII, MRI); + + DEBUG(dbgs() << "Init Entry Reg: " << + PrintReg(EntryReg, MRI->getTargetRegisterInfo()) << "\n"); + DEBUG(dbgs() << "Init Exit Reg: " << + PrintReg(ExitReg, MRI->getTargetRegisterInfo()) << "\n"); + + LinearizedRegion *LRegion = new LinearizedRegion(); + LRegion->setBBSelectRegIn(EntryReg); + LRegion->setBBSelectRegOut(ExitReg); + DEBUG(dbgs() << "Add LiveOut (BBSelect): " << PrintReg(ExitReg, TRI) << "\n"); + Region->setLinearizedRegion(LRegion); + LRegion->setParent(Region->getParent() ? + Region->getParent()->getLinearizedRegion() : + nullptr); + + auto CL = std::prev(Region->getChildren()->end(),1); + unsigned CurrentEntryReg = 0; + unsigned CurrentExitReg = ExitReg; + bool IsFirst = true; + for (auto CI = Region->getChildren()->begin(), + CE = Region->getChildren()->end(); CI != CE; ++CI) { + bool IsLast = CI == CL; + if ((*CI)->isRegion()) { + if (IsFirst) { + CurrentExitReg = 0; + IsFirst = false; + } + CurrentEntryReg = IsLast ? EntryReg : 0; + initializeSelectRegisters((*CI)->getRegionMRT(), CurrentEntryReg, + CurrentExitReg, MRI, TII); + CurrentExitReg = + (*CI)->getRegionMRT()->getLinearizedRegion()->getBBSelectRegIn(); + } else { + DEBUG(dbgs() << "InitMBB: " << (*CI)->getMBBMRT()->getMBB()->getNumber() + << "\n"); + LRegion->addMBB((*CI)->getMBBMRT()->getMBB()); + CurrentEntryReg = 0; + CurrentExitReg = 0; + } + } +} + +static void checkRegOnlyPHIInputs(MachineFunction &MF) { + for (auto &MBBI : MF) { + for (MachineBasicBlock::instr_iterator I = MBBI.instr_begin(), + E = MBBI.instr_end(); I != E; ++I) { + MachineInstr &Instr = *I; + if (Instr.isPHI()) { + int numPreds = getPHINumInputs(Instr); + for (int i = 0; i < numPreds; ++i) { + assert(Instr.getOperand(i*2+1).isReg() && "PHI Operand not a register"); + } + } + } + } +} + +bool MachineCFGStructurizer::runOnMachineFunction(MachineFunction &MF) { + auto &ST = MF.getSubtarget(); + TII = ST.getInstrInfo(); + TRI = ST.getRegisterInfo(); + MRI = &(MF.getRegInfo()); + initFallthroughMap(MF); + + checkRegOnlyPHIInputs(MF); + DEBUG(dbgs() << "----STRUCTURIZER START----\n"); + DEBUG(MF.dump()); + + Regions = &(getAnalysis().getRegionInfo()); + DEBUG(Regions->dump()); + + RegionMRT *RTree = MRT::buildMRT(MF, Regions); + setRegionMRT(RTree); + initializeSelectRegisters(RTree,0,0,MRI,TII); + DEBUG(RTree->dump(TRI)); + bool result = structurizeRegions(RTree, true); + delete RTree; + DEBUG(dbgs() << "----STRUCTURIZER END----\n"); + return result; +} + +char &llvm::MachineCFGStructurizerID = MachineCFGStructurizer::ID; + +INITIALIZE_PASS_BEGIN(MachineCFGStructurizer, "machine-cfg-structurizer", + "Machine CFG Structurizer", false, false) +INITIALIZE_PASS_DEPENDENCY(MachineRegionInfoPass) +INITIALIZE_PASS_END(MachineCFGStructurizer, "machine-cfg-structurizer", + "Machine CFG Structurizer", false, false) +FunctionPass * +llvm::createMachineCFGStructurizer() { + return new MachineCFGStructurizer(); +} Index: lib/CodeGen/MachineRegionInfo.cpp =================================================================== --- lib/CodeGen/MachineRegionInfo.cpp +++ lib/CodeGen/MachineRegionInfo.cpp @@ -1,5 +1,5 @@ - #include "llvm/CodeGen/MachineRegionInfo.h" +#include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/RegionInfoImpl.h" #include "llvm/CodeGen/MachinePostDominators.h" @@ -36,7 +36,6 @@ MachineRegionInfo::MachineRegionInfo() : RegionInfoBase>() { - } MachineRegionInfo::~MachineRegionInfo() { @@ -104,8 +103,10 @@ void MachineRegionInfoPass::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); AU.addRequiredTransitive(); - AU.addRequired(); - AU.addRequired(); + AU.addRequired(); + AU.addRequired(); + AU.addRequired(); + MachineFunctionPass::getAnalysisUsage(AU); } void MachineRegionInfoPass::print(raw_ostream &OS, const Module *) const { @@ -119,14 +120,15 @@ #endif char MachineRegionInfoPass::ID = 0; +char &MachineRegionInfoPassID = MachineRegionInfoPass::ID; -INITIALIZE_PASS_BEGIN(MachineRegionInfoPass, "regions", - "Detect single entry single exit regions", true, true) +INITIALIZE_PASS_BEGIN(MachineRegionInfoPass, "machine-regions", + "Detect machine single entry single exit regions", true, true) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree) INITIALIZE_PASS_DEPENDENCY(MachineDominanceFrontier) -INITIALIZE_PASS_END(MachineRegionInfoPass, "regions", - "Detect single entry single exit regions", true, true) +INITIALIZE_PASS_END(MachineRegionInfoPass, "machine-regions", + "Detect machine single entry single exit regions", true, true) // Create methods available outside of this file, to use them // "include/llvm/LinkAllPasses.h". Otherwise the pass would be deleted by Index: lib/Target/AMDGPU/AMDGPUTargetMachine.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -490,11 +490,15 @@ // FIXME: We need to run a pass to propagate the attributes when calls are // supported. addPass(&AMDGPUAnnotateKernelFeaturesID); - addPass(createStructurizeCFGPass(true)); // true -> SkipUniformRegions + if (TM->Options.Structurizer == CFGStructurizer::Early) { + addPass(createStructurizeCFGPass(true)); // true -> SkipUniformRegions + } addPass(createSinkingPass()); addPass(createSITypeRewriter()); addPass(createAMDGPUAnnotateUniformValues()); - addPass(createSIAnnotateControlFlowPass()); + if (TM->Options.Structurizer == CFGStructurizer::Early) { + addPass(createSIAnnotateControlFlowPass()); + } return false; } @@ -548,6 +552,9 @@ #endif void GCNPassConfig::addPreRegAlloc() { + if (TM->Options.Structurizer == CFGStructurizer::Late) { + addPass(&MachineCFGStructurizerID); + } addPass(createSIShrinkInstructionsPass()); addPass(createSIWholeQuadModePass()); Index: lib/Target/AMDGPU/SIInstrInfo.h =================================================================== --- lib/Target/AMDGPU/SIInstrInfo.h +++ lib/Target/AMDGPU/SIInstrInfo.h @@ -125,6 +125,31 @@ RegScavenger *RS, unsigned TmpReg, unsigned Offset, unsigned Size) const; + void insertAssign(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + DebugLoc DL, + unsigned DestReg, + unsigned Value) const override; + + const TargetRegisterClass *getPreferredSelectRegClass( + unsigned Size) const override; + + virtual void insertSelect(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, const DebugLoc &DL, + unsigned DstReg, ArrayRef Cond, + unsigned TrueReg, unsigned FalseReg) const override; + + unsigned insertNE(MachineBasicBlock *MBB, + MachineBasicBlock::iterator I, DebugLoc DL, + unsigned SrcReg, int Value) const override; + + unsigned calculateLDSSpillAddress(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + RegScavenger *RS, + unsigned TmpReg, + unsigned Offset, + unsigned Size) const; + void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned SrcReg, bool isKill, int FrameIndex, @@ -541,8 +566,17 @@ return get(pseudoToMCOpcode(Opcode)); } + unsigned getInstSizeInBytes(const MachineInstr &MI) const override; + virtual void convertNonUniformIfRegion( + MachineBasicBlock *IfEntry, + MachineBasicBlock *IfEnd) const override; + + virtual void convertNonUniformLoopRegion( + MachineBasicBlock *LoopEntry, + MachineBasicBlock *LoopEnd) const override; + ArrayRef> getSerializableTargetIndices() const override; Index: lib/Target/AMDGPU/SIInstrInfo.cpp =================================================================== --- lib/Target/AMDGPU/SIInstrInfo.cpp +++ lib/Target/AMDGPU/SIInstrInfo.cpp @@ -342,7 +342,6 @@ MachineBasicBlock::iterator MI, const DebugLoc &DL, unsigned DestReg, unsigned SrcReg, bool KillSrc) const { - // If we are trying to copy to or from SCC, there is a bug somewhere else in // the backend. While it may be theoretically possible to do this, it should // never be necessary. @@ -497,6 +496,166 @@ } } +void SIInstrInfo::insertAssign(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + DebugLoc DL, + unsigned DestReg, + unsigned Value) const { + MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); + const TargetRegisterClass *RegClass = MRI.getRegClass(DestReg); + if (RegClass == &AMDGPU::SReg_32RegClass || + RegClass == &AMDGPU::SGPR_32RegClass || + RegClass == &AMDGPU::SReg_32_XM0RegClass) { + BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), DestReg).addImm(Value); + } else if (RegClass == &AMDGPU::SReg_64RegClass) { + BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), DestReg).addImm(Value); + } else if (RegClass == &AMDGPU::VGPR_32RegClass || RegClass == &AMDGPU::VReg_1RegClass) { + BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DestReg).addImm(Value); + } else if (RegClass == &AMDGPU::VReg_64RegClass) { + BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B64_PSEUDO), DestReg).addImm(Value); + } else if( RegClass == &AMDGPU::SReg_128RegClass) { + unsigned Reg0 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass); + unsigned Reg1 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass); + unsigned Reg2 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass); + unsigned Reg3 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass); + BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), Reg0).addImm(Value); + BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), Reg1).addImm(0); + BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), Reg2).addImm(0); + BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), Reg3).addImm(0); + BuildMI(MBB, MI, DL, get(AMDGPU::REG_SEQUENCE), DestReg) + .addReg(Reg0) + .addImm(AMDGPU::sub0) + .addReg(Reg1) + .addImm(AMDGPU::sub1) + .addReg(Reg2) + .addImm(AMDGPU::sub2) + .addReg(Reg3) + .addImm(AMDGPU::sub3); + } else if( RegClass == &AMDGPU::SReg_256RegClass) { + unsigned Reg0 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass); + unsigned Reg1 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass); + unsigned Reg2 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass); + unsigned Reg3 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass); + unsigned Reg4 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass); + unsigned Reg5 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass); + unsigned Reg6 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass); + unsigned Reg7 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass); + BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), Reg0).addImm(Value); + BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), Reg1).addImm(0); + BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), Reg2).addImm(0); + BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), Reg3).addImm(0); + BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), Reg4).addImm(0); + BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), Reg5).addImm(0); + BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), Reg6).addImm(0); + BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), Reg7).addImm(0); + BuildMI(MBB, MI, DL, get(AMDGPU::REG_SEQUENCE), DestReg) + .addReg(Reg0) + .addImm(AMDGPU::sub0) + .addReg(Reg1) + .addImm(AMDGPU::sub1) + .addReg(Reg2) + .addImm(AMDGPU::sub2) + .addReg(Reg3) + .addImm(AMDGPU::sub3) + .addReg(Reg4) + .addImm(AMDGPU::sub4) + .addReg(Reg5) + .addImm(AMDGPU::sub5) + .addReg(Reg6) + .addImm(AMDGPU::sub6) + .addReg(Reg7) + .addImm(AMDGPU::sub7); + } else if( RegClass == &AMDGPU::VReg_128RegClass) { + unsigned Reg0 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + unsigned Reg1 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + unsigned Reg2 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + unsigned Reg3 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), Reg0).addImm(Value); + BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), Reg1).addImm(0); + BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), Reg2).addImm(0); + BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), Reg3).addImm(0); + BuildMI(MBB, MI, DL, get(AMDGPU::REG_SEQUENCE), DestReg) + .addReg(Reg0) + .addImm(AMDGPU::sub0) + .addReg(Reg1) + .addImm(AMDGPU::sub1) + .addReg(Reg2) + .addImm(AMDGPU::sub2) + .addReg(Reg3) + .addImm(AMDGPU::sub3); + } else if( RegClass == &AMDGPU::VReg_256RegClass) { + unsigned Reg0 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + unsigned Reg1 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + unsigned Reg2 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + unsigned Reg3 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + unsigned Reg4 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + unsigned Reg5 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + unsigned Reg6 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + unsigned Reg7 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), Reg0).addImm(Value); + BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), Reg1).addImm(0); + BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), Reg2).addImm(0); + BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), Reg3).addImm(0); + BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), Reg4).addImm(0); + BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), Reg5).addImm(0); + BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), Reg6).addImm(0); + BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), Reg7).addImm(0); + BuildMI(MBB, MI, DL, get(AMDGPU::REG_SEQUENCE), DestReg) + .addReg(Reg0) + .addImm(AMDGPU::sub0) + .addReg(Reg1) + .addImm(AMDGPU::sub1) + .addReg(Reg2) + .addImm(AMDGPU::sub2) + .addReg(Reg3) + .addImm(AMDGPU::sub3) + .addReg(Reg4) + .addImm(AMDGPU::sub4) + .addReg(Reg5) + .addImm(AMDGPU::sub5) + .addReg(Reg6) + .addImm(AMDGPU::sub6) + .addReg(Reg7) + .addImm(AMDGPU::sub7); + } else { + llvm_unreachable("Unsupported register class"); + } +} + +const TargetRegisterClass *SIInstrInfo::getPreferredSelectRegClass( + unsigned Size) const { + return &AMDGPU::VGPR_32RegClass; +} + +void SIInstrInfo::insertSelect(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, const DebugLoc &DL, + unsigned DstReg, ArrayRef Cond, + unsigned TrueReg, unsigned FalseReg) const { + const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); + const TargetRegisterClass *RegClass = MRI.getRegClass(DstReg); + assert(Cond.size() == 1 && "Invalid Cond array: size != 1"); + assert(RegClass == &AMDGPU::VGPR_32RegClass && "Not a VGPR32 reg"); + + BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg) + .addReg(FalseReg) + .addReg(TrueReg) + .addOperand(Cond[0]); +} + +unsigned SIInstrInfo::insertNE(MachineBasicBlock *MBB, + MachineBasicBlock::iterator I, DebugLoc DL, + unsigned SrcReg, int Value) const { + // FIXME(Jan): It seems that we use a register to connect a test + // with a branch, so we need to add i + MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); + unsigned Reg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); + BuildMI(*MBB, I, DL, get(AMDGPU::V_CMP_NE_I32_e64), Reg) + .addImm(Value) + .addReg(SrcReg); + + return Reg; +} + int SIInstrInfo::commuteOpcode(const MachineInstr &MI) const { const unsigned Opcode = MI.getOpcode(); @@ -1093,13 +1252,19 @@ return false; } - BranchPredicate Pred = getBranchPredicate(I->getOpcode()); - if (Pred == INVALID_BR) - return true; + MachineBasicBlock *CondBB = NULL; - MachineBasicBlock *CondBB = I->getOperand(0).getMBB(); - Cond.push_back(MachineOperand::CreateImm(Pred)); + if (I->getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO) { + CondBB = I->getOperand(1).getMBB(); + Cond.push_back(I->getOperand(0)); + } else { + BranchPredicate Pred = getBranchPredicate(I->getOpcode()); + if (Pred == INVALID_BR) + return true; + CondBB = I->getOperand(0).getMBB(); + Cond.push_back(MachineOperand::CreateImm(Pred)); + } ++I; if (I == MBB.end()) { @@ -1143,6 +1308,13 @@ return 1; } + if(Cond.size() == 1 && Cond[0].isReg()) { + BuildMI(&MBB, DL, get(AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO)) + .addOperand(Cond[0]) + .addMBB(TBB); + return 1; + } + assert(TBB && Cond[0].isImm()); unsigned Opcode @@ -1167,8 +1339,12 @@ bool SIInstrInfo::ReverseBranchCondition( SmallVectorImpl &Cond) const { assert(Cond.size() == 1); - Cond[0].setImm(-Cond[0].getImm()); - return false; + if (Cond[0].isImm()) { + Cond[0].setImm(-Cond[0].getImm()); + return false; + } else { + return true; + } } static void removeModOperands(MachineInstr &MI) { @@ -3194,6 +3370,79 @@ } } +void +SIInstrInfo::convertNonUniformIfRegion(MachineBasicBlock* IfEntry, + MachineBasicBlock *IfEnd) const { + MachineBasicBlock::iterator TI = IfEntry->getFirstTerminator(); + assert(TI != IfEntry->end()); + + MachineInstr *Branch = &(*TI); + MachineFunction *MF = IfEntry->getParent(); + MachineRegisterInfo &MRI = IfEntry->getParent()->getRegInfo(); + + if(Branch->getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO) { + unsigned DstReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); + MachineInstr* SIIF = + BuildMI(*(MF), Branch->getDebugLoc(), + get(AMDGPU::SI_IF), DstReg) + .addOperand(Branch->getOperand(0)).addOperand(Branch->getOperand(1)); + MachineInstr* SIEND = + BuildMI(*(MF), Branch->getDebugLoc(), + get(AMDGPU::SI_END_CF)) + .addReg(DstReg); + + IfEntry->erase(TI); + IfEntry->insert(IfEntry->end(), SIIF); + IfEnd->insert(IfEnd->getFirstNonPHI(), SIEND); + } +} + +void SIInstrInfo::convertNonUniformLoopRegion( + MachineBasicBlock *LoopEntry, + MachineBasicBlock *LoopEnd) const { + MachineBasicBlock::iterator TI = LoopEnd->getFirstTerminator(); + // We expect 2 terminators, one conditional and one unconditional. + assert(TI != LoopEnd->end()); + + MachineInstr *Branch = &(*TI); + MachineFunction *MF = LoopEnd->getParent(); + MachineRegisterInfo &MRI = LoopEnd->getParent()->getRegInfo(); + + if(Branch->getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO) { + + unsigned DstReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); + unsigned BackEdgeReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); + MachineInstrBuilder HeaderPHIBuilder = + BuildMI(*(MF), Branch->getDebugLoc(), get(TargetOpcode::PHI), DstReg); + for(MachineBasicBlock::pred_iterator PI = LoopEntry->pred_begin(), + E = LoopEntry->pred_end(); PI != E; ++PI) { + if (*PI == LoopEnd) { + HeaderPHIBuilder.addReg(BackEdgeReg); + } else { + MachineBasicBlock* PMBB = *PI; + unsigned ZeroReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); + insertAssign(*PMBB, PMBB->getFirstTerminator(), DebugLoc(), ZeroReg, 0); + HeaderPHIBuilder.addReg(ZeroReg); + } + HeaderPHIBuilder.addMBB(*PI); + } + MachineInstr* HeaderPhi = HeaderPHIBuilder; + MachineInstr* SIIFBREAK = + BuildMI(*(MF), Branch->getDebugLoc(), + get(AMDGPU::SI_IF_BREAK), BackEdgeReg) + .addReg(DstReg).addOperand(Branch->getOperand(0)); + MachineInstr* SILOOP = + BuildMI(*(MF), Branch->getDebugLoc(), + get(AMDGPU::SI_LOOP)) + .addReg(BackEdgeReg).addMBB(LoopEntry); + + LoopEntry->insert(LoopEntry->begin(), HeaderPhi); + LoopEnd->erase(TI); + LoopEnd->insert(LoopEnd->end(), SIIFBREAK); + LoopEnd->insert(LoopEnd->end(), SILOOP); + } +} + ArrayRef> SIInstrInfo::getSerializableTargetIndices() const { static const std::pair TargetIndices[] = { Index: lib/Target/AMDGPU/SIInstructions.td =================================================================== --- lib/Target/AMDGPU/SIInstructions.td +++ lib/Target/AMDGPU/SIInstructions.td @@ -1360,6 +1360,14 @@ let isTerminator = 1 in { +let Uses = [EXEC] in { + def SI_NON_UNIFORM_BRCOND_PSEUDO : PseudoInstSI < + (outs), + (ins SReg_64:$vcc, brtarget:$target), + [(brcond i1:$vcc, bb:$target)] + >; +} + def SI_IF: CFPseudoInstSI < (outs SReg_64:$dst), (ins SReg_64:$vcc, brtarget:$target), [(set i64:$dst, (int_amdgcn_if i1:$vcc, bb:$target))], 1, 1> {