Index: include/llvm/ADT/SetVector.h =================================================================== --- include/llvm/ADT/SetVector.h +++ include/llvm/ADT/SetVector.h @@ -113,6 +113,12 @@ return vector_.rend(); } + /// \brief Return the first element of the SetVector. + const T &front() const { + assert(!empty() && "Cannot call front() on empty SetVector!"); + return vector_.front(); + } + /// \brief Return the last element of the SetVector. const T &back() const { assert(!empty() && "Cannot call back() on empty SetVector!"); Index: include/llvm/CodeGen/CommandFlags.h =================================================================== --- include/llvm/CodeGen/CommandFlags.h +++ include/llvm/CodeGen/CommandFlags.h @@ -39,6 +39,16 @@ cl::value_desc("cpu-name"), cl::init("")); +cl::opt +CFGStructurizerType("cfg-structurizer", + cl::desc("Specify early or late CFG structurization for AMDGPU target"), + cl::init(CFGStructurizer::Late), + cl::values( + clEnumValN(CFGStructurizer::Early, "early", + "Structurize IR before code generation"), + clEnumValN(CFGStructurizer::Late, "late", + "Structurize after code generation"))); + cl::list MAttrs("mattr", cl::CommaSeparated, @@ -279,6 +289,7 @@ TargetOptions Options; Options.LessPreciseFPMADOption = EnableFPMAD; Options.AllowFPOpFusion = FuseFPOps; + Options.Structurizer = CFGStructurizerType; Options.UnsafeFPMath = EnableUnsafeFPMath; Options.NoInfsFPMath = EnableNoInfsFPMath; Options.NoNaNsFPMath = EnableNoNaNsFPMath; Index: include/llvm/CodeGen/MachineBasicBlock.h =================================================================== --- include/llvm/CodeGen/MachineBasicBlock.h +++ include/llvm/CodeGen/MachineBasicBlock.h @@ -441,10 +441,19 @@ /// other block. bool isLayoutSuccessor(const MachineBasicBlock *MBB) const; - /// Return true if the block can implicitly transfer control to the block - /// after it by falling off the end of it. This should return false if it can - /// reach the block after it, but it uses an explicit branch to do so (e.g., a - /// table jump). True is a conservative answer. + + /// Return the fallthrough block if the block can implicitly + /// transfer control to the block after it by falling off the end of + /// it. This should return null if it can reach the block after + /// it, but it uses an explicit branch to do so (e.g., a table + /// jump). Non-null return is a conservative answer. + MachineBasicBlock *getFallThrough(); + + /// Return true if the block can implicitly transfer control to the + /// block after it by falling off the end of it. This should return + /// false if it can reach the block after it, but it uses an + /// explicit branch to do so (e.g., a table jump). True is a + /// conservative answer. bool canFallThrough(); /// Returns a pointer to the first instruction in this block that is not a @@ -601,6 +610,10 @@ Insts.clear(); } + /// Split a block right before 'Where' and create two blocks in sequence. + /// Return the new basic block created as a successor after the original + static MachineBasicBlock *split(iterator Where); + /// Take an instruction from MBB 'Other' at the position From, and insert it /// into this MBB right before 'Where'. /// Index: include/llvm/CodeGen/Passes.h =================================================================== --- include/llvm/CodeGen/Passes.h +++ include/llvm/CodeGen/Passes.h @@ -79,6 +79,9 @@ /// MachineDominanaceFrontier - This pass is a machine dominators analysis pass. extern char &MachineDominanceFrontierID; + /// MachineRegionInfo - This pass computes SESE regions for machine functions. + extern char &MachineRegionInfoPassID; + /// EdgeBundles analysis - Bundle machine CFG edges. extern char &EdgeBundlesID; @@ -141,6 +144,11 @@ /// This pass adds dead/undef flags after analyzing subregister lanes. extern char &DetectDeadLanesID; + /// MachineCFGStructurizer - This pass performs machine code structurization. + extern char &MachineCFGStructurizerID; + + FunctionPass *createMachineCFGStructurizer(); + /// FastRegisterAllocation Pass - This pass register allocates as fast as /// possible. It is best suited for debug code where live ranges are short. /// Index: include/llvm/InitializePasses.h =================================================================== --- include/llvm/InitializePasses.h +++ include/llvm/InitializePasses.h @@ -222,6 +222,7 @@ void initializeMachineBlockPlacementPass(PassRegistry&); void initializeMachineBlockPlacementStatsPass(PassRegistry&); void initializeMachineBranchProbabilityInfoPass(PassRegistry&); +void initializeMachineCFGStructurizerPass(PassRegistry&); void initializeMachineCSEPass(PassRegistry&); void initializeMachineCombinerPass(PassRegistry &); void initializeMachineCopyPropagationPass(PassRegistry&); Index: include/llvm/Target/TargetInstrInfo.h =================================================================== --- include/llvm/Target/TargetInstrInfo.h +++ include/llvm/Target/TargetInstrInfo.h @@ -582,8 +582,8 @@ MachineBasicBlock *DestBB, const DebugLoc &DL, int *BytesAdded = nullptr) const { - return insertBranch(MBB, DestBB, nullptr, - ArrayRef(), DL, BytesAdded); + return insertBranch(MBB, DestBB, nullptr, ArrayRef(), DL, + BytesAdded); } /// Analyze the loop code, return true if it cannot be understoo. Upon @@ -606,6 +606,15 @@ llvm_unreachable("Target didn't implement ReduceLoopCount"); } + /// Insert unconditionalbranch code into the end of the specified + /// MachineBasicBlock. + unsigned InsertUnconditionalBranch(MachineBasicBlock &MBB, + MachineBasicBlock *DBB, + DebugLoc DL) const { + return insertBranch(MBB, DBB, nullptr, SmallVector(), + DL); + } + /// Delete the instruction OldInst and everything after it, replacing it with /// an unconditional branch to NewDest. This is used by the tail merging pass. virtual void ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail, @@ -674,6 +683,14 @@ return false; } + /// Return the register class that is preferred to be used with select + /// instructions by the target, given the minimum size in bits. + virtual const TargetRegisterClass * + getPreferredSelectRegClass(unsigned Size) const { + llvm_unreachable("Target didn't implement TargetInstrInfo::getPreferred" + "SelectRegClass!"); + } + /// Return true if it is possible to insert a select /// instruction that chooses between TrueReg and FalseReg based on the /// condition code in Cond. @@ -916,6 +933,25 @@ /// Return true when a target supports MachineCombiner. virtual bool useMachineCombiner() const { return false; } + virtual bool isNonUniformBranchInstr(MachineInstr &Instr) const { + llvm_unreachable("Target didn't implement" + " TargetInstrInfo::isNonUniformBranchInstr"); + } + + /// Handle simple if region + virtual void convertNonUniformIfRegion(MachineBasicBlock *IfEntry, + MachineBasicBlock *IfEnd) const { + llvm_unreachable("Target didn't implement" + " TargetInstrInfo::convertNonUniformIfRegion"); + } + + /// Handle simple loop region + virtual void convertNonUniformLoopRegion(MachineBasicBlock *LoopEntry, + MachineBasicBlock *LoopEnd) const { + llvm_unreachable("Target didn't implement" + " TargetInstrInfo::convertNonUniformLoopRegion"); + } + protected: /// Target-dependent implementation for foldMemoryOperand. /// Target-independent code in foldMemoryOperand will @@ -1086,10 +1122,29 @@ return true; } + /// Insert assign + virtual void insertAssign(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, DebugLoc DL, + unsigned DstReg, unsigned Value) const { + llvm_unreachable("Target didn't implement TargetInstrInfo::insertAssign"); + } + + /// Insert assign to 0 + void insertClear(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, + DebugLoc DL, unsigned DstReg) const { + // The target will have to create a special case when 0 is passed. + insertAssign(MBB, MI, DL, DstReg, 0); + } + /// Insert a noop into the instruction stream at the specified point. virtual void insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const; + /// Insert a function return instruction at the end of MBB. + /// The function assumes there is no existing terminator. + virtual void insertReturn(MachineBasicBlock &MBB) const { + llvm_unreachable("Target didn't implement TargetInstrInfo::insertReturn"); + } /// Return the noop instruction to use for a noop. virtual void getNoopForMachoTarget(MCInst &NopInst) const; @@ -1201,6 +1256,22 @@ /// targets may choose to honor. bool usePreRAHazardRecognizer() const; + /// Insert equality compare instruction betwen a register and a + /// constant, and return the register where the result is stored. + virtual unsigned insertEQ(MachineBasicBlock *MBB, + MachineBasicBlock::iterator I, DebugLoc DL, + unsigned SrcReg, int Value) const { + llvm_unreachable("Target didn't implement TargetInstrInfo::insertEQ"); + } + + /// Insert non-equality compare instruction betwen a register and a + /// constant, and return the register where the result is stored. + virtual unsigned insertNE(MachineBasicBlock *MBB, + MachineBasicBlock::iterator I, DebugLoc DL, + unsigned SrcReg, int Value) const { + llvm_unreachable("Target didn't implement TargetInstrInfo::insertNE"); + } + /// For a comparison instruction, return the source registers /// in SrcReg and SrcReg2 if having two register operands, and the value it /// compares against in CmpValue. Return true if the comparison instruction Index: include/llvm/Target/TargetOptions.h =================================================================== --- include/llvm/Target/TargetOptions.h +++ include/llvm/Target/TargetOptions.h @@ -72,6 +72,13 @@ GNU }; + namespace CFGStructurizer { + enum Type { + Early, // IR Level + Late // Machine Level + }; + } + /// Identify a debugger for "tuning" the debug info. /// /// The "debugger tuning" concept allows us to present a more intuitive @@ -114,7 +121,8 @@ ThreadModel(ThreadModel::POSIX), EABIVersion(EABI::Default), DebuggerTuning(DebuggerKind::Default), FPDenormalMode(FPDenormal::IEEE), - ExceptionModel(ExceptionHandling::None) {} + ExceptionModel(ExceptionHandling::None), + Structurizer(CFGStructurizer::Early) {} /// PrintMachineCode - This flag is enabled when the -print-machineinstrs /// option is specified on the command line, and should enable debugging @@ -268,6 +276,9 @@ /// What exception model to use ExceptionHandling ExceptionModel; + /// What structurizer to use + CFGStructurizer::Type Structurizer; + /// Machine level options. MCTargetOptions MCOptions; }; @@ -293,6 +304,7 @@ ARE_EQUAL(EmulatedTLS) && ARE_EQUAL(FloatABIType) && ARE_EQUAL(AllowFPOpFusion) && + ARE_EQUAL(Structurizer) && ARE_EQUAL(ThreadModel) && ARE_EQUAL(EABIVersion) && ARE_EQUAL(DebuggerTuning) && Index: lib/CodeGen/CMakeLists.txt =================================================================== --- lib/CodeGen/CMakeLists.txt +++ lib/CodeGen/CMakeLists.txt @@ -56,6 +56,7 @@ MachineBlockFrequencyInfo.cpp MachineBlockPlacement.cpp MachineBranchProbabilityInfo.cpp + MachineCFGStructurizer.cpp MachineCombiner.cpp MachineCopyPropagation.cpp MachineCSE.cpp Index: lib/CodeGen/CodeGen.cpp =================================================================== --- lib/CodeGen/CodeGen.cpp +++ lib/CodeGen/CodeGen.cpp @@ -46,6 +46,7 @@ initializeMachineBlockFrequencyInfoPass(Registry); initializeMachineBlockPlacementPass(Registry); initializeMachineBlockPlacementStatsPass(Registry); + initializeMachineCFGStructurizerPass(Registry); initializeMachineCSEPass(Registry); initializeImplicitNullChecksPass(Registry); initializeMachineCombinerPass(Registry); @@ -57,6 +58,7 @@ initializeMachineModuleInfoPass(Registry); initializeMachinePipelinerPass(Registry); initializeMachinePostDominatorTreePass(Registry); + initializeMachineRegionInfoPassPass(Registry); initializeMachineSchedulerPass(Registry); initializeMachineSinkingPass(Registry); initializeMachineVerifierPassPass(Registry); Index: lib/CodeGen/MachineBasicBlock.cpp =================================================================== --- lib/CodeGen/MachineBasicBlock.cpp +++ lib/CodeGen/MachineBasicBlock.cpp @@ -488,7 +488,7 @@ // FIXME: This does not seem like a reasonable pattern to support, but it // has been seen in the wild coming out of degenerate ARM test cases. TII->removeBranch(*this); - + // Finally update the unconditional successor to be reached via a branch if // it would not be reached by fallthrough. if (!isLayoutSuccessor(TBB)) @@ -684,16 +684,16 @@ return std::next(I) == MachineFunction::const_iterator(MBB); } -bool MachineBasicBlock::canFallThrough() { +MachineBasicBlock *MachineBasicBlock::getFallThrough() { MachineFunction::iterator Fallthrough = getIterator(); ++Fallthrough; // If FallthroughBlock is off the end of the function, it can't fall through. if (Fallthrough == getParent()->end()) - return false; + return nullptr; // If FallthroughBlock isn't a successor, no fallthrough is possible. if (!isSuccessor(&*Fallthrough)) - return false; + return nullptr; // Analyze the branches, if any, at the end of the block. MachineBasicBlock *TBB = nullptr, *FBB = nullptr; @@ -705,25 +705,31 @@ // is possible. The isPredicated check is needed because this code can be // called during IfConversion, where an instruction which is normally a // Barrier is predicated and thus no longer an actual control barrier. - return empty() || !back().isBarrier() || TII->isPredicated(back()); + return (empty() || !back().isBarrier() || TII->isPredicated(back())) + ? &*Fallthrough + : nullptr; } // If there is no branch, control always falls through. - if (!TBB) return true; + if (!TBB) return &*Fallthrough; // If there is some explicit branch to the fallthrough block, it can obviously // reach, even though the branch should get folded to fall through implicitly. if (MachineFunction::iterator(TBB) == Fallthrough || MachineFunction::iterator(FBB) == Fallthrough) - return true; + return &*Fallthrough; // If it's an unconditional branch to some block not the fall through, it // doesn't fall through. - if (Cond.empty()) return false; + if (Cond.empty()) return nullptr; // Otherwise, if it is conditional and has no explicit false block, it falls // through. - return FBB == nullptr; + return (FBB == nullptr) ? &*Fallthrough : nullptr; +} + +bool MachineBasicBlock::canFallThrough() { + return getFallThrough() != nullptr; } MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, @@ -999,6 +1005,22 @@ return true; } +MachineBasicBlock *MachineBasicBlock::split(iterator I) { + // Create the fall-through block. + MachineBasicBlock *MBB = (*I).getParent(); + MachineFunction *MF = MBB->getParent(); + MachineBasicBlock *SuccMBB = MF->CreateMachineBasicBlock(); + auto MBBIter = ++(MBB->getIterator()); + MF->insert(MBBIter, SuccMBB); + SuccMBB->transferSuccessorsAndUpdatePHIs(MBB); + MBB->addSuccessor(SuccMBB); + + // Splice the code over. + SuccMBB->splice(SuccMBB->end(), MBB, I, MBB->end()); + + return SuccMBB; +} + /// Prepare MI to be removed from its bundle. This fixes bundle flags on MI's /// neighboring instructions so the bundle won't be broken by removing MI. static void unbundleSingleMI(MachineInstr *MI) { Index: lib/CodeGen/MachineCFGStructurizer.cpp =================================================================== --- /dev/null +++ lib/CodeGen/MachineCFGStructurizer.cpp @@ -0,0 +1,2667 @@ +//===-- MachineCFGStructurizer.cpp - Machine code if conversion pass. -----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the machine instruction level CFG structurizer pass. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/CFG.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegionInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/IR/DebugLoc.h" +#include "llvm/Support/Debug.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetSubtargetInfo.h" + +using namespace llvm; + +#define DEBUG_TYPE "machinecfgstructurizer" + +namespace { +class PHILinearizeDestIterator; + +class PHILinearize { + friend class PHILinearizeDestIterator; + +private: + typedef DenseSet PHISourcesT; + typedef struct { + unsigned DestReg; + DebugLoc DL; + PHISourcesT Sources; + } PHIInfoElementT; + typedef SmallPtrSet PHIInfoT; + PHIInfoT PHIInfo; + + static unsigned phiInfoElementGetDest(PHIInfoElementT *Info); + static void phiInfoElementSetDef(PHIInfoElementT *Info, unsigned NewDef); + static DebugLoc phiInfoElementGetDebugLoc(PHIInfoElementT *Info); + static PHISourcesT &phiInfoElementGetSources(PHIInfoElementT *Info); + static void phiInfoElementAddSource(PHIInfoElementT *Info, + unsigned SourceReg); + static void phiInfoElementRemoveSource(PHIInfoElementT *Info, + unsigned SourceReg); + PHIInfoElementT *findPHIInfoElement(unsigned DestReg); + PHIInfoElementT *findPHIInfoElementFromSource(unsigned SourceReg); + +public: + void addDest(unsigned DestReg, DebugLoc DL); + void replaceDef(unsigned OldDestReg, unsigned NewDestReg); + void deleteDef(unsigned DestReg); + DebugLoc getDebugLoc(unsigned DestReg); + void addSource(unsigned DestReg, unsigned SourceReg); + void removeSource(unsigned DestReg, unsigned SourceReg); + bool findDest(unsigned SourceReg, unsigned &DestReg); + bool isSource(unsigned Reg); + unsigned getNumSources(unsigned DestReg); + void dump(MachineRegisterInfo *MRI); + void clear(); + + typedef PHISourcesT::iterator source_iterator; + typedef PHILinearizeDestIterator dest_iterator; + + dest_iterator dests_begin(); + dest_iterator dests_end(); + + source_iterator sources_begin(unsigned Reg); + source_iterator sources_end(unsigned Reg); +}; + +class PHILinearizeDestIterator { +private: + PHILinearize::PHIInfoT::iterator Iter; + +public: + unsigned operator*() { return PHILinearize::phiInfoElementGetDest(*Iter); } + PHILinearizeDestIterator &operator++() { + ++Iter; + return *this; + } + bool operator==(const PHILinearizeDestIterator &I) const { + return I.Iter == Iter; + } + bool operator!=(const PHILinearizeDestIterator &I) const { + return I.Iter != Iter; + } + + PHILinearizeDestIterator(PHILinearize::PHIInfoT::iterator I) : Iter(I) {} +}; + +unsigned PHILinearize::phiInfoElementGetDest(PHIInfoElementT *Info) { + return Info->DestReg; +} + +void PHILinearize::phiInfoElementSetDef(PHIInfoElementT *Info, + unsigned NewDef) { + Info->DestReg = NewDef; +} + +DebugLoc PHILinearize::phiInfoElementGetDebugLoc(PHIInfoElementT *Info) { + return Info->DL; +} + +PHILinearize::PHISourcesT & +PHILinearize::phiInfoElementGetSources(PHIInfoElementT *Info) { + return Info->Sources; +} + +void PHILinearize::phiInfoElementAddSource(PHIInfoElementT *Info, + unsigned SourceReg) { + phiInfoElementGetSources(Info).insert(SourceReg); +} + +void PHILinearize::phiInfoElementRemoveSource(PHIInfoElementT *Info, + unsigned SourceReg) { + phiInfoElementGetSources(Info).erase(SourceReg); +} + +PHILinearize::PHIInfoElementT * +PHILinearize::findPHIInfoElement(unsigned DestReg) { + for (auto I : PHIInfo) { + if (phiInfoElementGetDest(I) == DestReg) { + return I; + } + } + return nullptr; +} + +PHILinearize::PHIInfoElementT * +PHILinearize::findPHIInfoElementFromSource(unsigned SourceReg) { + for (auto I : PHIInfo) { + for (auto SI : phiInfoElementGetSources(I)) { + if (SI == SourceReg) { + return I; + } + } + } + return nullptr; +} + +void PHILinearize::addDest(unsigned DestReg, DebugLoc DL) { + assert(findPHIInfoElement(DestReg) == nullptr && "Dest already exsists"); + PHISourcesT EmptySet; + PHIInfoElementT *NewElement = new PHIInfoElementT(); + NewElement->DestReg = DestReg; + NewElement->DL = DL; + NewElement->Sources = EmptySet; + PHIInfo.insert(NewElement); +} + +void PHILinearize::replaceDef(unsigned OldDestReg, unsigned NewDestReg) { + phiInfoElementSetDef(findPHIInfoElement(OldDestReg), NewDestReg); +} + +void PHILinearize::deleteDef(unsigned DestReg) { + PHIInfoElementT *InfoElement = findPHIInfoElement(DestReg); + PHIInfo.erase(InfoElement); + delete InfoElement; +} + +DebugLoc PHILinearize::getDebugLoc(unsigned DestReg) { + return phiInfoElementGetDebugLoc(findPHIInfoElement(DestReg)); +} + +void PHILinearize::addSource(unsigned DestReg, unsigned SourceReg) { + phiInfoElementAddSource(findPHIInfoElement(DestReg), SourceReg); +} + +void PHILinearize::removeSource(unsigned DestReg, unsigned SourceReg) { + phiInfoElementRemoveSource(findPHIInfoElement(DestReg), SourceReg); +} + +bool PHILinearize::findDest(unsigned SourceReg, unsigned &DestReg) { + PHIInfoElementT *InfoElement = findPHIInfoElementFromSource(SourceReg); + if (InfoElement != nullptr) { + DestReg = phiInfoElementGetDest(InfoElement); + return true; + } + return false; +} + +bool PHILinearize::isSource(unsigned Reg) { + unsigned DestReg; + return findDest(Reg, DestReg); +} + +unsigned PHILinearize::getNumSources(unsigned DestReg) { + return phiInfoElementGetSources(findPHIInfoElement(DestReg)).size(); +} + +void PHILinearize::dump(MachineRegisterInfo *MRI) { + const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo(); + DEBUG(dbgs() << "=PHIInfo Start=\n"); + for (auto PII : this->PHIInfo) { + PHIInfoElementT &Element = *PII; + DEBUG(dbgs() << "Dest: " << PrintReg(Element.DestReg, TRI) + << " Sources: {"); + for (auto &SI : Element.Sources) { + DEBUG(dbgs() << PrintReg(SI, TRI) << ","); + } + DEBUG(dbgs() << "}\n"); + } + DEBUG(dbgs() << "=PHIInfo End=\n"); +} + +void PHILinearize::clear() { PHIInfo = PHIInfoT(); } + +PHILinearize::dest_iterator PHILinearize::dests_begin() { + return PHILinearizeDestIterator(PHIInfo.begin()); +} + +PHILinearize::dest_iterator PHILinearize::dests_end() { + return PHILinearizeDestIterator(PHIInfo.end()); +} + +PHILinearize::source_iterator PHILinearize::sources_begin(unsigned Reg) { + auto InfoElement = findPHIInfoElement(Reg); + return phiInfoElementGetSources(InfoElement).begin(); +} +PHILinearize::source_iterator PHILinearize::sources_end(unsigned Reg) { + auto InfoElement = findPHIInfoElement(Reg); + return phiInfoElementGetSources(InfoElement).end(); +} + +class RegionMRT; +class MBBMRT; + +static unsigned getPHINumInputs(MachineInstr &PHI) { + assert(PHI.isPHI()); + return (PHI.getNumOperands() - 1) / 2; +} + +static MachineBasicBlock *getPHIPred(MachineInstr &PHI, unsigned Index) { + assert(PHI.isPHI()); + return PHI.getOperand(Index * 2 + 2).getMBB(); +} + +static unsigned getPHISourceReg(MachineInstr &PHI, unsigned Index) { + assert(PHI.isPHI()); + return PHI.getOperand(Index * 2 + 1).getReg(); +} + +static unsigned getPHIDestReg(MachineInstr &PHI) { + assert(PHI.isPHI()); + return PHI.getOperand(0).getReg(); +} + +class LinearizedRegion { +private: + MachineBasicBlock *Entry; + // Note: This exit block is part of the region, and is the last + // merge block before exiting the region. + MachineBasicBlock *Exit; + DenseSet LiveOuts; + SmallPtrSet MBBs; + unsigned BBSelectRegIn; + unsigned BBSelectRegOut; + bool HasLoop; + LinearizedRegion *Parent; + RegionMRT *RMRT; + + void storeLiveOutReg(MachineBasicBlock *MBB, unsigned Reg, + MachineInstr *DefInstr, const MachineRegisterInfo *MRI, + const TargetRegisterInfo *TRI, PHILinearize &PHIInfo); + + void storeLiveOutRegRegion(RegionMRT *Region, unsigned Reg, + MachineInstr *DefInstr, + const MachineRegisterInfo *MRI, + const TargetRegisterInfo *TRI, + PHILinearize &PHIInfo); + + void storeLiveOuts(MachineBasicBlock *MBB, const MachineRegisterInfo *MRI, + const TargetRegisterInfo *TRI, PHILinearize &PHIInfo); + + void storeLiveOuts(RegionMRT *Region, const MachineRegisterInfo *MRI, + const TargetRegisterInfo *TRI, PHILinearize &PHIInfo, + RegionMRT *TopRegion = nullptr); + +public: + void setRegionMRT(RegionMRT *Region) { RMRT = Region; } + + RegionMRT *getRegionMRT() { return RMRT; } + + void setParent(LinearizedRegion *P) { Parent = P; } + + LinearizedRegion *getParent() { return Parent; } + + void print(raw_ostream &OS, const TargetRegisterInfo *TRI = nullptr); + + void setBBSelectRegIn(unsigned Reg); + + unsigned getBBSelectRegIn(); + + void setBBSelectRegOut(unsigned Reg, bool IsLiveOut); + + unsigned getBBSelectRegOut(); + + void setHasLoop(bool Value); + + bool getHasLoop(); + + void addLiveOut(unsigned VReg); + + void addLiveOuts(LinearizedRegion *LRegion); + + void removeLiveOut(unsigned Reg); + + void replaceLiveOut(unsigned OldReg, unsigned NewReg); + + void replaceRegister(unsigned Register, unsigned NewRegister, + MachineRegisterInfo *MRI, bool ReplaceInside, + bool ReplaceOutside, bool IncludeLoopPHIs); + + void replaceRegisterInsideRegion(unsigned Register, unsigned NewRegister, + bool IncludeLoopPHIs, + MachineRegisterInfo *MRI); + + void replaceRegisterOutsideRegion(unsigned Register, unsigned NewRegister, + bool IncludeLoopPHIs, + MachineRegisterInfo *MRI); + + DenseSet *getLiveOuts(); + + void setEntry(MachineBasicBlock *NewEntry); + + MachineBasicBlock *getEntry(); + + void setExit(MachineBasicBlock *NewExit); + + MachineBasicBlock *getExit(); + + void addMBB(MachineBasicBlock *MBB); + + bool contains(MachineBasicBlock *MBB); + + bool isLiveOut(unsigned Reg); + + bool hasNoDef(unsigned Reg, MachineRegisterInfo *MRI); + + bool isDefinedInRegion(unsigned Reg, MachineRegisterInfo *MRI); + + void removeFalseRegisterKills(MachineRegisterInfo *MRI); + + void initLiveOut(RegionMRT *Region, const MachineRegisterInfo *MRI, + const TargetRegisterInfo *TRI, PHILinearize &PHIInfo); + + LinearizedRegion(MachineBasicBlock *MBB, const MachineRegisterInfo *MRI, + const TargetRegisterInfo *TRI, PHILinearize &PHIInfo); + + LinearizedRegion(RegionMRT *Region, const MachineRegisterInfo *MRI, + const TargetRegisterInfo *TRI, PHILinearize &PHIInfo); + + LinearizedRegion(); + + ~LinearizedRegion(); +}; + +class MRT { + RegionMRT *Parent; + +public: + virtual RegionMRT *getRegionMRT() { return nullptr; } + + virtual MBBMRT *getMBBMRT() { return nullptr; } + + virtual unsigned getBBSelectRegIn() = 0; + virtual unsigned getBBSelectRegOut() = 0; + + bool isRegion() { return getRegionMRT() != nullptr; } + + bool isMBB() { return getMBBMRT() != nullptr; } + + bool isRoot() { return Parent == nullptr; } + + void setParent(RegionMRT *Region) { Parent = Region; } + + RegionMRT *getParent() { return Parent; } + + static MachineBasicBlock * + initializeMRT(MachineFunction &MF, const MachineRegionInfo *RegionInfo, + DenseMap &RegionMap); + + static RegionMRT *buildMRT(MachineFunction &MF, + const MachineRegionInfo *RegionInfo, + const TargetInstrInfo *TII, + MachineRegisterInfo *MRI); + + virtual void dump(const TargetRegisterInfo *TRI, int depth = 0) = 0; + + void dumpDepth(int depth) { + for (int i = depth; i > 0; --i) { + dbgs() << " "; + } + } + + virtual ~MRT() {} +}; + +class MBBMRT : public MRT { + MachineBasicBlock *MBB; + unsigned BBSelectRegIn; + unsigned BBSelectRegOut; + +public: + virtual MBBMRT *getMBBMRT() { return this; } + + MachineBasicBlock *getMBB() { return MBB; } + + unsigned getBBSelectRegIn() { return BBSelectRegIn; } + + unsigned getBBSelectRegOut() { return BBSelectRegOut; } + + void setBBSelectRegIn(unsigned Reg) { BBSelectRegIn = Reg; } + + void setBBSelectRegOut(unsigned Reg) { BBSelectRegOut = Reg; } + + virtual void dump(const TargetRegisterInfo *TRI, int depth = 0) { + dumpDepth(depth); + dbgs() << "MBB: " << getMBB()->getNumber() << "\n"; + } + + MBBMRT(MachineBasicBlock *BB) : MBB(BB), BBSelectRegIn(0), BBSelectRegOut(0) { + setParent(nullptr); + } +}; + +class RegionMRT : public MRT { +protected: + MachineRegion *Region; + LinearizedRegion *LRegion; + MachineBasicBlock *Succ; + SetVector Children; + +public: + virtual RegionMRT *getRegionMRT() { return this; } + + virtual unsigned getBBSelectRegIn() { + return Children.back()->getBBSelectRegIn(); + } + + virtual unsigned getBBSelectRegOut() { + return Children.front()->getBBSelectRegIn(); + } + + void setLinearizedRegion(LinearizedRegion *LinearizeRegion) { + LRegion = LinearizeRegion; + } + + LinearizedRegion *getLinearizedRegion() { return LRegion; } + + MachineRegion *getMachineRegion() { return Region; } + + void addChild(MRT *Tree) { Children.insert(Tree); } + + SetVector *getChildren() { return &Children; } + + virtual void dump(const TargetRegisterInfo *TRI, int depth = 0) { + dumpDepth(depth); + dbgs() << "Region: " << (void *)Region << "\n"; + + if (LRegion) { + dumpDepth(depth); + dbgs() << "SelectRegIn: " << PrintReg(LRegion->getBBSelectRegIn(), TRI) + << "\n"; + dumpDepth(depth); + dbgs() << "SelectRegOut: " << PrintReg(LRegion->getBBSelectRegOut(), TRI) + << "\n"; + } + + dumpDepth(depth); + if (getSucc()) + dbgs() << "Succ: " << getSucc()->getNumber() << "\n"; + else + dbgs() << "Succ: none \n"; + for (auto MRTI : Children) { + MRTI->dump(TRI, depth + 1); + } + } + + MRT *getEntryTree() { return Children.back(); } + + MRT *getExitTree() { return Children.front(); } + + MachineBasicBlock *getEntry() { + MRT *Tree = Children.back(); + return (Tree->isRegion()) ? Tree->getRegionMRT()->getEntry() + : Tree->getMBBMRT()->getMBB(); + } + + MachineBasicBlock *getExit() { + MRT *Tree = Children.front(); + return (Tree->isRegion()) ? Tree->getRegionMRT()->getExit() + : Tree->getMBBMRT()->getMBB(); + } + + void setSucc(MachineBasicBlock *MBB) { Succ = MBB; } + + MachineBasicBlock *getSucc() { return Succ; } + + bool contains(MachineBasicBlock *MBB) { + for (auto CI : Children) { + if (CI->isMBB()) { + if (MBB == CI->getMBBMRT()->getMBB()) { + return true; + } + } else { + if (CI->getRegionMRT()->contains(MBB)) { + return true; + } else if (CI->getRegionMRT()->getLinearizedRegion() != nullptr && + CI->getRegionMRT()->getLinearizedRegion()->contains(MBB)) { + return true; + } + } + } + return false; + } + + void replaceLiveOutReg(unsigned Register, unsigned NewRegister) { + LinearizedRegion *LRegion = getLinearizedRegion(); + LRegion->replaceLiveOut(Register, NewRegister); + for (auto &CI : Children) { + if (CI->isRegion()) { + CI->getRegionMRT()->replaceLiveOutReg(Register, NewRegister); + } + } + } + + RegionMRT(MachineRegion *MachineRegion) + : Region(MachineRegion), LRegion(nullptr), Succ(nullptr) { + setParent(nullptr); + } + + virtual ~RegionMRT() { + if (LRegion) { + delete LRegion; + } + + for (auto CI : Children) { + delete &(*CI); + } + } +}; + +static unsigned createBBSelectReg(const TargetInstrInfo *TII, + MachineRegisterInfo *MRI) { + return MRI->createVirtualRegister(TII->getPreferredSelectRegClass(32)); +} + +MachineBasicBlock * +MRT::initializeMRT(MachineFunction &MF, const MachineRegionInfo *RegionInfo, + DenseMap &RegionMap) { + for (auto &MFI : MF) { + MachineBasicBlock *ExitMBB = &MFI; + if (ExitMBB->succ_size() == 0) { + return ExitMBB; + } + } + llvm_unreachable("CFG has no exit block"); + return nullptr; +} + +RegionMRT *MRT::buildMRT(MachineFunction &MF, + const MachineRegionInfo *RegionInfo, + const TargetInstrInfo *TII, MachineRegisterInfo *MRI) { + SmallPtrSet PlacedRegions; + DenseMap RegionMap; + MachineRegion *TopLevelRegion = RegionInfo->getTopLevelRegion(); + RegionMRT *Result = new RegionMRT(TopLevelRegion); + RegionMap[TopLevelRegion] = Result; + + // Insert the exit block first, we need it to be the merge node + // for the top level region. + MachineBasicBlock *Exit = initializeMRT(MF, RegionInfo, RegionMap); + + unsigned BBSelectRegIn = createBBSelectReg(TII, MRI); + MBBMRT *ExitMRT = new MBBMRT(Exit); + RegionMap[RegionInfo->getRegionFor(Exit)]->addChild(ExitMRT); + ExitMRT->setBBSelectRegIn(BBSelectRegIn); + + for (auto MBBI : post_order(&(MF.front()))) { + MachineBasicBlock *MBB = &(*MBBI); + + // Skip Exit since we already added it + if (MBB == Exit) { + continue; + } + + DEBUG(dbgs() << "Visiting BB#" << MBB->getNumber() << "\n"); + MBBMRT *NewMBB = new MBBMRT(MBB); + unsigned BBSelectRegIn = createBBSelectReg(TII, MRI); + unsigned BBSelectRegOut = BBSelectRegIn; + NewMBB->setBBSelectRegIn(BBSelectRegIn); + NewMBB->setBBSelectRegOut(BBSelectRegOut); + + MachineRegion *Region = RegionInfo->getRegionFor(MBB); + + // Ensure we have the MRT region + if (RegionMap.count(Region) == 0) { + RegionMRT *NewMRTRegion = new RegionMRT(Region); + RegionMap[Region] = NewMRTRegion; + + // Ensure all parents are in the RegionMap + MachineRegion *Parent = Region->getParent(); + while (RegionMap.count(Parent) == 0) { + RegionMRT *NewMRTParent = new RegionMRT(Parent); + NewMRTParent->addChild(NewMRTRegion); + RegionMap[Parent] = NewMRTParent; + NewMRTRegion = NewMRTParent; + Parent = Parent->getParent(); + } + RegionMap[Parent]->addChild(NewMRTRegion); + } + + // Add MBB to Region MRT + RegionMap[Region]->addChild(NewMBB); + RegionMap[Region]->setSucc(Region->getExit()); + } + return Result; +} + +void LinearizedRegion::storeLiveOutReg(MachineBasicBlock *MBB, unsigned Reg, + MachineInstr *DefInstr, + const MachineRegisterInfo *MRI, + const TargetRegisterInfo *TRI, + PHILinearize &PHIInfo) { + if (TRI->isVirtualRegister(Reg)) { + DEBUG(dbgs() << "Considering Register: " << PrintReg(Reg, TRI) << "\n"); + // If this is a source register to a PHI we are chaining, it + // must be live out. + if (PHIInfo.isSource(Reg)) { + DEBUG(dbgs() << "Add LiveOut (PHI): " << PrintReg(Reg, TRI) << "\n"); + addLiveOut(Reg); + } else { + // If this is live out of the MBB + for (auto &UI : MRI->use_operands(Reg)) { + if (UI.getParent()->getParent() != MBB) { // || + // PHIInfo.isSource(Reg)) { + DEBUG(dbgs() << "Add LiveOut (MBB BB#" << MBB->getNumber() + << "): " << PrintReg(Reg, TRI) << "\n"); + addLiveOut(Reg); + } else { + // If the use is in the same MBB we have to make sure + // it is after the def, otherwise it is live out in a loop + MachineInstr *UseInstr = UI.getParent(); + for (MachineBasicBlock::instr_iterator + MII = UseInstr->getIterator(), + MIE = UseInstr->getParent()->instr_end(); + MII != MIE; ++MII) { + if ((&(*MII)) == DefInstr) { + DEBUG(dbgs() << "Add LiveOut (Loop): " << PrintReg(Reg, TRI) + << "\n"); + addLiveOut(Reg); + } + } + } + } + } + } +} + +void LinearizedRegion::storeLiveOutRegRegion(RegionMRT *Region, unsigned Reg, + MachineInstr *DefInstr, + const MachineRegisterInfo *MRI, + const TargetRegisterInfo *TRI, + PHILinearize &PHIInfo) { + if (TRI->isVirtualRegister(Reg)) { + DEBUG(dbgs() << "Considering Register: " << PrintReg(Reg, TRI) << "\n"); + for (auto &UI : MRI->use_operands(Reg)) { + if (!Region->contains(UI.getParent()->getParent())) { + DEBUG(dbgs() << "Add LiveOut (Region " << (void *)Region + << "): " << PrintReg(Reg, TRI) << "\n"); + addLiveOut(Reg); + } + } + } +} + +void LinearizedRegion::storeLiveOuts(MachineBasicBlock *MBB, + const MachineRegisterInfo *MRI, + const TargetRegisterInfo *TRI, + PHILinearize &PHIInfo) { + DEBUG(dbgs() << "-Store Live Outs Begin (BB#" << MBB->getNumber() << ")-\n"); + for (auto &II : *MBB) { + for (auto &RI : II.defs()) { + storeLiveOutReg(MBB, RI.getReg(), RI.getParent(), MRI, TRI, PHIInfo); + } + for (auto &IRI : II.implicit_operands()) { + if (IRI.isDef()) { + storeLiveOutReg(MBB, IRI.getReg(), IRI.getParent(), MRI, TRI, PHIInfo); + } + } + } + + // If we have a successor with a PHI, source coming from this MBB we have to + // add the register as live out + for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), + E = MBB->succ_end(); + SI != E; ++SI) { + for (auto &II : *(*SI)) { + if (II.isPHI()) { + MachineInstr &PHI = II; + int numPreds = getPHINumInputs(PHI); + for (int i = 0; i < numPreds; ++i) { + if (getPHIPred(PHI, i) == MBB) { + unsigned PHIReg = getPHISourceReg(PHI, i); + DEBUG(dbgs() << "Add LiveOut (PhiSource BB#" << MBB->getNumber() + << " -> BB#" << (*SI)->getNumber() + << "): " << PrintReg(PHIReg, TRI) << "\n"); + addLiveOut(PHIReg); + } + } + } + } + } + + DEBUG(dbgs() << "-Store Live Outs Endn-\n"); +} + +void LinearizedRegion::storeLiveOuts(RegionMRT *Region, + const MachineRegisterInfo *MRI, + const TargetRegisterInfo *TRI, + PHILinearize &PHIInfo, + RegionMRT *CurrentTopRegion) { + // Fixme: Temporary imlementation, need to have real live out info + + MachineBasicBlock *Exit = Region->getSucc(); + + RegionMRT *TopRegion = + CurrentTopRegion == nullptr ? Region : CurrentTopRegion; + + // Check if exit is end of function, if so, no live outs. + if (Exit == nullptr) + return; + + auto Children = Region->getChildren(); + for (auto CI : *Children) { + if (CI->isMBB()) { + auto MBB = CI->getMBBMRT()->getMBB(); + for (auto &II : *MBB) { + for (auto &RI : II.defs()) { + storeLiveOutRegRegion(TopRegion, RI.getReg(), RI.getParent(), MRI, + TRI, PHIInfo); + } + for (auto &IRI : II.implicit_operands()) { + if (IRI.isDef()) { + storeLiveOutRegRegion(TopRegion, IRI.getReg(), IRI.getParent(), MRI, + TRI, PHIInfo); + } + } + } + } else { + storeLiveOuts(CI->getRegionMRT(), MRI, TRI, PHIInfo, TopRegion); + } + } + + if (CurrentTopRegion == nullptr) { + auto Succ = Region->getSucc(); + for (auto &II : *Succ) { + if (II.isPHI()) { + MachineInstr &PHI = II; + int numPreds = getPHINumInputs(PHI); + for (int i = 0; i < numPreds; ++i) { + if (Region->contains(getPHIPred(PHI, i))) { + unsigned PHIReg = getPHISourceReg(PHI, i); + DEBUG(dbgs() << "Add Region LiveOut (" << (void *)Region + << "): " << PrintReg(PHIReg, TRI) << "\n"); + addLiveOut(PHIReg); + } + } + } + } + } +} + +void LinearizedRegion::print(raw_ostream &OS, const TargetRegisterInfo *TRI) { + OS << "Linearized Region (" << Entry->getNumber() << ", " + << (Exit == nullptr ? -1 : Exit->getNumber()) + << "): In:" << PrintReg(getBBSelectRegIn(), TRI) + << " Out:" << PrintReg(getBBSelectRegOut(), TRI) << " {"; + for (auto &LI : LiveOuts) { + OS << PrintReg(LI, TRI) << " "; + } + OS << "} \n"; +} + +void LinearizedRegion::setBBSelectRegIn(unsigned Reg) { + if (BBSelectRegIn) { + removeLiveOut(BBSelectRegIn); + } + BBSelectRegIn = Reg; +} + +unsigned LinearizedRegion::getBBSelectRegIn() { return BBSelectRegIn; } + +void LinearizedRegion::setBBSelectRegOut(unsigned Reg, bool IsLiveOut) { + if (BBSelectRegOut) { + removeLiveOut(BBSelectRegOut); + } + BBSelectRegOut = Reg; + if (IsLiveOut) { + addLiveOut(Reg); + } +} + +unsigned LinearizedRegion::getBBSelectRegOut() { return BBSelectRegOut; } + +void LinearizedRegion::setHasLoop(bool Value) { HasLoop = Value; } + +bool LinearizedRegion::getHasLoop() { return HasLoop; } + +void LinearizedRegion::addLiveOut(unsigned VReg) { LiveOuts.insert(VReg); } + +void LinearizedRegion::addLiveOuts(LinearizedRegion *LRegion) { + DenseSet *RegionLiveOuts = LRegion->getLiveOuts(); + for (auto R : *RegionLiveOuts) { + addLiveOut(R); + } +} + +void LinearizedRegion::removeLiveOut(unsigned Reg) { + if (isLiveOut(Reg)) + LiveOuts.erase(Reg); +} + +void LinearizedRegion::replaceLiveOut(unsigned OldReg, unsigned NewReg) { + if (isLiveOut(OldReg)) { + removeLiveOut(OldReg); + addLiveOut(NewReg); + } +} + +void LinearizedRegion::replaceRegister(unsigned Register, unsigned NewRegister, + MachineRegisterInfo *MRI, + bool ReplaceInside, bool ReplaceOutside, + bool IncludeLoopPHI) { + assert(Register != NewRegister && "Cannot replace a reg with itself"); + + DEBUG(dbgs() << "Pepareing to replace register (region): " + << PrintReg(Register, MRI->getTargetRegisterInfo()) << " with " + << PrintReg(NewRegister, MRI->getTargetRegisterInfo()) << "\n"); + + // If we are replacing outside, we also need to update the LiveOuts + if (ReplaceOutside && isLiveOut(Register)) { + LinearizedRegion *Current = this; + while (Current != nullptr) { + DEBUG(dbgs() << "Region before register replace\n"); + DEBUG(Current->print(dbgs(), MRI->getTargetRegisterInfo())); + Current->replaceLiveOut(Register, NewRegister); + DEBUG(dbgs() << "Region after register replace\n"); + DEBUG(Current->print(dbgs(), MRI->getTargetRegisterInfo())); + Current = Current->getParent(); + } + } + + for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(Register), + E = MRI->reg_end(); + I != E;) { + MachineOperand &O = *I; + ++I; + + // We don't rewrite defs. + if (O.isDef()) + continue; + + bool IsInside = contains(O.getParent()->getParent()); + bool IsLoopPHI = IsInside && (O.getParent()->isPHI() && + O.getParent()->getParent() == getEntry()); + bool ShouldReplace = (IsInside && ReplaceInside) || + (!IsInside && ReplaceOutside) || + (IncludeLoopPHI && IsLoopPHI); + if (ShouldReplace) { + + if (TargetRegisterInfo::isPhysicalRegister(NewRegister)) { + DEBUG(dbgs() << "Trying to substitute physical register: " + << PrintReg(NewRegister, MRI->getTargetRegisterInfo()) + << "\n"); + llvm_unreachable("Cannot substitute physical registers"); + } else { + DEBUG(dbgs() << "Replacing register (region): " + << PrintReg(Register, MRI->getTargetRegisterInfo()) + << " with " + << PrintReg(NewRegister, MRI->getTargetRegisterInfo()) + << "\n"); + O.setReg(NewRegister); + } + } + } +} + +void LinearizedRegion::replaceRegisterInsideRegion(unsigned Register, + unsigned NewRegister, + bool IncludeLoopPHIs, + MachineRegisterInfo *MRI) { + replaceRegister(Register, NewRegister, MRI, true, false, IncludeLoopPHIs); +} + +void LinearizedRegion::replaceRegisterOutsideRegion(unsigned Register, + unsigned NewRegister, + bool IncludeLoopPHIs, + MachineRegisterInfo *MRI) { + replaceRegister(Register, NewRegister, MRI, false, true, IncludeLoopPHIs); +} + +DenseSet *LinearizedRegion::getLiveOuts() { return &LiveOuts; } + +void LinearizedRegion::setEntry(MachineBasicBlock *NewEntry) { + Entry = NewEntry; +} + +MachineBasicBlock *LinearizedRegion::getEntry() { return Entry; } + +void LinearizedRegion::setExit(MachineBasicBlock *NewExit) { Exit = NewExit; } + +MachineBasicBlock *LinearizedRegion::getExit() { return Exit; } + +void LinearizedRegion::addMBB(MachineBasicBlock *MBB) { MBBs.insert(MBB); } + +bool LinearizedRegion::contains(MachineBasicBlock *MBB) { + return MBBs.count(MBB) == 1; +} + +bool LinearizedRegion::isLiveOut(unsigned Reg) { + return LiveOuts.count(Reg) == 1; +} + +bool LinearizedRegion::hasNoDef(unsigned Reg, MachineRegisterInfo *MRI) { + return MRI->def_begin(Reg) == MRI->def_end(); +} + +bool LinearizedRegion::isDefinedInRegion(unsigned Reg, + MachineRegisterInfo *MRI) { + bool NoDef = hasNoDef(Reg, MRI); + if (NoDef) { + return false; + } + + if (!MRI->hasOneDef(Reg)) { + DEBUG(dbgs() << "Register " << PrintReg(Reg, MRI->getTargetRegisterInfo()) + << " has multiple defs\n"); + } + + assert(MRI->hasOneDef(Reg) && "Register has multiple definitions"); + MachineOperand *Def = &(*(MRI->def_begin(Reg))); + return contains(Def->getParent()->getParent()); +} + +// After the code has been structurized, what was flagged as kills +// before are no longer register kills. +void LinearizedRegion::removeFalseRegisterKills(MachineRegisterInfo *MRI) { + const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo(); + for (auto MBBI : MBBs) { + MachineBasicBlock *MBB = MBBI; + for (auto &II : *MBB) { + for (auto &RI : II.uses()) { + if (RI.isReg()) { + unsigned Reg = RI.getReg(); + if (TRI->isVirtualRegister(Reg)) { + if (hasNoDef(Reg, MRI)) + continue; + if (!MRI->hasOneDef(Reg)) { + DEBUG(this->getEntry()->getParent()->dump()); + DEBUG(dbgs() << PrintReg(Reg, TRI) << "\n"); + } + + if (MRI->def_begin(Reg) == MRI->def_end()) { + DEBUG(dbgs() << "Register " + << PrintReg(Reg, MRI->getTargetRegisterInfo()) + << " has NO defs\n"); + } else if (!MRI->hasOneDef(Reg)) { + DEBUG(dbgs() << "Register " + << PrintReg(Reg, MRI->getTargetRegisterInfo()) + << " has multiple defs\n"); + } + + assert(MRI->hasOneDef(Reg) && "Register has multiple definitions"); + MachineOperand *Def = &(*(MRI->def_begin(Reg))); + MachineOperand *UseOperand = &(RI); + bool UseIsOutsideDefMBB = Def->getParent()->getParent() != MBB; + if (UseIsOutsideDefMBB && UseOperand->isKill()) { + DEBUG(dbgs() << "Removing kill flag on register: " + << PrintReg(Reg, TRI) << "\n"); + UseOperand->setIsKill(false); + } + } + } + } + } + } +} + +void LinearizedRegion::initLiveOut(RegionMRT *Region, + const MachineRegisterInfo *MRI, + const TargetRegisterInfo *TRI, + PHILinearize &PHIInfo) { + storeLiveOuts(Region, MRI, TRI, PHIInfo); +} + +LinearizedRegion::LinearizedRegion(MachineBasicBlock *MBB, + const MachineRegisterInfo *MRI, + const TargetRegisterInfo *TRI, + PHILinearize &PHIInfo) { + setEntry(MBB); + setExit(MBB); + storeLiveOuts(MBB, MRI, TRI, PHIInfo); + MBBs.insert(MBB); + BBSelectRegOut = 0; + BBSelectRegIn = 0; + Parent = nullptr; +} + +LinearizedRegion::LinearizedRegion(RegionMRT *Region, + const MachineRegisterInfo *MRI, + const TargetRegisterInfo *TRI, + PHILinearize &PHIInfo) { + setEntry(Region->getEntry()); + // We don't have a single exit block that is part of the region + // at this point. When the transform is performed this block + // will be created. + setExit(nullptr); + storeLiveOuts(Region, MRI, TRI, PHIInfo); + BBSelectRegOut = 0; + BBSelectRegIn = 0; + Parent = nullptr; +} + +LinearizedRegion::LinearizedRegion() { + setEntry(nullptr); + setExit(nullptr); + BBSelectRegOut = 0; + BBSelectRegIn = 0; + Parent = nullptr; +} + +LinearizedRegion::~LinearizedRegion() {} + +class MachineCFGStructurizer : public MachineFunctionPass { +private: + const MachineRegionInfo *Regions; + const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; + MachineRegisterInfo *MRI; + unsigned BBSelectRegister; + PHILinearize PHIInfo; + DenseMap FallthroughMap; + + void getPHIRegionIndices(RegionMRT *Region, MachineInstr &PHI, + SmallVector &RegionIndices); + void getPHIRegionIndices(LinearizedRegion *Region, MachineInstr &PHI, + SmallVector &RegionIndices); + void getPHINonRegionIndices(LinearizedRegion *Region, MachineInstr &PHI, + SmallVector &PHINonRegionIndices); + + void storePHILinearizationInfoDest( + unsigned LDestReg, MachineInstr &PHI, + SmallVector *RegionIndices = nullptr); + + unsigned storePHILinearizationInfo(MachineInstr &PHI, + SmallVector *RegionIndices); + + void extractKilledPHIs(MachineBasicBlock *MBB); + void extractKilledPHIs(LinearizedRegion *LRegion); + + bool shrinkPHI(MachineInstr &PHI, SmallVector &PHIIndices, + unsigned *ReplaceReg); + + bool shrinkPHI(MachineInstr &PHI, unsigned CombinedSourceReg, + MachineBasicBlock *SourceMBB, + SmallVector &PHIIndices, unsigned *ReplaceReg); + + void replacePHI(MachineInstr &PHI, unsigned CombinedSourceReg, + MachineBasicBlock *LastMerge, + SmallVector &PHIRegionIndices); + void replaceEntryPHI(MachineInstr &PHI, unsigned CombinedSourceReg, + MachineBasicBlock *IfMBB, + SmallVector &PHIRegionIndices); + void replaceLiveOutRegs(MachineInstr &PHI, + SmallVector &PHIRegionIndices, + unsigned CombinedSourceReg, + LinearizedRegion *LRegion); + void rewriteRegionExitPHI(RegionMRT *Region, MachineBasicBlock *LastMerge, + MachineInstr &PHI, LinearizedRegion *LRegion); + + void rewriteRegionExitPHIs(RegionMRT *Region, MachineBasicBlock *LastMerge, + LinearizedRegion *LRegion); + void rewriteRegionEntryPHI(LinearizedRegion *Region, MachineBasicBlock *IfMBB, + MachineInstr &PHI); + void rewriteRegionEntryPHIs(LinearizedRegion *Region, + MachineBasicBlock *IfMBB); + + bool regionIsSimpleIf(RegionMRT *Region); + + void transformSimpleIfRegion(RegionMRT *Region); + + bool regionIsSimpleLoop(RegionMRT *Region); + + void transformSimpleLoopRegion(RegionMRT *Region); + + void eliminateDeadBranchOperands(MachineBasicBlock::instr_iterator &II); + + void insertUnconditionalBranch(MachineBasicBlock *MBB, + MachineBasicBlock *Dest, + DebugLoc DL = DebugLoc()); + + MachineBasicBlock *createLinearizedExitBlock(RegionMRT *Region); + + void replaceRegisterOutsideMBB(MachineBasicBlock *MBB, unsigned Register, + unsigned NewRegister); + + void insertMergePHI(MachineBasicBlock *IfBB, MachineBasicBlock *CodeBB, + MachineBasicBlock *MergeBB, unsigned DestRegister, + unsigned IfSourceRegister, unsigned CodeSourceRegister, + bool IsUndefIfSource = false); + + MachineBasicBlock *createIfBlock(MachineBasicBlock *MergeBB, + MachineBasicBlock *CodeBBStart, + MachineBasicBlock *CodeBBEnd, + MachineBasicBlock *SelectBB, unsigned IfReg, + bool InheritPreds); + + void createEntryPHIs(LinearizedRegion *CurrentRegion); + void resolvePHIInfos(MachineBasicBlock *FunctionEntry); + + void replaceRegisterWith(unsigned Register, unsigned NewRegister); + + MachineBasicBlock *createIfRegion(MachineBasicBlock *MergeBB, + MachineBasicBlock *CodeBB, + LinearizedRegion *LRegion, + unsigned BBSelectRegIn, + unsigned BBSelectRegOut); + + MachineBasicBlock * + createIfRegion(MachineBasicBlock *MergeMBB, LinearizedRegion *InnerRegion, + LinearizedRegion *CurrentRegion, MachineBasicBlock *SelectBB, + unsigned BBSelectRegIn, unsigned BBSelectRegOut); + void ensureCondIsNotKilled(SmallVector Cond); + + void rewriteCodeBBTerminator(MachineBasicBlock *CodeBB, + MachineBasicBlock *MergeBB, + unsigned BBSelectReg); + + MachineInstr *getDefInstr(unsigned Reg); + void insertChainedPHI(MachineBasicBlock *IfBB, MachineBasicBlock *CodeBB, + MachineBasicBlock *MergeBB, + LinearizedRegion *InnerRegion, unsigned DestReg, + unsigned SourceReg); + bool containsDef(MachineBasicBlock *MBB, LinearizedRegion *InnerRegion, + unsigned Register); + void rewriteLiveOutRegs(MachineBasicBlock *IfBB, MachineBasicBlock *CodeBB, + MachineBasicBlock *MergeBB, + LinearizedRegion *InnerRegion, + LinearizedRegion *LRegion); + + void splitLoopPHI(MachineInstr &PHI, MachineBasicBlock *Entry, + MachineBasicBlock *EntrySucc, LinearizedRegion *LRegion); + void splitLoopPHIs(MachineBasicBlock *Entry, MachineBasicBlock *EntrySucc, + LinearizedRegion *LRegion); + + MachineBasicBlock *splitEntry(LinearizedRegion *LRegion); + + LinearizedRegion *createLinearizedRegion(RegionMRT *Region); + + bool structurizeComplexRegion(RegionMRT *Region); + + bool structurizeRegion(RegionMRT *Region); + + bool structurizeRegions(RegionMRT *Region, bool isTopRegion); + +public: + static char ID; + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + MachineCFGStructurizer() : MachineFunctionPass(ID) { + initializeMachineCFGStructurizerPass(*PassRegistry::getPassRegistry()); + } + + void initFallthroughMap(MachineFunction &MF); + + void initializeSelectRegisters(RegionMRT *Region, unsigned ExistingEntryReg, + unsigned ExistingExitReg, + MachineRegisterInfo *MRI, + const TargetInstrInfo *TII); + + RegionMRT *RMRT; + void setRegionMRT(RegionMRT *RegionTree) { RMRT = RegionTree; } + + RegionMRT *getRegionMRT() { return RMRT; } + + bool runOnMachineFunction(MachineFunction &MF) override; +}; +} + +char MachineCFGStructurizer::ID = 0; + +bool MachineCFGStructurizer::regionIsSimpleIf(RegionMRT *Region) { + MachineBasicBlock *Entry = Region->getEntry(); + MachineBasicBlock *Succ = Region->getSucc(); + bool FoundBypass = false; + bool FoundIf = false; + + if (Entry->succ_size() != 2) { + return false; + } + + for (MachineBasicBlock::const_succ_iterator SI = Entry->succ_begin(), + E = Entry->succ_end(); + SI != E; ++SI) { + MachineBasicBlock *Current = *SI; + + if (Current == Succ) { + FoundBypass = true; + } else if ((Current->succ_size() == 1) && + *(Current->succ_begin()) == Succ) { + FoundIf = true; + } + } + + return FoundIf && FoundBypass; +} + +void MachineCFGStructurizer::transformSimpleIfRegion(RegionMRT *Region) { + MachineBasicBlock *Entry = Region->getEntry(); + MachineBasicBlock *Exit = Region->getExit(); + TII->convertNonUniformIfRegion(Entry, Exit); +} + +bool MachineCFGStructurizer::regionIsSimpleLoop(RegionMRT *Region) { + MachineBasicBlock *Entry = Region->getEntry(); + + if (Entry->succ_size() != 1) { + return false; + } + + int NumRegionExitEdges = 0; + MachineBasicBlock *BackBlock = NULL; + for (MachineBasicBlock::const_pred_iterator PI = Entry->succ_begin(), + PE = Entry->succ_end(); + PI != PE; ++PI) { + MachineBasicBlock *CurrentSP = *PI; + if (Region->contains(CurrentSP)) { + BackBlock = CurrentSP; + } + } + + bool HasBackedge = false; + + for (MachineBasicBlock::const_succ_iterator SI = Entry->succ_begin(), + SE = Entry->succ_end(); + SI != SE; ++SI) { + MachineBasicBlock *CurrentBS = *SI; + if (CurrentBS == Entry) { + HasBackedge = true; + } + } + + return NumRegionExitEdges == 1 && BackBlock->succ_size() == 2 && HasBackedge; +} + +void MachineCFGStructurizer::transformSimpleLoopRegion(RegionMRT *Region) { + MachineBasicBlock *Entry = Region->getEntry(); + MachineBasicBlock *BackBlock = NULL; + for (MachineBasicBlock::const_pred_iterator PI = Entry->succ_begin(), + PE = Entry->succ_end(); + PI != PE; ++PI) { + MachineBasicBlock *CurrentSP = *PI; + if (Region->contains(CurrentSP)) { + BackBlock = CurrentSP; + } + } + + TII->convertNonUniformLoopRegion(Entry, BackBlock); +} + +// If a region region is just a sequence of regions (and the exit +// block in the case of the top level region), we can simply skip +// linearizing it, because it is already linear/ +// +bool regionIsSequence(RegionMRT *Region) { + auto Children = Region->getChildren(); + for (auto CI : *Children) { + if (!CI->isRegion()) { + if (CI->getMBBMRT()->getMBB()->succ_size() > 1) { + return false; + } + } + } + return true; +} + +void MachineCFGStructurizer::getPHIRegionIndices( + RegionMRT *Region, MachineInstr &PHI, + SmallVector &PHIRegionIndices) { + unsigned NumInputs = getPHINumInputs(PHI); + for (unsigned i = 0; i < NumInputs; ++i) { + MachineBasicBlock *Pred = getPHIPred(PHI, i); + if (Region->contains(Pred)) { + PHIRegionIndices.push_back(i); + } + } +} + +void MachineCFGStructurizer::getPHIRegionIndices( + LinearizedRegion *Region, MachineInstr &PHI, + SmallVector &PHIRegionIndices) { + unsigned NumInputs = getPHINumInputs(PHI); + for (unsigned i = 0; i < NumInputs; ++i) { + MachineBasicBlock *Pred = getPHIPred(PHI, i); + if (Region->contains(Pred)) { + PHIRegionIndices.push_back(i); + } + } +} + +void MachineCFGStructurizer::getPHINonRegionIndices( + LinearizedRegion *Region, MachineInstr &PHI, + SmallVector &PHINonRegionIndices) { + unsigned NumInputs = getPHINumInputs(PHI); + for (unsigned i = 0; i < NumInputs; ++i) { + MachineBasicBlock *Pred = getPHIPred(PHI, i); + if (!Region->contains(Pred)) { + PHINonRegionIndices.push_back(i); + } + } +} + +void MachineCFGStructurizer::storePHILinearizationInfoDest( + unsigned LDestReg, MachineInstr &PHI, + SmallVector *RegionIndices) { + if (RegionIndices) { + for (auto i : *RegionIndices) { + PHIInfo.addSource(LDestReg, getPHISourceReg(PHI, i)); + } + } else { + unsigned NumInputs = getPHINumInputs(PHI); + for (unsigned i = 0; i < NumInputs; ++i) { + PHIInfo.addSource(LDestReg, getPHISourceReg(PHI, i)); + } + } +} + +unsigned MachineCFGStructurizer::storePHILinearizationInfo( + MachineInstr &PHI, SmallVector *RegionIndices) { + unsigned DestReg = getPHIDestReg(PHI); + unsigned LinearizeDestReg = + MRI->createVirtualRegister(MRI->getRegClass(DestReg)); + PHIInfo.addDest(LinearizeDestReg, PHI.getDebugLoc()); + storePHILinearizationInfoDest(LinearizeDestReg, PHI, RegionIndices); + return LinearizeDestReg; +} + +void MachineCFGStructurizer::extractKilledPHIs(MachineBasicBlock *MBB) { + // We need to create a new chain for the killed phi, but there is no + // need to do the renaming outside or inside the block. + SmallPtrSet PHIs; + for (MachineBasicBlock::instr_iterator I = MBB->instr_begin(), + E = MBB->instr_end(); + I != E; ++I) { + MachineInstr &Instr = *I; + if (Instr.isPHI()) { + unsigned PHIDestReg = getPHIDestReg(Instr); + DEBUG(dbgs() << "Extractking killed phi:\n"); + DEBUG(Instr.dump()); + PHIs.insert(&Instr); + PHIInfo.addDest(PHIDestReg, Instr.getDebugLoc()); + storePHILinearizationInfoDest(PHIDestReg, Instr); + } + } + + for (auto PI : PHIs) { + PI->eraseFromParent(); + } +} + +void MachineCFGStructurizer::extractKilledPHIs(LinearizedRegion *LRegion) { + // PHIs can only exist in the entry block. + extractKilledPHIs(LRegion->getEntry()); +} + +static bool isPHIRegionIndex(SmallVector PHIRegionIndices, + unsigned Index) { + for (auto i : PHIRegionIndices) { + if (i == Index) + return true; + } + return false; +} + +bool MachineCFGStructurizer::shrinkPHI(MachineInstr &PHI, + SmallVector &PHIIndices, + unsigned *ReplaceReg) { + return shrinkPHI(PHI, 0, nullptr, PHIIndices, ReplaceReg); +} + +bool MachineCFGStructurizer::shrinkPHI(MachineInstr &PHI, + unsigned CombinedSourceReg, + MachineBasicBlock *SourceMBB, + SmallVector &PHIIndices, + unsigned *ReplaceReg) { + DEBUG(dbgs() << "Shrink PHI: "); + DEBUG(PHI.dump()); + DEBUG(dbgs() << " to " << PrintReg(getPHIDestReg(PHI), TRI) + << " = PHI("); + + bool Replaced = false; + unsigned NumInputs = getPHINumInputs(PHI); + int SingleExternalEntryIndex = -1; + for (unsigned i = 0; i < NumInputs; ++i) { + if (!isPHIRegionIndex(PHIIndices, i)) { + if (SingleExternalEntryIndex == -1) { + // Single entry + SingleExternalEntryIndex = i; + } else { + // Multiple entries + SingleExternalEntryIndex = -2; + } + } + } + + if (SingleExternalEntryIndex > -1) { + *ReplaceReg = getPHISourceReg(PHI, SingleExternalEntryIndex); + // We should not rewrite the code, we should only pick up the single value + // that represents the shrunk PHI. + Replaced = true; + } else { + MachineBasicBlock *MBB = PHI.getParent(); + MachineInstrBuilder MIB = + BuildMI(*MBB, PHI, PHI.getDebugLoc(), TII->get(TargetOpcode::PHI), + getPHIDestReg(PHI)); + if (SourceMBB) { + MIB.addReg(CombinedSourceReg); + MIB.addMBB(SourceMBB); + DEBUG(dbgs() << PrintReg(CombinedSourceReg, TRI) << ", BB#" + << SourceMBB->getNumber()); + } + + for (unsigned i = 0; i < NumInputs; ++i) { + if (isPHIRegionIndex(PHIIndices, i)) { + continue; + } + unsigned SourceReg = getPHISourceReg(PHI, i); + MachineBasicBlock *SourcePred = getPHIPred(PHI, i); + MIB.addReg(SourceReg); + MIB.addMBB(SourcePred); + DEBUG(dbgs() << PrintReg(SourceReg, TRI) << ", BB#" + << SourcePred->getNumber()); + } + DEBUG(dbgs() << ")\n"); + } + PHI.eraseFromParent(); + return Replaced; +} + +void MachineCFGStructurizer::replacePHI( + MachineInstr &PHI, unsigned CombinedSourceReg, MachineBasicBlock *LastMerge, + SmallVector &PHIRegionIndices) { + DEBUG(dbgs() << "Replace PHI: "); + DEBUG(PHI.dump()); + DEBUG(dbgs() << " with " << PrintReg(getPHIDestReg(PHI), TRI) + << " = PHI("); + + bool HasExternalEdge = false; + unsigned NumInputs = getPHINumInputs(PHI); + for (unsigned i = 0; i < NumInputs; ++i) { + if (!isPHIRegionIndex(PHIRegionIndices, i)) { + HasExternalEdge = true; + } + } + + if (HasExternalEdge) { + MachineBasicBlock *MBB = PHI.getParent(); + MachineInstrBuilder MIB = + BuildMI(*MBB, PHI, PHI.getDebugLoc(), TII->get(TargetOpcode::PHI), + getPHIDestReg(PHI)); + MIB.addReg(CombinedSourceReg); + MIB.addMBB(LastMerge); + DEBUG(dbgs() << PrintReg(CombinedSourceReg, TRI) << ", BB#" + << LastMerge->getNumber()); + for (unsigned i = 0; i < NumInputs; ++i) { + if (isPHIRegionIndex(PHIRegionIndices, i)) { + continue; + } + unsigned SourceReg = getPHISourceReg(PHI, i); + MachineBasicBlock *SourcePred = getPHIPred(PHI, i); + MIB.addReg(SourceReg); + MIB.addMBB(SourcePred); + DEBUG(dbgs() << PrintReg(SourceReg, TRI) << ", BB#" + << SourcePred->getNumber()); + } + DEBUG(dbgs() << ")\n"); + } else { + replaceRegisterWith(getPHIDestReg(PHI), CombinedSourceReg); + } + PHI.eraseFromParent(); +} + +void MachineCFGStructurizer::replaceEntryPHI( + MachineInstr &PHI, unsigned CombinedSourceReg, MachineBasicBlock *IfMBB, + SmallVector &PHIRegionIndices) { + + DEBUG(dbgs() << "Replace entry PHI: "); + DEBUG(PHI.dump()); + DEBUG(dbgs() << " with " << PrintReg(getPHIDestReg(PHI), TRI) + << " = PHI("); + + MachineBasicBlock *MBB = PHI.getParent(); + MachineInstrBuilder MIB = + BuildMI(*MBB, PHI, PHI.getDebugLoc(), TII->get(TargetOpcode::PHI), + getPHIDestReg(PHI)); + MIB.addReg(CombinedSourceReg); + MIB.addMBB(IfMBB); + DEBUG(dbgs() << PrintReg(CombinedSourceReg, TRI) << ", BB#" + << IfMBB->getNumber()); + unsigned NumInputs = getPHINumInputs(PHI); + for (unsigned i = 0; i < NumInputs; ++i) { + if (isPHIRegionIndex(PHIRegionIndices, i)) { + continue; + } + unsigned SourceReg = getPHISourceReg(PHI, i); + MachineBasicBlock *SourcePred = getPHIPred(PHI, i); + MIB.addReg(SourceReg); + MIB.addMBB(SourcePred); + DEBUG(dbgs() << PrintReg(SourceReg, TRI) << ", BB#" + << SourcePred->getNumber()); + } + DEBUG(dbgs() << ")\n"); + PHI.eraseFromParent(); +} + +void MachineCFGStructurizer::replaceLiveOutRegs( + MachineInstr &PHI, SmallVector &PHIRegionIndices, + unsigned CombinedSourceReg, LinearizedRegion *LRegion) { + bool WasLiveOut = false; + for (auto PII : PHIRegionIndices) { + unsigned Reg = getPHISourceReg(PHI, PII); + if (LRegion->isLiveOut(Reg)) { + LRegion->removeLiveOut(Reg); + WasLiveOut = true; + } + } + + if (WasLiveOut) + LRegion->addLiveOut(CombinedSourceReg); +} + +void MachineCFGStructurizer::rewriteRegionExitPHI(RegionMRT *Region, + MachineBasicBlock *LastMerge, + MachineInstr &PHI, + LinearizedRegion *LRegion) { + SmallVector PHIRegionIndices; + getPHIRegionIndices(Region, PHI, PHIRegionIndices); + unsigned LinearizedSourceReg = + storePHILinearizationInfo(PHI, &PHIRegionIndices); + + replacePHI(PHI, LinearizedSourceReg, LastMerge, PHIRegionIndices); + replaceLiveOutRegs(PHI, PHIRegionIndices, LinearizedSourceReg, LRegion); +} + +void MachineCFGStructurizer::rewriteRegionEntryPHI(LinearizedRegion *Region, + MachineBasicBlock *IfMBB, + MachineInstr &PHI) { + SmallVector PHINonRegionIndices; + getPHINonRegionIndices(Region, PHI, PHINonRegionIndices); + unsigned LinearizedSourceReg = + storePHILinearizationInfo(PHI, &PHINonRegionIndices); + replaceEntryPHI(PHI, LinearizedSourceReg, IfMBB, PHINonRegionIndices); +} + +static void collectPHIs(MachineBasicBlock *MBB, + SmallVector &PHIs) { + for (auto &BBI : *MBB) { + if (BBI.isPHI()) { + PHIs.push_back(&BBI); + } + } +} + +void MachineCFGStructurizer::rewriteRegionExitPHIs(RegionMRT *Region, + MachineBasicBlock *LastMerge, + LinearizedRegion *LRegion) { + SmallVector PHIs; + auto Exit = Region->getSucc(); + if (Exit == nullptr) + return; + + collectPHIs(Exit, PHIs); + + for (auto PHII : PHIs) { + rewriteRegionExitPHI(Region, LastMerge, *PHII, LRegion); + } +} + +void MachineCFGStructurizer::rewriteRegionEntryPHIs(LinearizedRegion *Region, + MachineBasicBlock *IfMBB) { + SmallVector PHIs; + auto Entry = Region->getEntry(); + + collectPHIs(Entry, PHIs); + + for (auto PHII : PHIs) { + rewriteRegionEntryPHI(Region, IfMBB, *PHII); + } +} + +void MachineCFGStructurizer::insertUnconditionalBranch(MachineBasicBlock *MBB, + MachineBasicBlock *Dest, + DebugLoc DL) { + MachineBasicBlock::instr_iterator Terminator = MBB->getFirstInstrTerminator(); + bool HasTerminator = Terminator != MBB->instr_end(); + if (HasTerminator) { + TII->ReplaceTailWithBranchTo(Terminator, Dest); + } + if (++MachineFunction::iterator(MBB) != MachineFunction::iterator(Dest)) { + TII->InsertUnconditionalBranch(*MBB, Dest, DL); + } +} + +static MachineBasicBlock *getSingleExitNode(MachineFunction &MF) { + MachineBasicBlock *result = nullptr; + for (auto &MFI : MF) { + if (MFI.succ_size() == 0) { + if (result == nullptr) { + result = &MFI; + } else { + return nullptr; + } + } + } + + return result; +} + +static bool hasOneExitNode(MachineFunction &MF) { + return getSingleExitNode(MF) != nullptr; +} + +MachineBasicBlock * +MachineCFGStructurizer::createLinearizedExitBlock(RegionMRT *Region) { + auto Exit = Region->getSucc(); + + // If the exit is the end of the function, we just use the existing + MachineFunction *MF = Region->getEntry()->getParent(); + if (Exit == nullptr && hasOneExitNode(*MF)) { + return &(*(--(Region->getEntry()->getParent()->end()))); + } + + MachineBasicBlock *LastMerge = MF->CreateMachineBasicBlock(); + if (Exit == nullptr) { + MachineFunction::iterator ExitIter = MF->end(); + MF->insert(ExitIter, LastMerge); + } else { + MachineFunction::iterator ExitIter = Exit->getIterator(); + MF->insert(ExitIter, LastMerge); + LastMerge->addSuccessor(Exit); + insertUnconditionalBranch(LastMerge, Exit); + DEBUG(dbgs() << "Created exit block: " << LastMerge->getNumber() << "\n"); + } + return LastMerge; +} + +void MachineCFGStructurizer::replaceRegisterOutsideMBB(MachineBasicBlock *MBB, + unsigned Register, + unsigned NewRegister) { + assert(Register != NewRegister && "Cannot replace a reg with itself"); + + for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(Register), + E = MRI->reg_end(); + I != E;) { + MachineOperand &O = *I; + ++I; + if (O.getParent()->getParent() != MBB) { + if (TargetRegisterInfo::isPhysicalRegister(NewRegister)) { + llvm_unreachable("Cannot substitute physical registers"); + // We don't handle physical registers, but if we need to in the future + // This is how we do it: O.substPhysReg(NewRegister, *TRI); + } else { + O.setReg(NewRegister); + } + } + } +} + +void MachineCFGStructurizer::insertMergePHI(MachineBasicBlock *IfBB, + MachineBasicBlock *CodeBB, + MachineBasicBlock *MergeBB, + unsigned DestRegister, + unsigned IfSourceRegister, + unsigned CodeSourceRegister, + bool IsUndefIfSource) { + // If this is the function exit block, we don't need a phi. + if (MergeBB->succ_begin() == MergeBB->succ_end()) { + return; + } + DEBUG(dbgs() << "Merge PHI (BB#" << MergeBB->getNumber() + << "): " << PrintReg(DestRegister, TRI) << " = PHI(" + << PrintReg(IfSourceRegister, TRI) << ", BB#" + << IfBB->getNumber() << PrintReg(CodeSourceRegister, TRI) + << ", BB#" << CodeBB->getNumber() << ")\n"); + DebugLoc DL = MergeBB->findDebugLoc(MergeBB->begin()); + MachineInstrBuilder MIB = BuildMI(*MergeBB, MergeBB->instr_begin(), DL, + TII->get(TargetOpcode::PHI), DestRegister); + if (IsUndefIfSource && false) { + MIB.addReg(IfSourceRegister, RegState::Undef); + } else { + MIB.addReg(IfSourceRegister); + } + MIB.addMBB(IfBB); + MIB.addReg(CodeSourceRegister); + MIB.addMBB(CodeBB); +} + +static void removeExternalCFGSuccessors(MachineBasicBlock *MBB) { + for (MachineBasicBlock::succ_iterator PI = MBB->succ_begin(), + E = MBB->succ_end(); + PI != E; ++PI) { + if ((*PI) != MBB) { + (MBB)->removeSuccessor(*PI); + } + } +} + +static void removeExternalCFGEdges(MachineBasicBlock *StartMBB, + MachineBasicBlock *EndMBB) { + + // We have to check against the StartMBB successor becasuse a + // structurized region with a loop will have the entry block split, + // and the backedge will go to the entry successor. + DenseSet> Succs; + unsigned SuccSize = StartMBB->succ_size(); + if (SuccSize > 0) { + MachineBasicBlock *StartMBBSucc = *(StartMBB->succ_begin()); + for (MachineBasicBlock::succ_iterator PI = EndMBB->succ_begin(), + E = EndMBB->succ_end(); + PI != E; ++PI) { + // Either we have a back-edge to the entry block, or a back-edge to the + // succesor of the entry block since the block may be split. + if ((*PI) != StartMBB && + !((*PI) == StartMBBSucc && StartMBB != EndMBB && SuccSize == 1)) { + Succs.insert( + std::pair(EndMBB, *PI)); + } + } + } + + for (MachineBasicBlock::pred_iterator PI = StartMBB->pred_begin(), + E = StartMBB->pred_end(); + PI != E; ++PI) { + if ((*PI) != EndMBB) { + Succs.insert( + std::pair(*PI, StartMBB)); + } + } + + for (auto SI : Succs) { + std::pair Edge = SI; + DEBUG(dbgs() << "Removing edge: BB#" << Edge.first->getNumber() << " -> BB#" + << Edge.second->getNumber() << "\n"); + Edge.first->removeSuccessor(Edge.second); + } +} + +MachineBasicBlock *MachineCFGStructurizer::createIfBlock( + MachineBasicBlock *MergeBB, MachineBasicBlock *CodeBBStart, + MachineBasicBlock *CodeBBEnd, MachineBasicBlock *SelectBB, unsigned IfReg, + bool InheritPreds) { + MachineFunction *MF = MergeBB->getParent(); + MachineBasicBlock *IfBB = MF->CreateMachineBasicBlock(); + + if (InheritPreds) { + for (MachineBasicBlock::pred_iterator PI = CodeBBStart->pred_begin(), + E = CodeBBStart->pred_end(); + PI != E; ++PI) { + if ((*PI) != CodeBBEnd) { + MachineBasicBlock *Pred = (*PI); + Pred->addSuccessor(IfBB); + for (auto &TI : Pred->terminators()) { + for (auto &UI : TI.uses()) { + if (UI.isMBB() && UI.getMBB() == CodeBBStart) { + UI.setMBB(IfBB); + } + } + } + } + } + } + + removeExternalCFGEdges(CodeBBStart, CodeBBEnd); + + auto CodeBBStartI = CodeBBStart->getIterator(); + auto CodeBBEndI = CodeBBEnd->getIterator(); + auto MergeIter = MergeBB->getIterator(); + MF->insert(MergeIter, IfBB); + MF->splice(MergeIter, CodeBBStartI, ++CodeBBEndI); + IfBB->addSuccessor(MergeBB); + IfBB->addSuccessor(CodeBBStart); + + DEBUG(dbgs() << "Created If block: " << IfBB->getNumber() << "\n"); + // Ensure that the MergeBB is a succesor of the CodeEndBB. + if (!CodeBBEnd->isSuccessor(MergeBB)) + CodeBBEnd->addSuccessor(MergeBB); + + DEBUG(dbgs() << "Moved MBB#" << CodeBBStart->getNumber() << " through MBB#" + << CodeBBEnd->getNumber() << "\n"); + + // If we have a single predecessor we can find a reasonable debug location + MachineBasicBlock *SinglePred = + CodeBBStart->pred_size() == 1 ? *(CodeBBStart->pred_begin()) : nullptr; + DebugLoc DL = SinglePred + ? SinglePred->findDebugLoc(SinglePred->getFirstTerminator()) + : DebugLoc(); + + unsigned Reg = + TII->insertEQ(IfBB, IfBB->begin(), DL, IfReg, + SelectBB->getNumber() /* CodeBBStart->getNumber() */); + if (&(*(IfBB->getParent()->begin())) == IfBB) { + TII->insertAssign(*IfBB, IfBB->begin(), DL, IfReg, + CodeBBStart->getNumber()); + } + MachineOperand RegOp = MachineOperand::CreateReg(Reg, false, false, true); + ArrayRef Cond(RegOp); + TII->insertBranch(*IfBB, MergeBB, CodeBBStart, Cond, DL); + + return IfBB; +} + +void MachineCFGStructurizer::ensureCondIsNotKilled( + SmallVector Cond) { + if (Cond.size() != 1) + return; + if (!Cond[0].isReg()) + return; + + unsigned CondReg = Cond[0].getReg(); + for (auto UI = MRI->use_begin(CondReg), E = MRI->use_end(); UI != E; ++UI) { + (*UI).setIsKill(false); + } +} + +void MachineCFGStructurizer::rewriteCodeBBTerminator(MachineBasicBlock *CodeBB, + MachineBasicBlock *MergeBB, + unsigned BBSelectReg) { + MachineBasicBlock *TrueBB = nullptr; + MachineBasicBlock *FalseBB = nullptr; + SmallVector Cond; + MachineBasicBlock *FallthroughBB = FallthroughMap[CodeBB]; + TII->analyzeBranch(*CodeBB, TrueBB, FalseBB, Cond); + + DebugLoc DL = CodeBB->findDebugLoc(CodeBB->getFirstTerminator()); + + if (FalseBB == nullptr && TrueBB == nullptr && FallthroughBB == nullptr) { + // This is an exit block, hence no successors. We will assign the bb select + // register + // to the entry block. + TII->insertAssign(*CodeBB, CodeBB->getFirstTerminator(), DL, BBSelectReg, + CodeBB->getParent()->begin()->getNumber()); + insertUnconditionalBranch(CodeBB, MergeBB, DL); + return; + } + + if (FalseBB == nullptr && TrueBB == nullptr) { + TrueBB = FallthroughBB; + } else if (TrueBB != nullptr) { + FalseBB = FallthroughBB ? FallthroughBB : FalseBB; + } + + if ((TrueBB != nullptr && FalseBB == nullptr) || (TrueBB == FalseBB)) { + TII->insertAssign(*CodeBB, CodeBB->getFirstTerminator(), DL, BBSelectReg, + TrueBB->getNumber()); + } else { + const TargetRegisterClass *RegClass = MRI->getRegClass(BBSelectReg); + unsigned TrueBBReg = MRI->createVirtualRegister(RegClass); + unsigned FalseBBReg = MRI->createVirtualRegister(RegClass); + TII->insertAssign(*CodeBB, CodeBB->getFirstTerminator(), DL, TrueBBReg, + TrueBB->getNumber()); + TII->insertAssign(*CodeBB, CodeBB->getFirstTerminator(), DL, FalseBBReg, + FalseBB->getNumber()); + ensureCondIsNotKilled(Cond); + TII->insertSelect(*CodeBB, CodeBB->getFirstTerminator(), DL, BBSelectReg, + Cond, TrueBBReg, FalseBBReg); + } + + insertUnconditionalBranch(CodeBB, MergeBB, DL); +} + +MachineInstr *MachineCFGStructurizer::getDefInstr(unsigned Reg) { + if (MRI->def_begin(Reg) == MRI->def_end()) { + DEBUG(dbgs() << "Register " << PrintReg(Reg, MRI->getTargetRegisterInfo()) + << " has NO defs\n"); + } else if (!MRI->hasOneDef(Reg)) { + DEBUG(dbgs() << "Register " << PrintReg(Reg, MRI->getTargetRegisterInfo()) + << " has multiple defs\n"); + DEBUG(dbgs() << "DEFS BEGIN:\n"); + for (auto DI = MRI->def_begin(Reg), DE = MRI->def_end(); DI != DE; ++DI) { + DEBUG(DI->getParent()->dump()); + } + DEBUG(dbgs() << "DEFS END\n"); + } + + assert(MRI->hasOneDef(Reg) && "Register has multiple definitions"); + return (*(MRI->def_begin(Reg))).getParent(); +} + +void MachineCFGStructurizer::insertChainedPHI(MachineBasicBlock *IfBB, + MachineBasicBlock *CodeBB, + MachineBasicBlock *MergeBB, + LinearizedRegion *InnerRegion, + unsigned DestReg, + unsigned SourceReg) { + // In this function we know we are part of a chain already, so we need + // to add the registers to the existing chain, and rename the register + // inside the region. + MachineInstr *DefInstr = getDefInstr(SourceReg); + if (DefInstr->isPHI() && DefInstr->getParent() == CodeBB) { + // Handle the case where the def is a PHI-def, then we only + // need to do renaming. Special care needs to be taken if the + // PHI-def is part of an existing chain, or if a new one needs to be + // created. + InnerRegion->replaceRegisterInsideRegion(SourceReg, DestReg, true, MRI); + + // We collect all PHI Information, and if we are at the region entry, + // all PHIs will be removed, and then re-introduced if needed. + storePHILinearizationInfoDest(DestReg, *DefInstr); + // We have picked up all the information we need now and can remove + // the PHI + PHIInfo.removeSource(DestReg, SourceReg); + DefInstr->eraseFromParent(); + } else { + InnerRegion->replaceRegisterOutsideRegion(SourceReg, DestReg, false, MRI); + + const TargetRegisterClass *RegClass = MRI->getRegClass(DestReg); + unsigned NextDestReg = MRI->createVirtualRegister(RegClass); + bool IsLastDef = PHIInfo.getNumSources(DestReg) == 1; + DEBUG(dbgs() << "Insert Chained PHI\n"); + insertMergePHI(IfBB, InnerRegion->getExit(), MergeBB, DestReg, NextDestReg, + SourceReg, IsLastDef); + + PHIInfo.removeSource(DestReg, SourceReg); + if (IsLastDef) { + DebugLoc DL = IfBB->findDebugLoc(IfBB->getFirstTerminator()); + TII->insertAssign(*IfBB, IfBB->getFirstTerminator(), DL, NextDestReg, 0); + PHIInfo.deleteDef(DestReg); + } else { + PHIInfo.replaceDef(DestReg, NextDestReg); + } + } +} + +bool MachineCFGStructurizer::containsDef(MachineBasicBlock *MBB, + LinearizedRegion *InnerRegion, + unsigned Register) { + return getDefInstr(Register)->getParent() == MBB || + InnerRegion->contains(getDefInstr(Register)->getParent()); +} + +void MachineCFGStructurizer::rewriteLiveOutRegs(MachineBasicBlock *IfBB, + MachineBasicBlock *CodeBB, + MachineBasicBlock *MergeBB, + LinearizedRegion *InnerRegion, + LinearizedRegion *LRegion) { + DenseSet *LiveOuts = InnerRegion->getLiveOuts(); + SmallVector OldLiveOuts; + for (auto OLI : *LiveOuts) { + OldLiveOuts.push_back(OLI); + } + + for (auto LI : OldLiveOuts) { + DEBUG(dbgs() << "LiveOut: " << PrintReg(LI, TRI)); + if (!containsDef(CodeBB, InnerRegion, LI)) { + // If the register simly lives through the CodeBB, we don't have + // to rewrite anything since the register is not defined in this + // part of the code. + DEBUG(dbgs() << "- through"); + continue; + } + DEBUG(dbgs() << "\n"); + unsigned Reg = LI; + if (!PHIInfo.isSource(Reg)) { + // If the register is just a live out def and not part of a phi + // chain, we need to create a PHI node to handle the if region, + // and replace all uses outside of the region with the new dest + // register. + const TargetRegisterClass *RegClass = MRI->getRegClass(Reg); + unsigned PHIDestReg = MRI->createVirtualRegister(RegClass); + unsigned IfSourceReg = MRI->createVirtualRegister(RegClass); + // Create initializer, this value is never used, but is needed + // to satisfy SSA. + DEBUG(dbgs() << "Initializer for reg: " << PrintReg(Reg) << "\n"); + TII->insertAssign(*IfBB, IfBB->getFirstTerminator(), DebugLoc(), + IfSourceReg, 0); + + InnerRegion->replaceRegisterOutsideRegion(Reg, PHIDestReg, true, MRI); + DEBUG(dbgs() << "Insert Non-Chained Live out PHI\n"); + insertMergePHI(IfBB, InnerRegion->getExit(), MergeBB, PHIDestReg, + IfSourceReg, Reg, true); + } + + unsigned DestReg; + while (PHIInfo.findDest(LI, DestReg)) { + // If this register is a PHI Source, we need to update the PHI + // chain. + insertChainedPHI(IfBB, CodeBB, MergeBB, InnerRegion, DestReg, LI); + } + } + PHIInfo.dump(MRI); +} + +void MachineCFGStructurizer::createEntryPHIs(LinearizedRegion *CurrentRegion) { + MachineBasicBlock *Entry = CurrentRegion->getEntry(); + MachineBasicBlock *Exit = CurrentRegion->getExit(); + MachineBasicBlock *Pred = *(Entry->pred_begin()); + + DEBUG(dbgs() << "RegionExit: " << Exit->getNumber() + << " Pred: " << Pred->getNumber() << "\n"); + + PHIInfo.dump(MRI); + + for (auto DRI = PHIInfo.dests_begin(), DE = PHIInfo.dests_end(); DRI != DE; + ++DRI) { + + int NumSources = 0; + unsigned DestReg = *DRI; + auto SE = PHIInfo.sources_end(DestReg); + + for (auto SRI = PHIInfo.sources_begin(DestReg); SRI != SE; ++SRI) { + NumSources++; + } + + if (NumSources == 1) { + auto SRI = PHIInfo.sources_begin(DestReg); + unsigned SourceReg = *SRI; + replaceRegisterWith(DestReg, SourceReg); + } else { + DebugLoc DL = Entry->findDebugLoc(Entry->begin()); + MachineInstrBuilder MIB = BuildMI(*Entry, Entry->instr_begin(), DL, + TII->get(TargetOpcode::PHI), DestReg); + DEBUG(dbgs() << "Entry PHI " << PrintReg(*DRI, TRI) << " = PHI("); + + for (auto SRI = PHIInfo.sources_begin(DestReg); SRI != SE; ++SRI) { + unsigned SourceReg = *SRI; + MIB.addReg(SourceReg); + DEBUG(dbgs() << PrintReg(SourceReg, TRI) << ", BB#"); + + if (CurrentRegion->isDefinedInRegion(SourceReg, MRI)) { + MIB.addMBB(Exit); + DEBUG(dbgs() << Exit->getNumber()); + } else { + MIB.addMBB(Pred); + DEBUG(dbgs() << Pred->getNumber()); + } + } + DEBUG(dbgs() << ")\n"); + } + } + PHIInfo.clear(); +} + +void MachineCFGStructurizer::replaceRegisterWith(unsigned Register, + unsigned NewRegister) { + assert(Register != NewRegister && "Cannot replace a reg with itself"); + + for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(Register), + E = MRI->reg_end(); + I != E;) { + MachineOperand &O = *I; + ++I; + if (TargetRegisterInfo::isPhysicalRegister(NewRegister)) { + DEBUG(dbgs() << "Trying to substitute physical register: " + << PrintReg(NewRegister, MRI->getTargetRegisterInfo()) + << "\n"); + llvm_unreachable("Cannot substitute physical registers"); + // We don't handle physical registers, but if we need to + // in the future This is how we do it: + // O.substPhysReg(NewRegister, *TRI); + } else { + DEBUG(dbgs() << "Replacing register: " + << PrintReg(Register, MRI->getTargetRegisterInfo()) + << " with " + << PrintReg(NewRegister, MRI->getTargetRegisterInfo()) + << "\n"); + O.setReg(NewRegister); + } + } + PHIInfo.deleteDef(Register); + + getRegionMRT()->replaceLiveOutReg(Register, NewRegister); + + DEBUG(PHIInfo.dump(MRI)); +} + +void MachineCFGStructurizer::resolvePHIInfos(MachineBasicBlock *FunctionEntry) { + DEBUG(dbgs() << "Resolve PHI Infos\n"); + DEBUG(PHIInfo.dump(MRI)); + for (auto DRI = PHIInfo.dests_begin(), DE = PHIInfo.dests_end(); DRI != DE; + ++DRI) { + unsigned DestReg = *DRI; + DEBUG(dbgs() << "DestReg: " << PrintReg(DestReg, TRI) << "\n"); + auto SRI = PHIInfo.sources_begin(DestReg); + unsigned SourceReg = *SRI; + DEBUG(dbgs() << "DestReg: " << PrintReg(DestReg, TRI) + << " SourceReg: " << PrintReg(SourceReg, TRI) << "\n"); + + assert(PHIInfo.sources_end(DestReg) == ++SRI && + "More than one phi source in entry node"); + replaceRegisterWith(DestReg, SourceReg); + } +} + +static bool isFunctionEntryBlock(MachineBasicBlock *MBB) { + return ((&(*(MBB->getParent()->begin()))) == MBB); +} + +MachineBasicBlock *MachineCFGStructurizer::createIfRegion( + MachineBasicBlock *MergeBB, MachineBasicBlock *CodeBB, + LinearizedRegion *CurrentRegion, unsigned BBSelectRegIn, + unsigned BBSelectRegOut) { + if (isFunctionEntryBlock(CodeBB) && !CurrentRegion->getHasLoop()) { + // Handle non-loop function entry block. + // We need to allow loops to the entry block and then + rewriteCodeBBTerminator(CodeBB, MergeBB, BBSelectRegOut); + resolvePHIInfos(CodeBB); + removeExternalCFGSuccessors(CodeBB); + CodeBB->addSuccessor(MergeBB); + CurrentRegion->addMBB(CodeBB); + return nullptr; + } + if (CurrentRegion->getEntry() == CodeBB && !CurrentRegion->getHasLoop()) { + // Handle non-loop region entry block. + rewriteCodeBBTerminator(CodeBB, MergeBB, BBSelectRegOut); + createEntryPHIs(CurrentRegion); + removeExternalCFGSuccessors(CodeBB); + CodeBB->addSuccessor(MergeBB); + CurrentRegion->addMBB(CodeBB); + return nullptr; + } else { + // Handle internal block. + const TargetRegisterClass *RegClass = MRI->getRegClass(BBSelectRegIn); + unsigned CodeBBSelectReg = MRI->createVirtualRegister(RegClass); + rewriteCodeBBTerminator(CodeBB, MergeBB, CodeBBSelectReg); + bool IsRegionEntryBB = CurrentRegion->getEntry() == CodeBB; + MachineBasicBlock *IfBB = createIfBlock(MergeBB, CodeBB, CodeBB, CodeBB, + BBSelectRegIn, IsRegionEntryBB); + CurrentRegion->addMBB(IfBB); + // If this is the entry block we need to make the If block the new + // linearized region entry. + if (IsRegionEntryBB) { + CurrentRegion->setEntry(IfBB); + + if (CurrentRegion->getHasLoop()) { + MachineBasicBlock *RegionExit = CurrentRegion->getExit(); + MachineBasicBlock *ETrueBB = nullptr; + MachineBasicBlock *EFalseBB = nullptr; + SmallVector ECond; + + DebugLoc DL = + DebugLoc(); // RegionExit->getFirstTerminator()->getDebugLoc(); + TII->analyzeBranch(*RegionExit, ETrueBB, EFalseBB, ECond); + TII->removeBranch(*RegionExit); + + // We need to create a backedge if there is a loop + unsigned Reg = TII->insertNE( + RegionExit, RegionExit->instr_end(), DL, BBSelectRegOut, + CurrentRegion->getRegionMRT()->getEntry()->getNumber()); + MachineOperand RegOp = + MachineOperand::CreateReg(Reg, false, false, true); + ArrayRef Cond(RegOp); + DEBUG(dbgs() << "RegionExitReg: "); + DEBUG(Cond[0].print(dbgs(), TRI)); + DEBUG(dbgs() << "\n"); + TII->insertBranch(*RegionExit, CurrentRegion->getEntry(), RegionExit, + Cond, DebugLoc()); + RegionExit->addSuccessor(CurrentRegion->getEntry()); + } + } + CurrentRegion->addMBB(CodeBB); + LinearizedRegion InnerRegion(CodeBB, MRI, TRI, PHIInfo); + + InnerRegion.setParent(CurrentRegion); + DEBUG(dbgs() << "Insert BB Select PHI (BB)\n"); + insertMergePHI(IfBB, CodeBB, MergeBB, BBSelectRegOut, BBSelectRegIn, + CodeBBSelectReg); + InnerRegion.addMBB(MergeBB); + + DEBUG(InnerRegion.print(dbgs(), TRI)); + rewriteLiveOutRegs(IfBB, CodeBB, MergeBB, &InnerRegion, CurrentRegion); + extractKilledPHIs(CodeBB); + if (IsRegionEntryBB) { + createEntryPHIs(CurrentRegion); + } + return IfBB; + } +} + +MachineBasicBlock *MachineCFGStructurizer::createIfRegion( + MachineBasicBlock *MergeBB, LinearizedRegion *InnerRegion, + LinearizedRegion *CurrentRegion, MachineBasicBlock *SelectBB, + unsigned BBSelectRegIn, unsigned BBSelectRegOut) { + unsigned CodeBBSelectReg = InnerRegion->getBBSelectRegOut(); + MachineBasicBlock *CodeEntryBB = InnerRegion->getEntry(); + MachineBasicBlock *CodeExitBB = InnerRegion->getExit(); + MachineBasicBlock *IfBB = createIfBlock(MergeBB, CodeEntryBB, CodeExitBB, + SelectBB, BBSelectRegIn, true); + CurrentRegion->addMBB(IfBB); + rewriteLiveOutRegs(IfBB, CodeEntryBB, MergeBB, InnerRegion, CurrentRegion); + DEBUG(dbgs() << "Insert BB Select PHI (region)\n"); + insertMergePHI(IfBB, CodeExitBB, MergeBB, BBSelectRegOut, BBSelectRegIn, + CodeBBSelectReg); + + rewriteRegionEntryPHIs(InnerRegion, IfBB); + return IfBB; +} + +void MachineCFGStructurizer::splitLoopPHI(MachineInstr &PHI, + MachineBasicBlock *Entry, + MachineBasicBlock *EntrySucc, + LinearizedRegion *LRegion) { + SmallVector PHIRegionIndices; + getPHIRegionIndices(LRegion, PHI, PHIRegionIndices); + + assert(PHIRegionIndices.size() == 1); + + unsigned RegionIndex = PHIRegionIndices[0]; + unsigned RegionSourceReg = getPHISourceReg(PHI, RegionIndex); + MachineBasicBlock *RegionSourceMBB = getPHIPred(PHI, RegionIndex); + unsigned PHIDest = getPHIDestReg(PHI); + unsigned PHISource = PHIDest; + unsigned ReplaceReg; + + if (shrinkPHI(PHI, PHIRegionIndices, &ReplaceReg)) { + PHISource = ReplaceReg; + } + + const TargetRegisterClass *RegClass = MRI->getRegClass(PHIDest); + unsigned NewDestReg = MRI->createVirtualRegister(RegClass); + LRegion->replaceRegisterInsideRegion(PHIDest, NewDestReg, false, MRI); + MachineInstrBuilder MIB = + BuildMI(*EntrySucc, EntrySucc->instr_begin(), PHI.getDebugLoc(), + TII->get(TargetOpcode::PHI), NewDestReg); + DEBUG(dbgs() << "Split Entry PHI " << PrintReg(NewDestReg, TRI) + << " = PHI("); + MIB.addReg(PHISource); + MIB.addMBB(Entry); + DEBUG(dbgs() << PrintReg(PHISource, TRI) << ", BB#" << Entry->getNumber()); + MIB.addReg(RegionSourceReg); + MIB.addMBB(RegionSourceMBB); + DEBUG(dbgs() << " ," << PrintReg(RegionSourceReg, TRI) << ", BB#" + << RegionSourceMBB->getNumber() << ")\n"); +} + +void MachineCFGStructurizer::splitLoopPHIs(MachineBasicBlock *Entry, + MachineBasicBlock *EntrySucc, + LinearizedRegion *LRegion) { + SmallVector PHIs; + collectPHIs(Entry, PHIs); + + for (auto PHII : PHIs) { + splitLoopPHI(*PHII, Entry, EntrySucc, LRegion); + } +} + +// Split the entry block separating PHI-nodes and the rest of the code +// This is needed to insert an initializer for the bb select register +// inloop regions. + +MachineBasicBlock * +MachineCFGStructurizer::splitEntry(LinearizedRegion *LRegion) { + MachineBasicBlock *Entry = LRegion->getEntry(); + MachineBasicBlock *EntrySucc = Entry->split(Entry->getFirstNonPHI()); + MachineBasicBlock *Exit = LRegion->getExit(); + + DEBUG(dbgs() << "Split BB#" << Entry->getNumber() << " to BB#" + << Entry->getNumber() << " -> BB#" << EntrySucc->getNumber() + << "\n"); + LRegion->addMBB(EntrySucc); + + // Make the backedge go to Entry Succ + if (Exit->isSuccessor(Entry)) { + Exit->removeSuccessor(Entry); + } + Exit->addSuccessor(EntrySucc); + MachineInstr &Branch = *(Exit->instr_rbegin()); + for (auto &UI : Branch.uses()) { + if (UI.isMBB() && UI.getMBB() == Entry) { + UI.setMBB(EntrySucc); + } + } + + splitLoopPHIs(Entry, EntrySucc, LRegion); + + return EntrySucc; +} + +LinearizedRegion * +MachineCFGStructurizer::createLinearizedRegion(RegionMRT *Region) { + LinearizedRegion *LRegion = Region->getLinearizedRegion(); + LRegion->initLiveOut(Region, MRI, TRI, PHIInfo); + LRegion->setEntry(Region->getEntry()); + return LRegion; +} + +static void removeOldExitPreds(RegionMRT *Region) { + MachineBasicBlock *Exit = Region->getSucc(); + if (Exit == nullptr) { + return; + } + for (MachineBasicBlock::pred_iterator PI = Exit->pred_begin(), + E = Exit->pred_end(); + PI != E; ++PI) { + if (Region->contains(*PI)) { + (*PI)->removeSuccessor(Exit); + } + } +} + +static bool containsNewBackedge(RegionMRT *Region) { + SmallPtrSet MBBs; + + SetVector *Children = Region->getChildren(); + // Need to traverse this in reverse since it is in post order. + for (auto CI = Children->rbegin(), CE = Children->rend(); CI != CE; ++CI) { + if ((*CI)->isMBB()) { + MachineBasicBlock *MBB = (*CI)->getMBBMRT()->getMBB(); + MBBs.insert(MBB); + for (auto SI = MBB->succ_begin(), SE = MBB->succ_end(); SI != SE; ++SI) { + if (MBBs.count(*SI) != 0) { + return true; + } + } + } + } + return false; +} + +bool MachineCFGStructurizer::structurizeComplexRegion(RegionMRT *Region) { + auto *LRegion = createLinearizedRegion(Region); + LRegion->setHasLoop(containsNewBackedge(Region)); + MachineBasicBlock *LastMerge = createLinearizedExitBlock(Region); + MachineBasicBlock *CurrentMerge = LastMerge; + LRegion->addMBB(LastMerge); + LRegion->setExit(LastMerge); + + rewriteRegionExitPHIs(Region, LastMerge, LRegion); + removeOldExitPreds(Region); + + DEBUG(PHIInfo.dump(MRI)); + + auto Entry = Region->getEntry(); + + SetVector *Children = Region->getChildren(); + DEBUG(dbgs() << "===========If Region Start===============\n"); + if (LRegion->getHasLoop()) { + DEBUG(dbgs() << "Has Backedge: Yes\n"); + } else { + DEBUG(dbgs() << "Has Backedge: No\n"); + } + + RegionMRT *Succ = nullptr; + RegionMRT *Pred = nullptr; + + unsigned BBSelectRegIn; + unsigned BBSelectRegOut; + for (auto CI = Children->begin(), CE = Children->end(); CI != CE; ++CI) { + DEBUG(LRegion->print(dbgs(), TRI)); + + auto CNI = CI; + ++CNI; + Pred = CNI != CE ? (*CNI)->getRegionMRT() : nullptr; + + MRT *Child = (*CI); + + if (Child->isRegion()) { + + LinearizedRegion *InnerLRegion = + Child->getRegionMRT()->getLinearizedRegion(); + // We found the block is the exit of an inner region, we need + // to put it in the current linearized region. + + DEBUG(dbgs() << "Linearizing region: "); + DEBUG(InnerLRegion->print(dbgs(), TRI)); + DEBUG(dbgs() << "\n"); + + MachineBasicBlock *InnerEntry = InnerLRegion->getEntry(); + if ((&(*(InnerEntry->getParent()->begin()))) == InnerEntry) { + // Entry has already been linearized, no need to do this region. + resolvePHIInfos(InnerEntry); + continue; + } + bool IsRegionExit = Child == Region->getExitTree(); + BBSelectRegOut = + IsRegionExit ? LRegion->getBBSelectRegOut() + : Succ ? Succ->getLinearizedRegion()->getBBSelectRegIn() + : BBSelectRegIn; + BBSelectRegIn = Pred ? Pred->getLinearizedRegion()->getBBSelectRegOut() + : InnerLRegion->getBBSelectRegIn(); + + LRegion->setBBSelectRegOut(BBSelectRegOut, true); + + DEBUG(dbgs() << "BBSelectRegIn: " << PrintReg(BBSelectRegIn, TRI) + << "\n"); + DEBUG(dbgs() << "BBSelectRegOut: " << PrintReg(BBSelectRegOut, TRI) + << "\n"); + + MachineBasicBlock *IfEnd = CurrentMerge; + CurrentMerge = createIfRegion(CurrentMerge, InnerLRegion, LRegion, + Child->getRegionMRT()->getEntry(), + BBSelectRegIn, BBSelectRegOut); + TII->convertNonUniformIfRegion(CurrentMerge, IfEnd); + } else { + MachineBasicBlock *MBB = Child->getMBBMRT()->getMBB(); + DEBUG(dbgs() << "Linearizing block: " << MBB->getNumber() << "\n"); + + bool IsRegionEntryBlock = MBB == Region->getEntry(); + bool IsRegionExitBlock = MBB == Region->getExit(); + // bool isFunctionExitBlock = MBB == &(MBB->getParent()->back()); + if (MBB == getSingleExitNode(*(MBB->getParent()))) { + // If this is the exit block then we need to skip to the next. + // The "in" register will be transferred to "out" in the next + // iteration. + BBSelectRegIn = LRegion->getBBSelectRegOut(); + continue; + } + + BBSelectRegOut = + IsRegionExitBlock + ? LRegion->getBBSelectRegOut() + : Succ ? Succ->getLinearizedRegion()->getBBSelectRegIn() + : BBSelectRegIn; + + BBSelectRegIn = + IsRegionEntryBlock + ? LRegion->getBBSelectRegIn() + : createBBSelectReg(TII, MRI); + + DEBUG(dbgs() << "BBSelectRegIn: " << PrintReg(BBSelectRegIn, TRI) + << "\n"); + DEBUG(dbgs() << "BBSelectRegOut: " << PrintReg(BBSelectRegOut, TRI) + << "\n"); + + MachineBasicBlock *IfEnd = CurrentMerge; + // This is a basic block that is not part of an inner region, we + // need to put it in the current linearized region. + CurrentMerge = createIfRegion(CurrentMerge, MBB, LRegion, BBSelectRegIn, + BBSelectRegOut); + if (CurrentMerge) { + TII->convertNonUniformIfRegion(CurrentMerge, IfEnd); + } + + DEBUG(PHIInfo.dump(MRI)); + } + Succ = Child->getRegionMRT(); + } + + LRegion->removeFalseRegisterKills(MRI); + + if (LRegion->getHasLoop()) { + MachineBasicBlock *NewSucc = splitEntry(LRegion); + if (isFunctionEntryBlock(LRegion->getEntry())) { + resolvePHIInfos(LRegion->getEntry()); + } + DebugLoc DL = NewSucc->findDebugLoc(NewSucc->getFirstNonPHI()); + unsigned InReg = LRegion->getBBSelectRegIn(); + unsigned InnerSelectReg = + MRI->createVirtualRegister(MRI->getRegClass(InReg)); + unsigned NewInReg = MRI->createVirtualRegister(MRI->getRegClass(InReg)); + TII->insertAssign(*(LRegion->getEntry()), + LRegion->getEntry()->getFirstTerminator(), DL, NewInReg, + // LRegion->getBBSelectRegIn(), + Region->getEntry()->getNumber()); + // Need to be careful about updating the registers insie the region. + LRegion->replaceRegisterInsideRegion(InReg, InnerSelectReg, false, MRI); + insertMergePHI(LRegion->getEntry(), LRegion->getExit(), NewSucc, + InnerSelectReg, NewInReg, LRegion->getBBSelectRegOut()); + + TII->convertNonUniformLoopRegion(NewSucc, LastMerge); + } + + if (Region->isRoot()) { + TII->insertReturn(*LastMerge); + } + + DEBUG(Entry->getParent()->dump()); + DEBUG(LRegion->print(dbgs(), TRI)); + DEBUG(PHIInfo.dump(MRI)); + + DEBUG(dbgs() << "===========If Region End===============\n"); + + Region->setLinearizedRegion(LRegion); + return true; +} + +static bool containsNonUniformBranchInstr(RegionMRT *Region, + const TargetInstrInfo *TII) { + SetVector *Children = Region->getChildren(); + for (auto CI = Children->begin(), CE = Children->end(); CI != CE; ++CI) { + MRT *Child = (*CI); + if (Child-> isMBB()) { + MachineBasicBlock *MBB = (*CI)->getMBBMRT()->getMBB(); + for (auto &TI : MBB->terminators()) { + if (TII->isNonUniformBranchInstr(TI)) { + return true; + } + } + } + } + return false; +} + +bool MachineCFGStructurizer::structurizeRegion(RegionMRT *Region) { + if (!containsNonUniformBranchInstr(Region, TII)) { + return false; + } + if (false && regionIsSimpleIf(Region)) { + transformSimpleIfRegion(Region); + return true; + } else if (regionIsSequence(Region)) { + return false; + } else { + structurizeComplexRegion(Region); + } + return false; +} + +static int structurize_once = 0; + +bool MachineCFGStructurizer::structurizeRegions(RegionMRT *Region, + bool isTopRegion) { + bool Changed = false; + + auto Children = Region->getChildren(); + for (auto CI : *Children) { + if (CI->isRegion()) { + Changed |= structurizeRegions(CI->getRegionMRT(), false); + } + } + + if (structurize_once < 2 || true) { + Changed |= structurizeRegion(Region); + structurize_once++; + } + return Changed; +} + +void MachineCFGStructurizer::initFallthroughMap(MachineFunction &MF) { + for (auto &MBBI : MF) { + FallthroughMap[&MBBI] = MBBI.getFallThrough(); + } +} + +void MachineCFGStructurizer::initializeSelectRegisters( + RegionMRT *Region, unsigned ExistingEntryReg, unsigned ExistingExitReg, + MachineRegisterInfo *MRI, const TargetInstrInfo *TII) { + unsigned EntryReg = + ExistingEntryReg != 0 ? ExistingEntryReg : createBBSelectReg(TII, MRI); + unsigned ExitReg = + /* ExistingExitReg != 0 ? ExistingExitReg :*/ createBBSelectReg(TII, MRI); + + DEBUG(dbgs() << "Init Entry Reg: " + << PrintReg(EntryReg, MRI->getTargetRegisterInfo()) << "\n"); + DEBUG(dbgs() << "Init Exit Reg: " + << PrintReg(ExitReg, MRI->getTargetRegisterInfo()) << "\n"); + + LinearizedRegion *LRegion = new LinearizedRegion(); + LRegion->setRegionMRT(Region); + LRegion->setBBSelectRegIn(EntryReg); + LRegion->setBBSelectRegOut(ExitReg, true); + DEBUG(dbgs() << "Add LiveOut (BBSelect): " << PrintReg(ExitReg, TRI) << "\n"); + Region->setLinearizedRegion(LRegion); + LRegion->setParent(Region->getParent() + ? Region->getParent()->getLinearizedRegion() + : nullptr); + + auto CL = std::prev(Region->getChildren()->end(), 1); + unsigned CurrentEntryReg = 0; + unsigned CurrentExitReg = ExitReg; + bool IsFirst = true; + for (auto CI = Region->getChildren()->begin(), + CE = Region->getChildren()->end(); + CI != CE; ++CI) { + bool IsLast = CI == CL; + if ((*CI)->isRegion()) { + if (IsFirst) { + CurrentExitReg = 0; + IsFirst = false; + } + CurrentEntryReg = IsLast ? EntryReg : 0; + initializeSelectRegisters((*CI)->getRegionMRT(), CurrentEntryReg, + CurrentExitReg, MRI, TII); + CurrentExitReg = + (*CI)->getRegionMRT()->getLinearizedRegion()->getBBSelectRegIn(); + } else { + DEBUG(dbgs() << "InitMBB: " << (*CI)->getMBBMRT()->getMBB()->getNumber() + << "\n"); + LRegion->addMBB((*CI)->getMBBMRT()->getMBB()); + CurrentEntryReg = 0; + CurrentExitReg = 0; + } + } +} + +static void checkRegOnlyPHIInputs(MachineFunction &MF) { + for (auto &MBBI : MF) { + for (MachineBasicBlock::instr_iterator I = MBBI.instr_begin(), + E = MBBI.instr_end(); + I != E; ++I) { + MachineInstr &Instr = *I; + if (Instr.isPHI()) { + int numPreds = getPHINumInputs(Instr); + for (int i = 0; i < numPreds; ++i) { + assert(Instr.getOperand(i * 2 + 1).isReg() && + "PHI Operand not a register"); + } + } + } + } +} + +bool MachineCFGStructurizer::runOnMachineFunction(MachineFunction &MF) { + auto &ST = MF.getSubtarget(); + TII = ST.getInstrInfo(); + TRI = ST.getRegisterInfo(); + MRI = &(MF.getRegInfo()); + initFallthroughMap(MF); + + checkRegOnlyPHIInputs(MF); + DEBUG(dbgs() << "----STRUCTURIZER START----\n"); + DEBUG(MF.dump()); + + Regions = &(getAnalysis().getRegionInfo()); + DEBUG(Regions->dump()); + + RegionMRT *RTree = MRT::buildMRT(MF, Regions, TII, MRI); + setRegionMRT(RTree); + initializeSelectRegisters(RTree, 0, 0, MRI, TII); + DEBUG(RTree->dump(TRI)); + bool result = structurizeRegions(RTree, true); + delete RTree; + DEBUG(dbgs() << "----STRUCTURIZER END----\n"); + return result; +} + +char &llvm::MachineCFGStructurizerID = MachineCFGStructurizer::ID; + +INITIALIZE_PASS_BEGIN(MachineCFGStructurizer, "machine-cfg-structurizer", + "Machine CFG Structurizer", false, false) +INITIALIZE_PASS_DEPENDENCY(MachineRegionInfoPass) +INITIALIZE_PASS_END(MachineCFGStructurizer, "machine-cfg-structurizer", + "Machine CFG Structurizer", false, false) +FunctionPass *llvm::createMachineCFGStructurizer() { + return new MachineCFGStructurizer(); +} Index: lib/CodeGen/MachineRegionInfo.cpp =================================================================== --- lib/CodeGen/MachineRegionInfo.cpp +++ lib/CodeGen/MachineRegionInfo.cpp @@ -1,4 +1,3 @@ - #include "llvm/CodeGen/MachineRegionInfo.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/RegionInfoImpl.h" @@ -104,8 +103,10 @@ void MachineRegionInfoPass::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); AU.addRequiredTransitive(); - AU.addRequired(); - AU.addRequired(); + AU.addRequired(); + AU.addRequired(); + AU.addRequired(); + MachineFunctionPass::getAnalysisUsage(AU); } void MachineRegionInfoPass::print(raw_ostream &OS, const Module *) const { @@ -119,14 +120,15 @@ #endif char MachineRegionInfoPass::ID = 0; +char &MachineRegionInfoPassID = MachineRegionInfoPass::ID; -INITIALIZE_PASS_BEGIN(MachineRegionInfoPass, "regions", - "Detect single entry single exit regions", true, true) +INITIALIZE_PASS_BEGIN(MachineRegionInfoPass, "machine-regions", + "Detect machine single entry single exit regions", true, true) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree) INITIALIZE_PASS_DEPENDENCY(MachineDominanceFrontier) -INITIALIZE_PASS_END(MachineRegionInfoPass, "regions", - "Detect single entry single exit regions", true, true) +INITIALIZE_PASS_END(MachineRegionInfoPass, "machine-regions", + "Detect machine single entry single exit regions", true, true) // Create methods available outside of this file, to use them // "include/llvm/LinkAllPasses.h". Otherwise the pass would be deleted by Index: lib/Target/AMDGPU/AMDGPUTargetMachine.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -514,15 +514,20 @@ bool GCNPassConfig::addPreISel() { AMDGPUPassConfig::addPreISel(); + const AMDGPUTargetMachine &TM = getAMDGPUTargetMachine(); // FIXME: We need to run a pass to propagate the attributes when calls are // supported. addPass(&AMDGPUAnnotateKernelFeaturesID); - addPass(createStructurizeCFGPass(true)); // true -> SkipUniformRegions + if (TM.Options.Structurizer == CFGStructurizer::Early) { + addPass(createStructurizeCFGPass(true)); // true -> SkipUniformRegions + } addPass(createSinkingPass()); addPass(createSITypeRewriter()); addPass(createAMDGPUAnnotateUniformValues()); - addPass(createSIAnnotateControlFlowPass()); + if (TM.Options.Structurizer == CFGStructurizer::Early) { + addPass(createSIAnnotateControlFlowPass()); + } return false; } @@ -576,6 +581,9 @@ #endif void GCNPassConfig::addPreRegAlloc() { + if (TM->Options.Structurizer == CFGStructurizer::Late) { + addPass(&MachineCFGStructurizerID); + } addPass(createSIShrinkInstructionsPass()); addPass(createSIWholeQuadModePass()); } Index: lib/Target/AMDGPU/SIInstrInfo.h =================================================================== --- lib/Target/AMDGPU/SIInstrInfo.h +++ lib/Target/AMDGPU/SIInstrInfo.h @@ -140,6 +140,35 @@ RegScavenger *RS, unsigned TmpReg, unsigned Offset, unsigned Size) const; + void insertAssign(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + DebugLoc DL, + unsigned DestReg, + unsigned Value) const override; + + const TargetRegisterClass *getPreferredSelectRegClass( + unsigned Size) const override; + + void insertSelect(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, const DebugLoc &DL, + unsigned DstReg, ArrayRef Cond, + unsigned TrueReg, unsigned FalseReg) const override; + + unsigned insertNE(MachineBasicBlock *MBB, + MachineBasicBlock::iterator I, DebugLoc DL, + unsigned SrcReg, int Value) const override; + + unsigned insertEQ(MachineBasicBlock *MBB, + MachineBasicBlock::iterator I, DebugLoc DL, + unsigned SrcReg, int Value) const override; + + unsigned calculateLDSSpillAddress(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + RegScavenger *RS, + unsigned TmpReg, + unsigned Offset, + unsigned Size) const; + void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned SrcReg, bool isKill, int FrameIndex, @@ -596,6 +625,7 @@ void insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const override; + void insertReturn(MachineBasicBlock &MBB) const override; /// \brief Return the number of wait states that result from executing this /// instruction. unsigned getNumWaitStates(const MachineInstr &MI) const; @@ -641,6 +671,16 @@ bool mayAccessFlatAddressSpace(const MachineInstr &MI) const; + bool isNonUniformBranchInstr(MachineInstr &Instr) const override; + + virtual void convertNonUniformIfRegion( + MachineBasicBlock *IfEntry, + MachineBasicBlock *IfEnd) const override; + + virtual void convertNonUniformLoopRegion( + MachineBasicBlock *LoopEntry, + MachineBasicBlock *LoopEnd) const override; + ArrayRef> getSerializableTargetIndices() const override; Index: lib/Target/AMDGPU/SIInstrInfo.cpp =================================================================== --- lib/Target/AMDGPU/SIInstrInfo.cpp +++ lib/Target/AMDGPU/SIInstrInfo.cpp @@ -460,6 +460,215 @@ return Opcode; } +void SIInstrInfo::insertAssign(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, DebugLoc DL, + unsigned DestReg, unsigned Value) const { + MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); + const TargetRegisterClass *RegClass = MRI.getRegClass(DestReg); + if (RegClass == &AMDGPU::SReg_32RegClass || + RegClass == &AMDGPU::SGPR_32RegClass || + RegClass == &AMDGPU::SReg_32_XM0RegClass) { + BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), DestReg).addImm(Value); + } else if (RegClass == &AMDGPU::SReg_64RegClass) { + BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), DestReg).addImm(Value); + } else if (RegClass == &AMDGPU::VGPR_32RegClass || + RegClass == &AMDGPU::VReg_1RegClass) { + BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DestReg).addImm(Value); + } else if (RegClass == &AMDGPU::VReg_64RegClass) { + BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B64_PSEUDO), DestReg).addImm(Value); + } else if (RegClass == &AMDGPU::SReg_128RegClass) { + unsigned Reg0 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass); + unsigned Reg1 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass); + unsigned Reg2 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass); + unsigned Reg3 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass); + BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), Reg0).addImm(Value); + BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), Reg1).addImm(0); + BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), Reg2).addImm(0); + BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), Reg3).addImm(0); + BuildMI(MBB, MI, DL, get(AMDGPU::REG_SEQUENCE), DestReg) + .addReg(Reg0) + .addImm(AMDGPU::sub0) + .addReg(Reg1) + .addImm(AMDGPU::sub1) + .addReg(Reg2) + .addImm(AMDGPU::sub2) + .addReg(Reg3) + .addImm(AMDGPU::sub3); + } else if (RegClass == &AMDGPU::SReg_256RegClass) { + unsigned Reg0 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass); + unsigned Reg1 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass); + unsigned Reg2 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass); + unsigned Reg3 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass); + unsigned Reg4 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass); + unsigned Reg5 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass); + unsigned Reg6 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass); + unsigned Reg7 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass); + BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), Reg0).addImm(Value); + BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), Reg1).addImm(0); + BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), Reg2).addImm(0); + BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), Reg3).addImm(0); + BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), Reg4).addImm(0); + BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), Reg5).addImm(0); + BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), Reg6).addImm(0); + BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), Reg7).addImm(0); + BuildMI(MBB, MI, DL, get(AMDGPU::REG_SEQUENCE), DestReg) + .addReg(Reg0) + .addImm(AMDGPU::sub0) + .addReg(Reg1) + .addImm(AMDGPU::sub1) + .addReg(Reg2) + .addImm(AMDGPU::sub2) + .addReg(Reg3) + .addImm(AMDGPU::sub3) + .addReg(Reg4) + .addImm(AMDGPU::sub4) + .addReg(Reg5) + .addImm(AMDGPU::sub5) + .addReg(Reg6) + .addImm(AMDGPU::sub6) + .addReg(Reg7) + .addImm(AMDGPU::sub7); + } else if (RegClass == &AMDGPU::VReg_128RegClass) { + unsigned Reg0 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + unsigned Reg1 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + unsigned Reg2 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + unsigned Reg3 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), Reg0).addImm(Value); + BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), Reg1).addImm(0); + BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), Reg2).addImm(0); + BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), Reg3).addImm(0); + BuildMI(MBB, MI, DL, get(AMDGPU::REG_SEQUENCE), DestReg) + .addReg(Reg0) + .addImm(AMDGPU::sub0) + .addReg(Reg1) + .addImm(AMDGPU::sub1) + .addReg(Reg2) + .addImm(AMDGPU::sub2) + .addReg(Reg3) + .addImm(AMDGPU::sub3); + } else if (RegClass == &AMDGPU::VReg_256RegClass) { + unsigned Reg0 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + unsigned Reg1 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + unsigned Reg2 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + unsigned Reg3 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + unsigned Reg4 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + unsigned Reg5 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + unsigned Reg6 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + unsigned Reg7 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), Reg0).addImm(Value); + BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), Reg1).addImm(0); + BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), Reg2).addImm(0); + BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), Reg3).addImm(0); + BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), Reg4).addImm(0); + BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), Reg5).addImm(0); + BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), Reg6).addImm(0); + BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), Reg7).addImm(0); + BuildMI(MBB, MI, DL, get(AMDGPU::REG_SEQUENCE), DestReg) + .addReg(Reg0) + .addImm(AMDGPU::sub0) + .addReg(Reg1) + .addImm(AMDGPU::sub1) + .addReg(Reg2) + .addImm(AMDGPU::sub2) + .addReg(Reg3) + .addImm(AMDGPU::sub3) + .addReg(Reg4) + .addImm(AMDGPU::sub4) + .addReg(Reg5) + .addImm(AMDGPU::sub5) + .addReg(Reg6) + .addImm(AMDGPU::sub6) + .addReg(Reg7) + .addImm(AMDGPU::sub7); + } else { + llvm_unreachable("Unsupported register class"); + } +} + +const TargetRegisterClass * +SIInstrInfo::getPreferredSelectRegClass(unsigned Size) const { + return &AMDGPU::VGPR_32RegClass; +} + +void SIInstrInfo::insertSelect(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + const DebugLoc &DL, unsigned DstReg, + ArrayRef Cond, unsigned TrueReg, + unsigned FalseReg) const { + const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); + const TargetRegisterClass *RegClass = MRI.getRegClass(DstReg); + assert(RegClass == &AMDGPU::VGPR_32RegClass && "Not a VGPR32 reg"); + + if (Cond.size() == 1) { + BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg) + .addReg(FalseReg) + .addReg(TrueReg) + .addOperand(Cond[0]); + } else if (Cond.size() == 2) { + assert(Cond[0].isImm() && "Cond[0[ is not an immediate"); + switch (Cond[0].getImm()) { + case SIInstrInfo::SCC_TRUE: + llvm_unreachable("Unhandled branch predicate SCC_TRUE"); + break; + case SIInstrInfo::SCC_FALSE: + llvm_unreachable("Unhandled branch predicate SCC_FALSE"); + break; + case SIInstrInfo::VCCNZ: { + MachineOperand RegOp = Cond[1]; + RegOp.setImplicit(false); + BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg) + .addReg(FalseReg) + .addReg(TrueReg) + .addOperand(RegOp); + break; + } + case SIInstrInfo::VCCZ: { + MachineOperand RegOp = Cond[1]; + RegOp.setImplicit(false); + BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg) + .addReg(TrueReg) + .addReg(FalseReg) + .addOperand(RegOp); + break; + } + case SIInstrInfo::EXECNZ: + llvm_unreachable("Unhandled branch predicate EXECNZ"); + break; + case SIInstrInfo::EXECZ: + llvm_unreachable("Unhandled branch predicate EXECZ"); + break; + default: + llvm_unreachable("invalid branch predicate"); + } + } else { + llvm_unreachable("Can only handle Cond size 1 or 2"); + } +} + +unsigned SIInstrInfo::insertEQ(MachineBasicBlock *MBB, + MachineBasicBlock::iterator I, DebugLoc DL, + unsigned SrcReg, int Value) const { + MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); + unsigned Reg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); + BuildMI(*MBB, I, DL, get(AMDGPU::V_CMP_EQ_I32_e64), Reg) + .addImm(Value) + .addReg(SrcReg); + + return Reg; +} + +unsigned SIInstrInfo::insertNE(MachineBasicBlock *MBB, + MachineBasicBlock::iterator I, DebugLoc DL, + unsigned SrcReg, int Value) const { + MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); + unsigned Reg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); + BuildMI(*MBB, I, DL, get(AMDGPU::V_CMP_NE_I32_e64), Reg) + .addImm(Value) + .addReg(SrcReg); + + return Reg; +} + unsigned SIInstrInfo::getMovOpcode(const TargetRegisterClass *DstRC) const { if (DstRC->getSize() == 4) { @@ -796,6 +1005,18 @@ insertWaitStates(MBB, MI, 1); } +void SIInstrInfo::insertReturn(MachineBasicBlock &MBB) const { + auto MF = MBB.getParent(); + SIMachineFunctionInfo *Info = MF->getInfo(); + + if (MBB.succ_size() == 0) { + bool HasNoTerminator = MBB.getFirstTerminator() == MBB.end(); + if (HasNoTerminator) + BuildMI(MBB, MBB.end(), DebugLoc(), + get(Info->returnsVoid() ? AMDGPU::S_ENDPGM : AMDGPU::SI_RETURN)); + } +} + unsigned SIInstrInfo::getNumWaitStates(const MachineInstr &MI) const { switch (MI.getOpcode()) { default: return 1; // FIXME: Do wait states equal cycles? @@ -1202,14 +1423,20 @@ return false; } - BranchPredicate Pred = getBranchPredicate(I->getOpcode()); - if (Pred == INVALID_BR) - return true; + MachineBasicBlock *CondBB = NULL; - MachineBasicBlock *CondBB = I->getOperand(0).getMBB(); - Cond.push_back(MachineOperand::CreateImm(Pred)); - Cond.push_back(I->getOperand(1)); // Save the branch register. + if (I->getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO) { + CondBB = I->getOperand(1).getMBB(); + Cond.push_back(I->getOperand(0)); + } else { + BranchPredicate Pred = getBranchPredicate(I->getOpcode()); + if (Pred == INVALID_BR) + return true; + CondBB = I->getOperand(0).getMBB(); + Cond.push_back(MachineOperand::CreateImm(Pred)); + Cond.push_back(I->getOperand(1)); // Save the branch register. + } ++I; if (I == MBB.end()) { @@ -1305,6 +1532,13 @@ return 1; } + if(Cond.size() == 1 && Cond[0].isReg()) { + BuildMI(&MBB, DL, get(AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO)) + .addOperand(Cond[0]) + .addMBB(TBB); + return 1; + } + assert(TBB && Cond[0].isImm()); unsigned Opcode @@ -1347,8 +1581,12 @@ bool SIInstrInfo::reverseBranchCondition( SmallVectorImpl &Cond) const { assert(Cond.size() == 2); - Cond[0].setImm(-Cond[0].getImm()); - return false; + if (Cond[0].isImm()) { + Cond[0].setImm(-Cond[0].getImm()); + return false; + } else { + return true; + } } static void removeModOperands(MachineInstr &MI) { @@ -3587,6 +3825,81 @@ return false; } +bool SIInstrInfo::isNonUniformBranchInstr(MachineInstr &Branch) const { + return Branch.getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO; +} + +void SIInstrInfo::convertNonUniformIfRegion(MachineBasicBlock *IfEntry, + MachineBasicBlock *IfEnd) const { + MachineBasicBlock::iterator TI = IfEntry->getFirstTerminator(); + assert(TI != IfEntry->end()); + + MachineInstr *Branch = &(*TI); + MachineFunction *MF = IfEntry->getParent(); + MachineRegisterInfo &MRI = IfEntry->getParent()->getRegInfo(); + + if (Branch->getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO) { + unsigned DstReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); + MachineInstr *SIIF = + BuildMI(*(MF), Branch->getDebugLoc(), get(AMDGPU::SI_IF), DstReg) + .addOperand(Branch->getOperand(0)) + .addOperand(Branch->getOperand(1)); + MachineInstr *SIEND = + BuildMI(*(MF), Branch->getDebugLoc(), get(AMDGPU::SI_END_CF)) + .addReg(DstReg); + + IfEntry->erase(TI); + IfEntry->insert(IfEntry->end(), SIIF); + IfEnd->insert(IfEnd->getFirstNonPHI(), SIEND); + } +} + +void SIInstrInfo::convertNonUniformLoopRegion( + MachineBasicBlock *LoopEntry, MachineBasicBlock *LoopEnd) const { + MachineBasicBlock::iterator TI = LoopEnd->getFirstTerminator(); + // We expect 2 terminators, one conditional and one unconditional. + assert(TI != LoopEnd->end()); + + MachineInstr *Branch = &(*TI); + MachineFunction *MF = LoopEnd->getParent(); + MachineRegisterInfo &MRI = LoopEnd->getParent()->getRegInfo(); + + if (Branch->getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO) { + + unsigned DstReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); + unsigned BackEdgeReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); + MachineInstrBuilder HeaderPHIBuilder = + BuildMI(*(MF), Branch->getDebugLoc(), get(TargetOpcode::PHI), DstReg); + for (MachineBasicBlock::pred_iterator PI = LoopEntry->pred_begin(), + E = LoopEntry->pred_end(); + PI != E; ++PI) { + if (*PI == LoopEnd) { + HeaderPHIBuilder.addReg(BackEdgeReg); + } else { + MachineBasicBlock *PMBB = *PI; + unsigned ZeroReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); + insertAssign(*PMBB, PMBB->getFirstTerminator(), DebugLoc(), ZeroReg, 0); + HeaderPHIBuilder.addReg(ZeroReg); + } + HeaderPHIBuilder.addMBB(*PI); + } + MachineInstr *HeaderPhi = HeaderPHIBuilder; + MachineInstr *SIIFBREAK = BuildMI(*(MF), Branch->getDebugLoc(), + get(AMDGPU::SI_IF_BREAK), BackEdgeReg) + .addReg(DstReg) + .addOperand(Branch->getOperand(0)); + MachineInstr *SILOOP = + BuildMI(*(MF), Branch->getDebugLoc(), get(AMDGPU::SI_LOOP)) + .addReg(BackEdgeReg) + .addMBB(LoopEntry); + + LoopEntry->insert(LoopEntry->begin(), HeaderPhi); + LoopEnd->erase(TI); + LoopEnd->insert(LoopEnd->end(), SIIFBREAK); + LoopEnd->insert(LoopEnd->end(), SILOOP); + } +} + ArrayRef> SIInstrInfo::getSerializableTargetIndices() const { static const std::pair TargetIndices[] = { Index: lib/Target/AMDGPU/SIInstructions.td =================================================================== --- lib/Target/AMDGPU/SIInstructions.td +++ lib/Target/AMDGPU/SIInstructions.td @@ -166,6 +166,15 @@ let isTerminator = 1 in { +let Uses = [EXEC] in { + def SI_NON_UNIFORM_BRCOND_PSEUDO : PseudoInstSI < + (outs), + (ins SReg_64:$vcc, brtarget:$target), + [(brcond i1:$vcc, bb:$target)]> { + let Size = 12; + } +} + def SI_IF: CFPseudoInstSI < (outs SReg_64:$dst), (ins SReg_64:$vcc, brtarget:$target), [(set i64:$dst, (int_amdgcn_if i1:$vcc, bb:$target))], 1, 1> {