Index: include/llvm/Analysis/ControlStructureAnalysis.h =================================================================== --- /dev/null +++ include/llvm/Analysis/ControlStructureAnalysis.h @@ -0,0 +1,197 @@ +//===- llvm/Analysis/ControlStructureAnalysis.h - Control Structure Analysis --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===--------------------------------------------------------------------------------===// +// +// This file defines the ControlStructureTree class, used to build a tree +// of control structures (IF/LOOP/BLOCK) in a function. +// Refer to "Advanced Compiler Design and Implementation" -- Steven. S. Muchnik +// for an explanation of the algorithms and data structures used here. +// +// Warning: Do not confuse "Struct" used throughout this file with C "struct"s. +// Structure here refers to control structures, such as IF/WHILE etc. +// +//===--------------------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_CONTROLSTRUCTUREANALYSIS_H +#define LLVM_ANALYSIS_CONTROLSTRUCTUREANALYSIS_H + +#include +#include + +typedef std::vector IntVector; +typedef std::set IntSet; + +namespace llvm { + + struct ControlStructureTree { + enum CSTNodeTy { + CST_None = 0, // Deleted node / Failuring in id'ing a structure + // No node can have this type. + + // All constructs below have a single entry. + CST_Block, // Straight line blocks code. Note that this + // need not be one basic block. It can be a + // straight sequence (no control-flow) of any + // control structure nodes. + CST_IfThen, // An If-Then construct + CST_IfThenElse, // an If-Then-Else construct + CST_Switch, // Multiway branching - switch statement + CST_Proper, // A proper acyclic interval. This is + // any acyclic structure that is not + // any of the acyclic structures above. + CST_SelfLoop, // A node branching back to itself. + CST_WhileLoop, // Single entry, single exit loop. + CST_NaturalLoop, // A reducible loop (i.e., single entry). + CST_Improper, // Consists of a multiple-entry loop. + // Single entry is ensured by having + // the common dominator also in the node. + }; + + // This represents node in the ControlStructureTree. Note + // that it also pseudo-represents nodes in the abstract CFG + // and hence contains Succ,Pred for the abstract CFG. + struct CSTNode { + int Index; // Index into the "Nodes" vector in Tree. + // Valid only if >= 0. + int preOrderNo, postOrderNo; // preorder and postorder traversal numbers. + + // Type of this node. + CSTNodeTy StructType; + // The set of nodes making up the region abstracted to this node. + // This set is the set of children of this node in the tree. + IntVector StructNodes; + // Index of the region containing this node (parent in the tree) + int StructOf; + + // Succesors and Predecessors in the abstract CFG (the abstract + // CFG is initially the function's CFG and is modified as the + // algorithm progresses by abstracting out nodes). + IntVector Succ, Pred; + + // DomTree related edges + IntVector domTreeChildren; + int iDom; + + // The loop this is immediately contained in, in terms of llvm::LoopInfo. + // For non-basic-blocks, we update this in Replace(). + Loop *iLoop; + + BasicBlock *BB; // Only set if this refers a single basic block. + + CSTNode(int Index_p, CSTNodeTy ty) : + Index(Index_p), preOrderNo(-1), postOrderNo(-1), StructType(ty), + StructOf(-1), iDom(-1), iLoop(nullptr), BB(nullptr) { ; }; + CSTNode(int Index_p, BasicBlock *BB_p) : + Index(Index_p), preOrderNo(-1), postOrderNo(-1), StructType(CST_Block), + StructOf(-1), iDom(-1), iLoop(nullptr), BB(BB_p) { ; }; + }; + typedef std::vector CSTNodeVector; + + private: + // The function we are analyzing. + Function *curF; + DominatorTree *DT; + LoopInfo *LI; + + // The set of all nodes in the tree (and the abstract CFG). + CSTNodeVector Nodes; + // List of deleted nodes for re-use. + IntVector deletedNodes; + + // A map from each BB in curF to the Index of its Node in Nodes. + std::map BBToNodeMap; + // Nodes[Post[i]] is the node with post order number "i". + IntVector Post; + // Current node (in main loop) will have post order number PostCtr. + unsigned PostCtr; + // Entry into the abstract CFG. This might change during the execution + // of the algorithm as nodes (including entryNode) are reduced. + int entryNode; + + // Initialize the abstract CFG to the actual CFG. + // Return the index of the function entry node. + int initializeGraph(void); + // Clear and fill up "Post". + void DFSPostorder(int entryNode); + // Identify known acyclic structures + CSTNodeTy AcyclicRegionType(IntSet &N, int node, IntVector &nset); + // Identify arbitrary acyclic structures (proper intervals) + CSTNodeTy ProperIntervalType(IntSet &N, int node, IntVector &nset); + // Identify cyclic structures + CSTNodeTy CyclicRegionType(IntSet &N, int node, IntVector &nset); + // Replace NodeSet by an abstract region node and set data structures. + // Return the new CSTNode representing the abstract region. + int Reduce(IntSet &N, CSTNodeTy rtype, IntVector &NodeSet); + // Link region "node" into abstract flow graph, adjust the postorder + // traversal, and update successors and predecessors. + void Replace(IntSet &N, int node, IntVector &NodeSet); + // Is edge a backedge. Uses preOrderNo and postOrderNo to compute. + bool isBackEdge(CSTNode &from, CSTNode &to); + // Is this CSTNodeTy a cyclic type + bool isCyclicCSTNodeTy(CSTNodeTy &ty); + // Is there a cycle within this NodeSet + bool NodeSetContainsCycle(IntVector &NodeSet); + // Get the first common dominator for all nodes in NodeSet. + int nearestCommonDominator(IntVector &NodeSet); + // Does A dominate B + bool dominates(int A, int B); + // Returns true if there is a node "k" such that there is a + // (possibly empty) path from "m" to "k" that does not pass + // through "n" and an edge "k"->"n" that is a back edge. + bool PathBack(IntSet &N, int m, int n); + // Returns true if there is a non-trivial path from n to m such + // that all nodes in the path are in I. Otherwise returns false. + bool Path(int n, int m, IntSet &I); + // Is A an ancestor of B in the control structure tree? + bool isCSTAncestor(int A, int B); + + public: + ControlStructureTree(Function *F) { curF = F; }; + // (re)compute ConstrolStructureTree for curF. + void analyze(DominatorTree *DTp, LoopInfo *LIp); + // If a CST_Block has another CST_Block as its child, merge them. + void mergeBlocks(int subtree); + void print(raw_ostream &OS); + + // Some accessors + int getFunctionEntryNode(void); + const CSTNodeVector &getNodeList(void); + CSTNode &getNode(int node); + Function *getCurF(void); + + // Get specific tree children for each type of node + int getIf(int node); // Valid for CST_IfThen and CST_IfThenElse + int getThen(int node); // Valid for CST_IfThen and CST_IfThenElse + int getElse(int node); // Valid for CST_IfThenElse + int getEntry(int node); // Valid for all types of nodes + int getExit(int node); // Valid for all types of nodes + Loop *getLoop(int node); // Valid only for CST_*Loop + // Recursively call getEntry till a node corresponding + // to a basic block is found. + BasicBlock *getEntryBlock(int node); + // Get the entry block for the exit of "node" + BasicBlock *getExitBlock(int node); + // Get a list of basic blocks in subtree rooted at "n". + void getBasicBlocksInSubTree(std::vector &BBs, int n); + // Does node "n" have an exit out from the function? + bool ExitsFunction(int n); + + // Modification API + // Add a new node to the graph, return a reference to it. + CSTNode &addNode(CSTNodeTy ty, BasicBlock *BB = nullptr); + // Delete this node and add to free list. + void deleteNode(int node); + // Delete this node and all its descendents. + void deleteSubTree(int node); + + }; + +} // End llvm namespace + + +#endif // LLVM_ANALYSIS_CONTROLSTRUCTUREANALYSIS_H Index: lib/Analysis/CMakeLists.txt =================================================================== --- lib/Analysis/CMakeLists.txt +++ lib/Analysis/CMakeLists.txt @@ -19,6 +19,7 @@ CostModel.cpp CodeMetrics.cpp ConstantFolding.cpp + ControlStructureAnalysis.cpp Delinearization.cpp DemandedBits.cpp DependenceAnalysis.cpp Index: lib/Analysis/ControlStructureAnalysis.cpp =================================================================== --- /dev/null +++ lib/Analysis/ControlStructureAnalysis.cpp @@ -0,0 +1,1173 @@ +//===- ControlStructureTree.h - Control Structure Analysis --*----- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the ControlStructureTree class, used to build a tree +// of control structures (IF/LOOP/BLOCK) in a function. +// Refer to "Advanced Compiler Design and Implementation" -- Steven. S. Muchnik +// for an explanation of the algorithms and data structures used here. +// +// Warning: Do not confuse "Struct" used throughout this file with C "struct"s. +// Structure here refers to control structures, such as IF/WHILE etc. + +//===----------------------------------------------------------------------===// + +#include +#include + +#include "llvm/IR/Dominators.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/ControlStructureAnalysis.h" +#include "llvm/Analysis/PostDominators.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/Instructions.h" + +using namespace llvm; + +static BasicBlock *getIDom(DominatorTree *DT, BasicBlock *N) +{ + DomTreeNode *DTN = DT->getNode(N)->getIDom(); + if (DTN) + return DTN->getBlock(); + + return nullptr; +} + +static bool IntVectorContains(IntVector &IV, int n) +{ + return std::find(IV.begin(), IV.end(), n) != IV.end(); +} + +static int IntVectorFind(IntVector &IV, int n) +{ + return std::find(IV.begin(), IV.end(), n) - IV.begin(); +} + +// Valid for CST_IfThen and CST_IfThenElse +// Return -1 if cannot estimate +int ControlStructureTree::getIf(int node) +{ + assert(Nodes[node].StructType == CST_IfThen || + Nodes[node].StructType == CST_IfThenElse); + + // Check if we can get the "then" part. If so, return entry. + if (getThen(node) >= 0) + return getEntry(node); + + return -1; +} + +// Valid for CST_IfThen and CST_IfThenElse +// Return -1 if cannot estimate +int ControlStructureTree::getThen(int node) +{ + CSTNode &Node = Nodes[node]; + assert(Node.StructType == CST_IfThen || Node.StructType == CST_IfThenElse); + + // If the entry block is a leaf in the tree, then we can + // use the basic block itself to find the two branches. + // In such a case we return the right branch. Else -1. + int entry = getEntry(node); + assert(entry >= 0); + BasicBlock *ifBB = Nodes[entry].BB; + if (ifBB == nullptr) + return -1; + + BranchInst *BI = dyn_cast(ifBB->getTerminator()); + if (!BI || !BI->isConditional()) + return -1; + + BasicBlock *ifTrue = BI->getSuccessor(0); + BasicBlock *ifFalse = BI->getSuccessor(1); + + if (Node.StructType == CST_IfThen) { + assert(Node.StructNodes.size() == 2 && entry == Node.StructNodes[0]); + if (isCSTAncestor(Node.StructNodes[1], BBToNodeMap[ifTrue]) || + isCSTAncestor(Node.StructNodes[1], BBToNodeMap[ifFalse])) + { + return Node.StructNodes[1]; + } + llvm_unreachable("Couldn't find \"Then\" in CST_IfThen"); + } + + assert(Node.StructType == CST_IfThenElse); + // We need to check which of the children in + // StructNodes is a parent of "ifTrue". + assert(Node.StructNodes.size() == 3 && entry == Node.StructNodes[0]); + + // See if StructNodes[1] is an ancestor of ifTrue. + assert(BBToNodeMap.find(ifTrue) != BBToNodeMap.end()); + if (isCSTAncestor(Node.StructNodes[1], BBToNodeMap[ifTrue])) + return Node.StructNodes[1]; + if (isCSTAncestor(Node.StructNodes[2], BBToNodeMap[ifTrue])) + return Node.StructNodes[2]; + + llvm_unreachable("Couldn't find \"Then\" in CST_IfThen/CST_IfThenElse"); + return -1; +} + +// Valid for CST_IfThenElse +// Return -1 if cannot estimate +int ControlStructureTree::getElse(int node) +{ + CSTNode &Node = Nodes[node]; + assert(Node.StructType == CST_IfThenElse); + + int then = getThen(node); + if (then < 0) + return -1; + + if (Node.StructNodes[1] == then) + return Node.StructNodes[2]; + else + return Node.StructNodes[1]; +} + +// Valid for all types of nodes. +int ControlStructureTree::getEntry(int node) +{ + CSTNode &Node = Nodes[node]; + if (Node.StructNodes.size() == 0) { + assert(Node.BB != NULL); + return Node.Index; + } + // The first CST child is, by convention, the entry. + return Node.StructNodes[0]; +} + +// Get a single exit block (i.e., a block that is not +// a descendent of "node", but has an edge from a +// descendent of "node"), if a unique one exists. +// Valid for all types of nodes. Mostly +// works on IfThen/IfThenElse and natural loops. +int ControlStructureTree::getExit(int node) +{ + // Scan up till we find a CST_Block B such that + // the child C of B we just came up through is not + // the last child of B. The immediate sibling of C + // would be our exit block. + CSTNode *N = &Nodes[node]; + while (N->StructOf >= 0) { + CSTNode *P = &Nodes[N->StructOf]; + // TODO: Proper/Improper nodes on the way means "cannot determine". + if (P->StructType == CST_Block) { + unsigned nPostInP = IntVectorFind(P->StructNodes, N->Index); + assert(nPostInP < P->StructNodes.size()); + if (nPostInP < P->StructNodes.size()-1) + return P->StructNodes[nPostInP+1]; + } + N = P; + } + return -1; +} + +// Recursively call getEntry till a node corresponding +// to a basic block is found. +BasicBlock *ControlStructureTree::getEntryBlock(int node) +{ + int n = node; + while (n != getEntry(n)) + n = getEntry(n); + + return Nodes[n].BB; +} + +// Get the entry block for the exit of "node" +BasicBlock *ControlStructureTree::getExitBlock(int node) +{ + int exitNode = getExit(node); + if (exitNode >= 0) + return getEntryBlock(exitNode); + + return nullptr; +} + +// Valid only for CST_*Loop +Loop *ControlStructureTree::getLoop(int node) +{ + CSTNode &Node = Nodes[node]; + assert(Node.StructType == CST_SelfLoop || + Node.StructType == CST_WhileLoop || + Node.StructType == CST_NaturalLoop); + + // Node.iLoop will contain the parent loop for node, + // not the loop that it represents. Get the loop for + // node from one of its children. + assert(Node.StructNodes.size() > 0); + return Nodes[Node.StructNodes[0]].iLoop; +} + +// Get a list of basic blocks in subtree rooted at "n". +void ControlStructureTree::getBasicBlocksInSubTree +(std::vector &BBs, int n) +{ + std::vector WL; + WL.push_back(n); + + // Do a traversal over the subtree + while (!WL.empty()) { + int n = WL.back(); + WL.pop_back(); + + CSTNode &Node = Nodes[n]; + if (Node.BB != nullptr) + BBs.push_back(Node.BB); + + for (int child : Node.StructNodes) { + WL.push_back(child); + } + } +} + +// Does node "n" have an exit out from the function? +bool ControlStructureTree::ExitsFunction(int n) +{ + // TODO: Check entire subtree from "n" if it has + // an exit from the function. This will involve maintaining + // a field in each node and keeping it updated (or traversing + // the tree everytime). + + // Check if this is a single BB and it has a return/exit. + if (Nodes[n].BB) { + TerminatorInst *TI = Nodes[n].BB->getTerminator(); + if (isa(TI) || isa(TI)) + return true; + } + return false; +} + +ControlStructureTree::CSTNode &ControlStructureTree::getNode(int node) +{ + return Nodes[node]; +} + +Function *ControlStructureTree:: getCurF(void) +{ + return curF; +} + +// Delete this node and add to free list. +void ControlStructureTree::deleteNode(int node) +{ + CSTNode &Node = Nodes[node]; + + // Update BBToNodeMap if necessary + if (Node.BB != nullptr && BBToNodeMap.find(Node.BB) != BBToNodeMap.end()) { + assert(BBToNodeMap[Node.BB] == Node.Index); + BBToNodeMap.erase(Node.BB); + } + // Mark as deleted + Node.StructType = CST_None; + // Clear all its data + Node.StructNodes.clear(); + Node.StructOf = -1; + Node.Succ.clear(); Node.Pred.clear(); + Node.domTreeChildren.clear(); + Node.iDom = -1; + Node.iLoop = nullptr; + Node.BB = nullptr; + + // Add to free list + deletedNodes.push_back(node); +} + +// Delete this node and all its descendents. +void ControlStructureTree::deleteSubTree(int node) { + CSTNode &Node = Nodes[node]; + for (int child : Node.StructNodes) + deleteSubTree(child); + + deleteNode(node); +} + +// Add a new node to the graph, return a reference to it. +ControlStructureTree::CSTNode &ControlStructureTree:: +addNode(ControlStructureTree::CSTNodeTy ty, BasicBlock *BB) +{ + CSTNode *Node = nullptr; + + if (deletedNodes.empty()) { + int Index = Nodes.size(); + Nodes.push_back(CSTNode(Index, ty)); + Node = &Nodes[Index]; + } else { + Node = &Nodes[deletedNodes.back()]; + deletedNodes.pop_back(); + Node->StructType = ty; + } + + // BB is valid only if ty is CST_Block + assert(ty == CST_Block || BB == nullptr); + if (ty == CST_Block && BB != nullptr) { + Node->BB = BB; + BBToNodeMap[BB] = Node->Index; + } + + return *Node; +} + +// Initialize the abstract CFG to the actual CFG +// Return the index of the function entry node. +int ControlStructureTree::initializeGraph() +{ + + Nodes.clear(); + int Index = 0; + for (BasicBlock &BB : curF->getBasicBlockList()) { + Nodes.push_back(CSTNode(Index, &BB)); + BBToNodeMap[&BB] = Index++; + } + + // Now set Succ,Pred, dominator and loop info for each node + for (CSTNode &Node : Nodes) { + BasicBlock *BB = Node.BB; + for (BasicBlock *Succ : successors(BB)) { + assert(BBToNodeMap.find(Succ) != BBToNodeMap.end()); + int SuccNodeIndex = BBToNodeMap[Succ]; + assert(SuccNodeIndex >= 0); + Node.Succ.push_back(SuccNodeIndex); + Nodes[SuccNodeIndex].Pred.push_back(Node.Index); + } + + BasicBlock *iDomBB = getIDom(DT, BB); + if (iDomBB) { + assert(BBToNodeMap.find(iDomBB) != BBToNodeMap.end()); + Node.iDom = BBToNodeMap[iDomBB]; + Nodes[Node.iDom].domTreeChildren.push_back(Node.Index); + } else { + Node.iDom = -1; + } + + if (Loop *L = LI->getLoopFor(BB)) { + Node.iLoop = L; + } + } + + assert(BBToNodeMap.find(&(curF->getEntryBlock())) != BBToNodeMap.end()); + return BBToNodeMap[&(curF->getEntryBlock())]; +} + +// If a CST_Block has another CST_Block as its child, merge them. +void ControlStructureTree::mergeBlocks(int subtree) +{ + // Do a post order traversal over the graph rooted at subtree. + CSTNode &Node = Nodes[subtree]; + for (unsigned i = 0; i < Node.StructNodes.size(); i++) { + int child = Node.StructNodes[i]; + mergeBlocks(child); + } + + if (Node.StructType != CST_Block) + return; + + // Now check if any of the children is a CST_Block, + // if so, adopt all of its children. + for (unsigned i = 0; i < Node.StructNodes.size();) { + CSTNode &Child = Nodes[Node.StructNodes[i]]; + if (Child.StructType == CST_Block && !Child.StructNodes.empty()) { + auto itr = Node.StructNodes.begin() + i; + // Add each child of Child to Node.StructNodes + Node.StructNodes.insert(itr, Child.StructNodes.begin(), + Child.StructNodes.end()); + // Point "itr" back to Child + i += Child.StructNodes.size(); + itr = Node.StructNodes.begin() + i; + // delete Child as a child from Node + Node.StructNodes.erase(itr); + // Update the parent for each new node just added + for (int gChild : Child.StructNodes) { + Nodes[gChild].StructOf = Node.Index; + } + // Delete Child from the graph + deleteNode(Child.Index); + } else { + i++; + } + } + + // If CST_Block now has only a single child, we delete this node itself. + if (Node.StructNodes.size() == 1 && Node.StructOf >= 0) { + CSTNode &Parent = Nodes[Node.StructOf]; + auto itr = std::find(Parent.StructNodes.begin(), + Parent.StructNodes.end(), + Node.Index); + assert(itr != Parent.StructNodes.end()); + // replace itr with child. + *itr = Node.StructNodes[0]; + Nodes[Node.StructNodes[0]].StructOf = Parent.Index; + deleteNode(Node.Index); + } +} + +void ControlStructureTree::print(raw_ostream &OS) +{ + // Print a dotty (graphviz) graph. + OS << "digraph \"" << curF->getName() << "\" {\n"; + for (CSTNode &Node : Nodes) { + if (Node.StructType == CST_None) { + // Deleted node + continue; + } + // First print the label + assert(Node.Index >= 0); + OS << Node.Index << " [label=\"" << Node.Index << "\\n"; + if (Node.preOrderNo >= 0) { + assert(Node.postOrderNo >= 0); + OS << "(" << Node.preOrderNo << "," << Node.postOrderNo << ")\\n"; + } + switch(Node.StructType) { + case CST_None: + llvm_unreachable("ControlStructureTree: Node has type CST_None"); + break; + case CST_Block: + OS << "Block"; + if (Node.BB != nullptr) { + // If this is a single block, it cannot have + // any children in the control tree. + assert(Node.StructNodes.size() == 0); + OS << "\\n(BasicBlock=" << Node.BB->getName() << ")"; + } + break; + case CST_IfThen: + OS << "IfThen"; break; + case CST_IfThenElse: + OS << "IfThenElse"; break; + case CST_Switch: + OS << "Switch"; break; + case CST_Proper: + OS << "Proper"; break; + case CST_SelfLoop: + OS << "SelfLoop"; break; + case CST_WhileLoop: + OS << "WhileLoop"; break; + case CST_NaturalLoop: + OS << "NaturalLoop"; break; + case CST_Improper: + OS << "Improper"; break; + } + OS << "\"]\n"; + + // Print abstract CFG edges + for (int SuccIndex : Node.Succ) { + assert(SuccIndex >= 0); + assert(IntVectorContains(Nodes[SuccIndex].Pred, Node.Index)); + OS << Node.Index << " -> " << SuccIndex << " [style=dotted]\n"; + } + + // Print control structure tree children: + for (unsigned i = 0; i < Node.StructNodes.size(); i++) { + int ChildIndex = Node.StructNodes[i]; + assert(ChildIndex >= 0); + assert(Nodes[ChildIndex].StructOf == Node.Index); + std::string eLabel; + switch(Node.StructType) { + case CST_Block: + eLabel = std::to_string(i); break; + case CST_IfThen: + case CST_IfThenElse: + { + if (getIf(Node.Index) == ChildIndex) + eLabel = "If"; + else if (getThen(Node.Index) == ChildIndex) + eLabel = "Then"; + else if (Node.StructType == CST_IfThenElse && + getElse(Node.Index) == ChildIndex) + { + eLabel = "Else"; + } + break; + } + default: + if (getEntry(Node.Index) == ChildIndex) + eLabel = "e"; + break; + } + OS << Node.Index << " -> " << ChildIndex << + " [label=\"" << eLabel << "\"]\n"; + } + } + OS << "}\n"; +} + +// Clear and fill up "Post", by doing a +// non-recursive post-order traversal over +// the abstract CFG. +void ControlStructureTree::DFSPostorder(int entryNode) +{ + unsigned preOrderCtr = 0, postOrderCtr = 0; + std::vector Visited(Nodes.size(), false); + // Many nodes in Nodes may not be visited as they + // are no longer part of the active graph. Its + // simpler to have them all set to -1. + for (unsigned i = 0; i < Nodes.size(); i++) { + Nodes[i].preOrderNo = -1; + Nodes[i].postOrderNo = -1; + } + Post.clear(); + assert(entryNode >= 0 && (unsigned)entryNode < Nodes.size()); + + // Recursive routine: + // void DFSPostorder(int curNodeIndex) { + // Visited[curNodeIndex] = true; + // Nodes[curNodeIndex].preOrderNo = preOrderCtr++; + // for (SuccVecIndex = 0; SuccVecIndex < Nodes[curNodeIndex].Succ.size(); + // SuccVecIndex++) + // { + // SuccIndex = Nodes[curNodeIndex].Succ[SuccVecIndex]; + // if (!Visited[SuccIndex]) + // DFSPostorder(SuccIndex); + // } + // Post.push_back(curNodeIndex); + // assert(Post.size()-1 == postOrderCtr); + // Nodes[curNodeIndex].postOrderNo = postOrderCtr++; + // } + + // Simulate recursion. Structure of + // all local variables in recursive function: + struct Locals { + int curNodeIndex; + int SuccVecIndex; + int returnPoint; + Locals(int curNodeIndex_p, int SuccVecIndex_p, int returnPoint_p) : + curNodeIndex(curNodeIndex_p), SuccVecIndex(SuccVecIndex_p), + returnPoint(returnPoint_p) { ; }; + }; + + std::stack Stack; + // Start recursion, return to return point 1. + Stack.push(Locals(entryNode, 0, 1)); + + // The successor we are looking at. Since this is computed + // each time from curLocal, it need not be on the stack. + int SuccIndex; +BEGIN: + Locals *curLocal = &(Stack.top()); + Visited[curLocal->curNodeIndex] = true; + Nodes[curLocal->curNodeIndex].preOrderNo = preOrderCtr++; + for (;curLocal->SuccVecIndex < (int)Nodes[curLocal->curNodeIndex].Succ.size(); + curLocal->SuccVecIndex++) + { + SuccIndex = Nodes[curLocal->curNodeIndex].Succ[curLocal->SuccVecIndex]; + if (!Visited[SuccIndex]) { + // Recurse. Retun to return point 0. + Stack.push(Locals(SuccIndex, 0, 0)); + goto BEGIN; + RETURN_POINT_0: + assert(!Stack.empty()); + curLocal = &(Stack.top()); // go back to previous context + } + } + Post.push_back(curLocal->curNodeIndex); + assert(Post.size()-1 == postOrderCtr); + Nodes[curLocal->curNodeIndex].postOrderNo = postOrderCtr++; + // We are done with this call. Return to appropriate return point. + if (curLocal->returnPoint == 0) { + Stack.pop(); + goto RETURN_POINT_0; + } else { + Stack.pop(); + assert(curLocal->returnPoint == 1); + goto RETURN_POINT_1; + } + + // End of all recursions. +RETURN_POINT_1: + assert(Stack.empty()); + assert(preOrderCtr == postOrderCtr); +} + +// Identify acyclic structures +ControlStructureTree::CSTNodeTy ControlStructureTree::AcyclicRegionType +(IntSet &N, int node, IntVector &nset) +{ + int m, n; + bool p, s; + + nset.clear(); + + // Follow the predecessor chain of node. + n = node; p = (Nodes[n].Pred.size() == 1); s = true; + while (p && s) { + nset.push_back(n); + n = Nodes[n].Pred[0]; + p = Nodes[n].Pred.size() == 1; + s = Nodes[n].Succ.size() == 1; + } + if (s) + nset.push_back(n); + // We want this in the reverse order (lexicogrphaically). + std::reverse(nset.begin(), nset.end()); + + // The last element now is "node", and we know it will + // surely get added again. So let us remove it to avoid + // duplicates. + assert(nset.back() == node); + nset.pop_back(); + + // Follow the successor chain of node. + n = node; p = true; s = (Nodes[n].Succ.size() == 1); + while (p && s) { + nset.push_back(n); + n = Nodes[n].Succ[0]; + p = Nodes[n].Pred.size() == 1; + s = Nodes[n].Succ.size() == 1; + } + if (p) + nset.push_back(n); + + if (nset.size() >= 2) + return CST_Block; + + // Check for IfThen and IfThenElse + if (Nodes[node].Succ.size() == 2) { + m = Nodes[node].Succ[0]; + n = Nodes[node].Succ[1]; + // Check for IfThenElse + if (Nodes[m].Succ.size() == 1 && Nodes[n].Succ.size() == 1 && + Nodes[m].Succ[0] == Nodes[n].Succ[0] && + Nodes[m].Pred.size() == 1 && Nodes[n].Pred.size() == 1) + { + // nset so far contains only node. Add other two. + assert(nset.size() == 1 && nset[0] == node); + nset.push_back(m); nset.push_back(n); + return CST_IfThenElse; + } + // Check for IfThen: Case 1/2 + if (Nodes[m].Succ.size() == 1 && Nodes[m].Succ[0] == n && + Nodes[m].Pred.size() == 1) + { + nset.push_back(m); + return CST_IfThen; + } + // Check for IfThen: Case 2/2 + if (Nodes[n].Succ.size() == 1 && Nodes[n].Succ[0] == m && + Nodes[n].Pred.size() == 1) + { + nset.push_back(n); + return CST_IfThen; + } + } + + // TODO: handle "switch" + return CST_None; +} + +ControlStructureTree::CSTNodeTy ControlStructureTree::ProperIntervalType +(IntSet &N, int node, IntVector &nset) +{ + nset.clear(); + + // Traverse the graph starting from "node", but not to nodes + // that "node" does not dominate. + // If we reach a node whose loop parent is not the same as that + // of node's, we do not consider it or go beyond it. + IntSet nSetSet; + std::stack WL; + Loop *nodeL = Nodes[node].iLoop; + + WL.push(node); + while (!WL.empty()) { + int cur = WL.top(); + WL.pop(); + + if (nSetSet.count(cur) != 0) + continue; + nSetSet.insert(cur); + + // Look at each successor + for (int succ : Nodes[cur].Succ) { + // add succ to WL if it is dominated by "node" and has same loop region. + // (if succ has an exit() or return, we still consider it). + if (dominates(node, succ) && + (Nodes[succ].iLoop == nodeL || ExitsFunction(succ)) && + nSetSet.count(succ) == 0) + { + WL.push(succ); + } + } + } + // If we have at least one other node, we consider it a proper interval. + if (nSetSet.size() > 1) { + nset.clear(); + for (int n : nSetSet) { + nset.push_back(n); + } + return CST_Proper; + } + + return CST_None; +} + +// Identify cyclic structures +ControlStructureTree::CSTNodeTy ControlStructureTree::CyclicRegionType +(IntSet &N, int node, IntVector &nset) +{ + // If there is just one node, only a CST_SelfLoop is useful. + if (nset.size() == 1) { + assert(nset[0] == node); + // if node->node edge exists + if (IntVectorContains(Nodes[node].Succ, node)) { + return CST_SelfLoop; + } else { + return CST_None; + } + } + + // Check for an Improper interval. + for (int m : nset) { + if (!Path(node, m, N)) { + // Its an improper region. + // TODO: Implement Minimize_Improper(). + return CST_Improper; + } + } + + // Check for CST_While. Pick any node from nset other than node. + int m = -1; + for (int k : nset) { + if (k != node) + m = k; + } + // There must exist an m != node + assert (m != -1); + + if (Nodes[node].Succ.size() == 2 && Nodes[m].Succ.size() == 1 && + Nodes[node].Pred.size() == 2 && Nodes[m].Pred.size() == 1) + { + return CST_WhileLoop; + } + + return CST_NaturalLoop; +} + +// Replace NodeSet by an abstract region node and set data structures. +// Return the new CSTNode representing the abstract region. +int ControlStructureTree::Reduce(IntSet &N, CSTNodeTy rtype, IntVector &NodeSet) +{ + // Create a new CSTNode. + int node = Nodes.size(); + Nodes.push_back(CSTNode(node, rtype)); + // If rtype is not a cyclic type, but NodeSet contains a cycle, + // we need to add a self loop to node. + if (!isCyclicCSTNodeTy(rtype) && NodeSetContainsCycle(NodeSet)) { + Nodes[node].Succ.push_back(node); + Nodes[node].Pred.push_back(node); + } + // Replace NodeSet by the the abstract "node". + Replace(N, node, NodeSet); + + // Set the edges in the tree. + for (int m : NodeSet) { + Nodes[m].StructOf = node; + Nodes[node].StructNodes.push_back(m); + } + + // Update the dominator for node + int ncd = nearestCommonDominator(NodeSet); + // The way we reduce nodes to one, we always ensure + // a single entry to the region, which must be the cdom. + // So cdom must be part of NodeSet. + assert(IntVectorContains(NodeSet, ncd)); + // The immediate dominator for "node" will be the immediate + // dominator of ncd. + int ncdiDom = Nodes[ncd].iDom; + Nodes[node].iDom = ncdiDom; + // If node is the new entry node, it won't have an iDom. + if (ncdiDom >= 0) { + assert(!IntVectorContains(NodeSet, ncdiDom)); + assert(!IntVectorContains(Nodes[ncdiDom].domTreeChildren, node)); + Nodes[ncdiDom].domTreeChildren.push_back(node); + } else { + assert(ncd == entryNode); + } + // Update dominator and loop info for affected nodes. + bool firstM = true; + Loop *comL = nullptr; + for (int m: NodeSet) { + // Update dominator info. + for (int domChild : Nodes[m].domTreeChildren) { + // If domChild is in NodeSet, don't do anything + // as it gets updated later. + if (!IntVectorContains(NodeSet, domChild)) { + assert(Nodes[domChild].iDom == m); + Nodes[domChild].iDom = node; + assert(!IntVectorContains(Nodes[node].domTreeChildren, domChild)); + Nodes[node].domTreeChildren.push_back(domChild); + } + } + // These nodes are no longer in the graph, clear info to avoid + // confusion and aid debugging. + Nodes[m].domTreeChildren.clear(); + int iDom = Nodes[m].iDom; + if (iDom >= 0) { + auto pos = std::find(Nodes[iDom].domTreeChildren.begin(), + Nodes[iDom].domTreeChildren.end(), m); + if (pos != Nodes[iDom].domTreeChildren.end()) { + // This need not always be true because iDom may be in NodeSet, + // in which case we might have cleared its domTreeChildren. + Nodes[iDom].domTreeChildren.erase(pos); + } + Nodes[m].iDom = -1; + } else { + assert(m == entryNode); + } + + // Gather loop info. + if (firstM && !ExitsFunction(m)) { + firstM = false; + comL = Nodes[m].iLoop; + } else { + assert(comL == Nodes[m].iLoop || ExitsFunction(m)); + } + } + // Update loop info. + if (comL != nullptr) { + if (isCyclicCSTNodeTy(rtype)) + Nodes[node].iLoop = comL->getParentLoop(); + else + Nodes[node].iLoop = comL; + } + + return node; +} + +// Link region "node" into abstract flow graph, adjust the postorder +// traversal, and update successors and predecessors. +void ControlStructureTree::Replace(IntSet &N, int node, IntVector &NodeSet) +{ + // Add "node" to N + N.insert(node); + + // 1. Find the highest numbered position of any node in NodeSet. + int highestPostOrderSeen = -1, highestPostOrderSeenFor; + for (int m : NodeSet) { + if (Nodes[m].postOrderNo > highestPostOrderSeen) { + highestPostOrderSeen = Nodes[m].postOrderNo; + assert(Post[highestPostOrderSeen] == m); + highestPostOrderSeenFor = m; + } + } + assert(highestPostOrderSeen >= 0); + // insert n into Post[] in the highest-numbered position of a node in nset + Post[highestPostOrderSeen] = node; + // Update pre/postorder numbers for "node". + Nodes[node].preOrderNo = Nodes[highestPostOrderSeenFor].preOrderNo; + Nodes[node].postOrderNo = Nodes[highestPostOrderSeenFor].postOrderNo; + // set PostCtr to the index of "node" in the resulting postorder. + PostCtr = highestPostOrderSeen; + + // 2. Remove all nodes in NodeSet from Post, N. + // To remove nodes from Post, we just mark it with "-1", + // so as to not upset the pre/postorder numbers we've computed + // 3. Update the abstract CFG (Succ/Pred) + for (int m : NodeSet) { + N.erase(m); + int postOrderNo = Nodes[m].postOrderNo; + // We just added "node" in position "highestPostOrderSeen". So ignore it. + if (postOrderNo != highestPostOrderSeen) { + Post[postOrderNo] = -1; + } + Nodes[m].preOrderNo = -1; + Nodes[m].postOrderNo = -1; + + // Redirect all edges coming into m. + for (unsigned i = 0; i < Nodes[m].Pred.size(); i++) { + int p = Nodes[m].Pred[i]; + if (IntVectorContains(NodeSet, p)) { + // The predecessor belongs to NodeSet, don't bother with it. + continue; + } + IntVector::iterator s = std::find(Nodes[p].Succ.begin(), + Nodes[p].Succ.end(), m); + assert(s != Nodes[p].Succ.end()); + // If p does not already have an edge to node, add one. + if (!IntVectorContains(Nodes[p].Succ, node)) { + *s = node; + } else { + // There already is an edge to node, just delete the one to m. + Nodes[p].Succ.erase(s); + } + // If node does not have an edge from p, add one. + if (!IntVectorContains(Nodes[node].Pred, p)) { + Nodes[node].Pred.push_back(p); + } + } + + // Redirect all edges going out of m. + for (unsigned i = 0; i < Nodes[m].Succ.size(); i++) { + int s = Nodes[m].Succ[i]; + if (IntVectorContains(NodeSet, s)) { + // The successor belongs to NodeSet, don't bother with it. + continue; + } + IntVector::iterator p = std::find(Nodes[s].Pred.begin(), + Nodes[s].Pred.end(), m); + assert(p != Nodes[s].Pred.end()); + // If s does not already have an edge from node, add one. + if (!IntVectorContains(Nodes[s].Pred, node)) { + *p = node; + } else { + // There already is an edge from node, just delete the one from m. + Nodes[s].Pred.erase(p); + } + // If node does not have an edge to s, add one. + if (!IntVectorContains(Nodes[node].Succ, s)) { + Nodes[node].Succ.push_back(s); + } + } + + Nodes[m].Succ.clear(); + Nodes[m].Pred.clear(); + } +} + +// Get the first common dominator for all nodes in NodeSet. +// This does not look for strong dominance, hence it is +// true that a node dominates itself. +int ControlStructureTree::nearestCommonDominator(IntVector &NodeSet) +{ + // Make a list of the dominators of NodeSet[0]. + std::vector domsOfFirst; + int cDom = NodeSet[0]; + while (cDom != -1) { + domsOfFirst.push_back(cDom); + cDom = Nodes[cDom].iDom; + } + + unsigned cDomI = 0; + for (unsigned i = 1; i < NodeSet.size(); i++) { + int cur = NodeSet[i]; + // Scan through domsOfFirst starting from cDomI + // and see if any of it dominates cur. + unsigned j; + for (j = cDomI; j < domsOfFirst.size(); j++) { + if (dominates(domsOfFirst[j], cur)) + break; + } + // At least the entry must dominate all + assert(j < domsOfFirst.size()); + if (j > cDomI) + cDomI = j; + } + + return domsOfFirst[cDomI]; +} + +// Does A dominate B. This is not a check for strict dominance. +bool ControlStructureTree::dominates(int A, int B) +{ + if (A == B) + return true; + + while (Nodes[B].iDom >= 0) { + if (Nodes[B].iDom == A) + return true; + B = Nodes[B].iDom; + } + + return false; +} + +// Is edge a backedge. Uses preOrderNo and postOrderNo to compute. +bool ControlStructureTree::isBackEdge(CSTNode &from, CSTNode &to) +{ + + // Reference: + // (http://www.cs.yale.edu/homes/aspnes/pinewiki/DepthFirstSearch.html) + // | Edge type of uv | start times | end times | + // --------------------------------------------------------- + // | Tree edge | start[u] < start[v] | end[u] > end[v] | + // | Back edge | start[u] > start[v] | end[u] < end[v] | + // | Forward edge | start[u] < start[v] | end[u] > end[v] | + // | Cross edge | start[u] > start[v] | end[u] > end[v] | + + assert(from.preOrderNo >= -1 && to.preOrderNo >= -1 && + from.postOrderNo >= -1 && to.postOrderNo >= -1); + if (from.preOrderNo >= to.preOrderNo && + from.postOrderNo <= to.postOrderNo) + return true; + + return false; +} + +bool ControlStructureTree::isCyclicCSTNodeTy(CSTNodeTy &ty) +{ + switch(ty) { + case CST_SelfLoop: + case CST_WhileLoop: + case CST_NaturalLoop: + case CST_Improper: + return true; + default: + return false; + } +} + +// Is there a cycle within this NodeSet +bool ControlStructureTree::NodeSetContainsCycle(IntVector &NodeSet) +{ + IntSet NS(NodeSet.begin(), NodeSet.end()); + for (int n : NS) { + if (Path(n, n, NS)) + return true; + } + + return false; +} + +// Returns true if there is a node "k" such that there is a +// (possibly empty) path from "m" to "k" that does not pass +// through "n" and an edge "k"->"n" that is a back edge. +// "N" is the set of nodes in the graph currently. +bool ControlStructureTree::PathBack(IntSet &N, int m, int n) +{ + // Look at every predecessor "k" of "n" such that + // "k"->"n" is a back edge. See if a path exists + // from "m" to "k" without going through "n". + IntSet N_n = N; N_n.erase(n); + for (int k : Nodes[n].Pred) { + if (isBackEdge(Nodes[k], Nodes[n]) && + ((m == k) || Path(m, k, N_n))) + { + return true; + } + } + + return false; +} + +// Returns true if there is a non-trivial path from n to m such +// that all nodes in the path are in I. Otherwise returns false. +// TODO: Make this efficient, use heuristics. +bool ControlStructureTree::Path(int n, int m, IntSet &I) +{ + + if (I.count(m) == 0 || I.count(n) == 0) + return false; + + // TODO: Use LoopInfo to check if there is a common + // loop around n, m (see Analysis/CFG.h:loopContainsBoth()). + // If so, can return true immediately. The difficulty + // is that LoopInfo is in terms of the original CFG. + + std::vector Visited(Nodes.size(), false); + std::stack WL; + WL.push(n); + + while (!WL.empty()) { + int cur = WL.top(); + WL.pop(); + + // A node may get added to WL multiple times before it is visited. + if (Visited[cur]) + continue; + Visited[cur] = true; + + // Visit each successor of cur that is present in I. + for (int succ : Nodes[cur].Succ) { + if (succ == m) + return true; + if (!Visited[succ] && I.count(succ) != 0) { + // TODO: If we add a check at the start of this + // routine for common outer loop, we then need + // not traverse the graph past the immediate post + // dominator of "m". This will prevent us from + // having to potentially traverse the whole graph. + WL.push(succ); + } + } + } + + return false; +} + +// Is A an ancestor of B in the control structure tree? +bool ControlStructureTree::isCSTAncestor(int A, int B) +{ + while (B >= 0) { + if (B == A) + return true; + B = Nodes[B].StructOf; + } + return false; +} + +int ControlStructureTree::getFunctionEntryNode(void) +{ + return entryNode; +} + +const ControlStructureTree::CSTNodeVector & +ControlStructureTree::getNodeList(void) +{ + return Nodes; +} + +void ControlStructureTree::analyze(DominatorTree *DTp, LoopInfo *LIp) +{ + DT = DTp; + LI = LIp; + + // Initialize the abstract CFG to the actual CFG + entryNode = initializeGraph(); + + // N below is the working graph. Its initialized + // to all the nodes in the original CFG. + IntSet N; + for (unsigned i = 0; i < Nodes.size(); i++) { + N.insert(i); + } + + // Lets run the main loop now. + do { + PostCtr = 0; + DFSPostorder(entryNode); + while (!N.empty() && PostCtr < Post.size()) { + int n = Post[PostCtr]; + if (n < 0) { + // If the node in this position has been + // deleted (by Replace()), ignore it. + PostCtr++; + continue; + } + // Locate an acyclic region, if present + IntVector NodeSet; + CSTNodeTy rtype = AcyclicRegionType(N, n, NodeSet); + if (rtype != CST_None) { + int p = Reduce(N, rtype, NodeSet); + if (IntVectorContains(NodeSet, entryNode)) + entryNode = p; + } else { + // Locate a cyclic region, if present. + IntVector ReachUnder; + ReachUnder.push_back(n); + for (int m : N) + if ((m != n) && PathBack(N, m, n)) + ReachUnder.push_back(m); + rtype = CyclicRegionType(N, n, ReachUnder); + if (rtype != CST_None) { + int p = Reduce(N, rtype, ReachUnder); + if (IntVectorContains(ReachUnder, entryNode)) + entryNode = p; + } else { + rtype = ProperIntervalType(N, n, NodeSet); + if (rtype != CST_None) { + int p = Reduce(N, rtype, NodeSet); + if (IntVectorContains(NodeSet, entryNode)) + entryNode = p; + } else { + PostCtr += 1; + } + } + } + } + } while (N.size() > 1); + + // Collapse CST_Block->CST_Block kind of edges. + mergeBlocks(entryNode); +}