Index: lib/Transforms/IPO/MergeFunctions.cpp =================================================================== --- lib/Transforms/IPO/MergeFunctions.cpp +++ lib/Transforms/IPO/MergeFunctions.cpp @@ -48,6 +48,7 @@ #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/FoldingSet.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/MapVector.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/IR/Constants.h" @@ -60,10 +61,16 @@ #include "llvm/IR/Operator.h" #include "llvm/Pass.h" #include "llvm/Support/CallSite.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Format.h" #include "llvm/Support/ValueHandle.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/ValueMapper.h" +#include #include using namespace llvm; @@ -72,6 +79,24 @@ STATISTIC(NumAliasesWritten, "Number of aliases generated"); STATISTIC(NumDoubleWeak, "Number of new functions created"); +static cl::opt MergeMinInsts("mergefunc-min-insts", + cl::Hidden, cl::init(4), + cl::desc("Min instructions required to even consider single block fns")); + +static cl::opt MergeDifferingMinInsts("mergefunc-diff-min-insts", + cl::Hidden, cl::init(15), + cl::desc("Min instructions required to try merging differing functions")); + +static cl::opt MergeMaxDiffering("mergefunc-max-diff", + cl::Hidden, cl::init(86), + cl::desc("Maximum number of differing instructions allowed")); + +static cl::opt MergeMinSimilarity("mergefunc-min-similarity", + cl::Hidden, cl::init(70), + cl::desc("Minimum percentage of similar instructions required")); + +static const char *MERGED_SUFFIX = ".mrg"; + /// Returns the type id for a type to be hashed. We turn pointer types into /// integers here because the actual compare logic below considers pointers and /// integers of the same size as equal. @@ -95,25 +120,220 @@ ID.AddInteger(getTypeIDForHash(FTy->getReturnType())); for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i) ID.AddInteger(getTypeIDForHash(FTy->getParamType(i))); + if (F->size()) + ID.AddInteger(F->front().size()); return ID.ComputeHash(); } +/// Create a cast instruction if needed to cast V to type DstType. We treat +/// pointer and integer types of the same bitwidth as equivalent, so this can be +/// used to cast them to each other where needed. The function returns the Value +/// itself if no cast is needed, or a new CastInst instance inserted before +/// InsertBefore. The integer type equivalent to pointers must be passed as +/// IntPtrType (get it from DataLayout). This is guaranteed to generate no-op +/// casts, otherwise it will assert. +static Value *createCastIfNeeded(Value *V, Type *DstType, + Instruction *InsertBefore, Type *IntPtrType) { + if (V->getType() == DstType) + return V; + + CastInst *Result; + Type *OrigType = V->getType(); + + if (OrigType->isPointerTy() && + (DstType->isIntegerTy() || DstType->isPointerTy())) { + Result = CastInst::CreatePointerCast(V, DstType, "", InsertBefore); + } else if (OrigType->isIntegerTy() && DstType->isPointerTy() && + OrigType == IntPtrType) { + // Int -> Ptr + Result = CastInst::Create(CastInst::IntToPtr, V, DstType, "", InsertBefore); + } else { + llvm_unreachable("Can only cast int -> ptr or ptr -> (ptr or int)"); + } + + assert(cast(Result)->isNoopCast(IntPtrType) && + "Cast is not a no-op cast. Potential loss of precision"); + + return Result; +} + +/// Replace Inst1 by a switch statement that executes Inst1 or one of Inst2s +/// depending on the value of SwitchVal. If a value in Inst2s is NULL, it +/// defaults to executing Inst1. Returns set of terminator instructions of newly +/// created switch blocks in Ret. +/// +/// For instance, the transformation may look as follows: +/// ...Head... +/// Inst1 with all of Insts2s without parents +/// ...Tail... +/// into +/// ...Head... +/// Switch +/// / | \ ... +/// (default) (1) (2) +/// Inst1 Inst2s[0] Inst2s[1] +/// Ret[0] Ret[1] Ret[2] +/// \ | / +/// ...Tail... +/// +static void SplitBlockAndInsertSwitch( + Value *SwitchVal, Instruction *Inst1, + SmallVectorImpl &Inst2s, + SmallVectorImpl &Ret, + Type *IntPtrTy) { + // Split block + BasicBlock *Head = Inst1->getParent(); + BasicBlock *Tail = Head->splitBasicBlock(Inst1); + + // Create default block + LLVMContext &C = Head->getContext(); + BasicBlock *DefaultBlock = BasicBlock::Create(C, "", Head->getParent(), Tail); + + // Insert switch instruction at end of Head + TerminatorInst *HeadOldTerm = Head->getTerminator(); + SwitchInst *Switch = SwitchInst::Create(SwitchVal, DefaultBlock, + Inst2s.size()); + ReplaceInstWithInst(HeadOldTerm, Switch); + + // Move instructions into the blocks + if (Inst1->isTerminator()) { + Inst1->removeFromParent(); + DefaultBlock->getInstList().push_back(Inst1); + Ret.push_back(cast(Inst1)); + } else { + TerminatorInst *DefaultTerm = BranchInst::Create(Tail, DefaultBlock); + Inst1->moveBefore(DefaultTerm); + Ret.push_back(DefaultTerm); + } + + for (unsigned InstPos = 0, InstNum = Inst2s.size(); InstPos < InstNum; + ++InstPos) { + Instruction *Inst2 = Inst2s[InstPos]; + if (!Inst2) { + Ret.push_back(NULL); + continue; + } + + BasicBlock *CaseBlock = BasicBlock::Create(C, "", Head->getParent(), Tail); + + if (Inst2->isTerminator()) { + assert(Inst1->isTerminator() && + "Inst1 and Inst2 must both be terminators or non-terminators!"); + CaseBlock->getInstList().push_back(Inst2); + Ret.push_back(cast(Inst2)); + } else { + TerminatorInst *CaseTerm = BranchInst::Create(Tail, CaseBlock); + Inst2->insertBefore(CaseTerm); + Ret.push_back(CaseTerm); + } + + Switch->addCase(ConstantInt::get(cast(SwitchVal->getType()), + InstPos+1), + CaseBlock); + } + + // If Inst1 (and Inst2s) are TerminatorInst's, Tail will be empty and can be + // deleted now. We also need to update PHI nodes to add the additional + // incoming blocks from the SwitchInst. + bool HavePhi = false; + if (Inst1->isTerminator()) { + for (succ_iterator I = succ_begin(DefaultBlock), E = succ_end(DefaultBlock); + I != E; ++I) { + BasicBlock *Successor = *I; + PHINode *Phi; + + for (BasicBlock::iterator II = Successor->begin(); + (Phi = dyn_cast(II)); ++II) + for (unsigned ValId = 0, ValEnd = Phi->getNumIncomingValues(); + ValId != ValEnd; ++ValId) + if (Phi->getIncomingBlock(ValId) == Tail) { + Phi->setIncomingBlock(ValId, DefaultBlock); + + bool Inst1UsedInPhi = false; + if (Phi->getIncomingValue(ValId) == Inst1) { + Inst1UsedInPhi = true; + HavePhi = true; + } + + SmallVectorImpl::iterator + SwitchI = Ret.begin(), SwitchE = Ret.end(); + for (++SwitchI; SwitchI != SwitchE; ++SwitchI) { + if (!*SwitchI) + continue; + if (Inst1UsedInPhi) + Phi->addIncoming(*SwitchI, (*SwitchI)->getParent()); + else + Phi->addIncoming(Phi->getIncomingValue(ValId), + (*SwitchI)->getParent()); + } + } + } + + Tail->eraseFromParent(); + } + + // If the instructions have uses, we need to insert a PHI node + if (!Inst1->use_empty() && !HavePhi) { + // We treat all pointer types as equal, so we may need to insert + // a bitcast to ensure that all incoming values of the PHI node have the + // same type + Type *F1IType = Inst1->getType(); + BasicBlock *TailBB = Ret[0]->getSuccessor(0); + PHINode *Phi = PHINode::Create(F1IType, Inst2s.size(), "", + &TailBB->front()); + Inst1->replaceAllUsesWith(Phi); + + Phi->addIncoming(Inst1, Inst1->getParent()); + for (unsigned FnI = 0, FnE = Inst2s.size(); FnI != FnE; ++FnI) { + Instruction *F2InstInNewF = Inst2s[FnI]; + if (!F2InstInNewF) + continue; + + if (F2InstInNewF->getType() != F1IType) { + assert(!F2InstInNewF->isTerminator() && + "Cannot cast result of terminator instruction"); + + F2InstInNewF = cast( + createCastIfNeeded(F2InstInNewF, + F1IType, + Ret[FnI+1], + IntPtrTy)); + } + + Phi->addIncoming(F2InstInNewF, F2InstInNewF->getParent()); + } + } +} + +/// Insert function NewF into module, placing it immediately after the +/// existing function PredF. If PredF does not exist, insert at the end. +static void insertFunctionAfter(Function *NewF, Function *PredF) { + Module *M = PredF->getParent(); + Module::FunctionListType &FList = M->getFunctionList(); + + for (Module::FunctionListType::iterator I = FList.begin(), E = FList.end(); + I != E; ++I) { + if (PredF == I) { + FList.insertAfter(I, NewF); + return; + } + } + + // Couldn't find PredF, insert at end + FList.push_back(NewF); +} + namespace { /// ComparableFunction - A struct that pairs together functions with a /// DataLayout so that we can keep them together as elements in the DenseSet. class ComparableFunction { public: - static const ComparableFunction EmptyKey; - static const ComparableFunction TombstoneKey; - static DataLayout * const LookupOnly; - - ComparableFunction(Function *Func, DataLayout *TD) - : Func(Func), Hash(profileFunction(Func)), TD(TD) {} + ComparableFunction(Function *Func) + : Func(Func), IsNew(true) {} Function *getFunc() const { return Func; } - unsigned getHash() const { return Hash; } - DataLayout *getTD() const { return TD; } + bool isNew() const { return IsNew; } // Drops AssertingVH reference to the function. Outside of debug mode, this // does nothing. @@ -123,37 +343,14 @@ Func = NULL; } + void markCompared() { + IsNew = false; + } private: - explicit ComparableFunction(unsigned Hash) - : Func(NULL), Hash(Hash), TD(NULL) {} - AssertingVH Func; - unsigned Hash; - DataLayout *TD; + bool IsNew; }; -const ComparableFunction ComparableFunction::EmptyKey = ComparableFunction(0); -const ComparableFunction ComparableFunction::TombstoneKey = - ComparableFunction(1); -DataLayout *const ComparableFunction::LookupOnly = (DataLayout*)(-1); - -} - -namespace llvm { - template <> - struct DenseMapInfo { - static ComparableFunction getEmptyKey() { - return ComparableFunction::EmptyKey; - } - static ComparableFunction getTombstoneKey() { - return ComparableFunction::TombstoneKey; - } - static unsigned getHashValue(const ComparableFunction &CF) { - return CF.getHash(); - } - static bool isEqual(const ComparableFunction &LHS, - const ComparableFunction &RHS); - }; } namespace { @@ -164,21 +361,71 @@ /// side of claiming that two functions are different). class FunctionComparator { public: - FunctionComparator(const DataLayout *TD, const Function *F1, - const Function *F2) - : F1(F1), F2(F2), TD(TD) {} + FunctionComparator(const DataLayout *TD, Function *F1, Function *F2) + : isDifferent(false), isNotMergeable(false), + BasicBlockCount(0), InstructionCount(0), DifferingInstructionsCount(0), + F1(F1), F2(F2), SimilarityMetric(0), TD(TD) {} - /// Test whether the two functions have equivalent behaviour. + /// Test whether the two functions have equivalent behaviour. Returns true if + /// they are equal or can be merged, false if not. bool compare(); -private: - /// Test whether two basic blocks have equivalent behaviour. - bool compare(const BasicBlock *BB1, const BasicBlock *BB2); + /// Indicate whether the two functions are an exact match after comparison + bool isExactMatch(); + + /// Indicate whether the two functions candidates for merging after comparison + bool isMergeCandidate(); + + /// Get a similarity metric between the two functions. Higher means more + /// similar. + unsigned getSimilarityMetric() { + if (!SimilarityMetric) + SimilarityMetric = (unsigned)(((float)InstructionCount - + DifferingInstructionsCount)/InstructionCount*10000); + return SimilarityMetric; + } + + Function *getF1() { return F1; } + Function *getF2() { return F2; } + ValueToValueMapTy &getF1toF2Map() { return id_map; } + ValueToValueMapTy &getF2toF1Map() { return seen_values; } + const DataLayout *getDataLayout() { return TD; } /// Assign or look up previously assigned numbers for the two values, and /// return whether the numbers are equal. Numbers are assigned in the order - /// visited. - bool enumerate(const Value *V1, const Value *V2); + /// visited. If NoSelfRef is set, F1 and F2 are not assigned to each other + /// (treated as 'equal'). + bool enumerate(const Value *V1, const Value *V2, bool NoSelfRef=false); + + /// Compare two Types, treating all pointer types as equal. + bool isEquivalentType(Type *Ty1, Type *Ty2) const; + + /// Instructions that differ between the two functions (F1's -> F2's inst). + DenseMap DifferingInstructions; + + /// Instructions that reference F1/F2 itself (recursive calls etc.) + /// These may need special treatment when merging differing functions. + DenseMap SelfRefInstructions; + + bool isDifferent; + bool isNotMergeable; + + // Comparison statistics + unsigned BasicBlockCount; + unsigned InstructionCount; + unsigned DifferingInstructionsCount; + +private: + /// Test whether two basic blocks have equivalent behaviour. Returns true if + /// they are equal or can be merged, false if not. PHINodes are not compared + /// in this function, but added to the PHIsFound list for delayed processing. + bool compare(const BasicBlock *BB1, const BasicBlock *BB2, + std::list > *PHIsFound); + + /// Compare pairs of PHI nodes. Returns true if all pairs are equal or can + /// be merged, false if not. + bool comparePHIs( + const std::list > &PHIs); /// Compare two Instructions for equivalence, similar to /// Instruction::isSameOperationAs but with modifications to the type @@ -193,16 +440,14 @@ return isEquivalentGEP(cast(GEP1), cast(GEP2)); } - /// Compare two Types, treating all pointer types as equal. - bool isEquivalentType(Type *Ty1, Type *Ty2) const; - // The two functions undergoing comparison. - const Function *F1, *F2; + Function *F1, *F2; - const DataLayout *TD; + unsigned SimilarityMetric; - DenseMap id_map; - DenseSet seen_values; + const DataLayout *TD; + ValueToValueMapTy id_map; + ValueToValueMapTy seen_values; }; } @@ -324,6 +569,9 @@ SI->getAlignment() == cast(I2)->getAlignment() && SI->getOrdering() == cast(I2)->getOrdering() && SI->getSynchScope() == cast(I2)->getSynchScope(); + if (const AllocaInst *AI = dyn_cast(I1)) + return AI->getArraySize() == cast(I2)->getArraySize() && + AI->getAllocatedType() == cast(I2)->getAllocatedType(); if (const CmpInst *CI = dyn_cast(I1)) return CI->getPredicate() == cast(I2)->getPredicate(); if (const CallInst *CI = dyn_cast(I1)) @@ -388,13 +636,14 @@ // Compare two values used by the two functions under pair-wise comparison. If // this is the first time the values are seen, they're added to the mapping so // that we will detect mismatches on next use. -bool FunctionComparator::enumerate(const Value *V1, const Value *V2) { +bool FunctionComparator::enumerate(const Value *V1, const Value *V2, + bool NoSelfRef/*=false*/) { // Check for function @f1 referring to itself and function @f2 referring to // itself, or referring to each other, or both referring to either of them. // They're all equivalent if the two functions are otherwise equivalent. - if (V1 == F1 && V2 == F2) + if (!NoSelfRef && V1 == F1 && V2 == F2) return true; - if (V1 == F2 && V2 == F1) + if (!NoSelfRef && V1 == F2 && V2 == F1) return true; if (const Constant *C1 = dyn_cast(V1)) { @@ -406,7 +655,10 @@ isEquivalentType(C1->getType(), C2->getType())) return true; // Try bitcasting C2 to C1's type. If the bitcast is legal and returns C1 - // then they must have equal bit patterns. + // then they must have equal bit patterns. Aggregate types cannot be + // bitcast. + if (C1->getType()->isAggregateType() || C2->getType()->isAggregateType()) + return false; return C1->getType()->canLosslesslyBitCastTo(C2->getType()) && C1 == ConstantExpr::getBitCast(const_cast(C2), C1->getType()); } @@ -419,37 +671,98 @@ // ensure that V2 isn't already equivalent to something else. For this // purpose, we track the V2 values in a set. - const Value *&map_elem = id_map[V1]; - if (map_elem) - return map_elem == V2; - if (!seen_values.insert(V2).second) + ValueToValueMapTy::iterator I = id_map.find(V1); + if (I != id_map.end()) + return V2 == I->second; + // FIXME: Const casts!!! + if (!seen_values.insert(std::make_pair(V2, const_cast(V1))).second) return false; - map_elem = V2; + id_map[V1] = const_cast(V2); return true; } -// Test whether two basic blocks have equivalent behaviour. -bool FunctionComparator::compare(const BasicBlock *BB1, const BasicBlock *BB2) { - BasicBlock::const_iterator F1I = BB1->begin(), F1E = BB1->end(); - BasicBlock::const_iterator F2I = BB2->begin(), F2E = BB2->end(); +/// Test whether two basic blocks have equivalent behaviour. Returns true if the +/// blocks can be merged, false if they cannot. Differing instructions are +/// recorded in DifferingInstructions. +bool FunctionComparator::compare(const BasicBlock *BB1, const BasicBlock *BB2, + std::list > *PHIsFound) { + BasicBlock::const_iterator F1I, F1E, F2I, F2E; + + for (F1I = BB1->begin(), F1E = BB1->end(), + F2I = BB2->begin(), F2E = BB2->end(); + F1I != F1E && F2I != F2E; ++F1I, ++F2I) { + // Skip debug information + const CallInst *DbgCall; + while (F1I != F1E && (DbgCall = dyn_cast(F1I)) && + DbgCall->getCalledFunction() && + DbgCall->getCalledFunction()->hasName() && + DbgCall->getCalledFunction()->getName().startswith("llvm.dbg.")) + ++F1I; + + while (F2I != F2E && (DbgCall = dyn_cast(F2I)) && + DbgCall->getCalledFunction() && + DbgCall->getCalledFunction()->hasName() && + DbgCall->getCalledFunction()->getName().startswith("llvm.dbg.")) + ++F2I; + + if (F1I == F1E || F2I == F2E) + break; + + // Ok, we're dealing with real instructions. Check a few cases that will + // prevent merging first. + + // Cannot merge insts that differ in whether they have uses + if (F1I->use_empty() != F2I->use_empty()) { + // TODO: Could implement merging for this case (would need to introduce a + // dummy value in the PHI node etc.) + return false; + } + + // Cannot merge insts whose types are non-equivalent + if (!isEquivalentType(F1I->getType(), F2I->getType())) { + return false; + } - do { - if (!enumerate(F1I, F2I)) + // TODO: Currently cannot merge InvokeInsts with differing result types + // that have uses. We cannot push up a bitcast into their block after + // them because they are terminators. Would need to insert an + // additional BB. + if (isa(F1I) && !F1I->use_empty() && + F1I->getType() != F2I->getType()) return false; + if (!enumerate(F1I, F2I)) + goto differing_instructions; + if (const GetElementPtrInst *GEP1 = dyn_cast(F1I)) { const GetElementPtrInst *GEP2 = dyn_cast(F2I); if (!GEP2) - return false; + goto differing_instructions; if (!enumerate(GEP1->getPointerOperand(), GEP2->getPointerOperand())) - return false; + goto differing_instructions; if (!isEquivalentGEP(GEP1, GEP2)) + goto differing_instructions; + } else if (const PHINode *Phi1 = dyn_cast(F1I)) { + const PHINode *Phi2 = dyn_cast(F2I); + // We can't currently merge a PHI and non-PHI instruction + if (!Phi2) return false; + + // We can't currently merge PHI nodes with different numbers of incoming + // values + if (F1I->getNumOperands() != F2I->getNumOperands()) + return false; + + // We need to treat PHI nodes specially. Their incoming values may be in a + // different order even if they are equivalent. We can't compare them + // until we've seen the incoming blocks and know which values are + // equivalent. Therefore postpone PHINode comparison until the end. + PHIsFound->push_back(std::make_pair(Phi1, Phi2)); } else { if (!isEquivalentOperation(F1I, F2I)) - return false; + goto differing_instructions; assert(F1I->getNumOperands() == F2I->getNumOperands()); for (unsigned i = 0, e = F1I->getNumOperands(); i != e; ++i) { @@ -457,50 +770,100 @@ Value *OpF2 = F2I->getOperand(i); if (!enumerate(OpF1, OpF2)) - return false; + goto differing_instructions; if (OpF1->getValueID() != OpF2->getValueID() || !isEquivalentType(OpF1->getType(), OpF2->getType())) - return false; + goto differing_instructions; + + if ((OpF1 == F1 && OpF2 == F2) || (OpF1 == F2 && OpF2 == F1)) + SelfRefInstructions[F1I] = F2I; } } + continue; + + differing_instructions: + // Cannot merge functions with differing landing pad instructions yet. They + // would need special treatment which involves updating the corresponding + // invoke instructions. + if (isa(F1I)) + return false; - ++F1I, ++F2I; - } while (F1I != F1E && F2I != F2E); + DifferingInstructions[F1I] = F2I; + } + // We cannot currently merge basic blocks with different instruction counts return F1I == F1E && F2I == F2E; } +bool FunctionComparator::comparePHIs( + const std::list > &PHIs) { + if (PHIs.empty()) + return true; + + for (std::list >::const_iterator + I = PHIs.begin(), E = PHIs.end(); I != E; ++I) { + const PHINode *Phi1 = I->first, *Phi2 = I->second; + + for (unsigned ValId = 0, ValNum = Phi1->getNumIncomingValues(); + ValId < ValNum; ++ValId) { + Value *Phi1Val = Phi1->getIncomingValue(ValId); + + // Get corresponding Phi2Val + Value *BBinPhi2Val = getF1toF2Map()[Phi1->getIncomingBlock(ValId)]; + + if (!BBinPhi2Val) + return false; // Currently can't handle differing predecessor blocks + + BasicBlock *BBinPhi2 = cast(BBinPhi2Val); + Value *Phi2Val = Phi2->getIncomingValueForBlock(BBinPhi2); + + // Enumerate the values. If the PHI node references the function itself (a + // very rare case), we mark it as different (NoSelfRef). This is only + // necessary for outline merging, not equiv merging. TODO: Make equal + // merging possible with such PHI nodes. + if (!enumerate(Phi1Val, Phi2Val,/*NoSelfRef=*/true)) { + DifferingInstructions[Phi1] = Phi2; + break; + } + } + } + + return true; +} + // Test whether the two functions have equivalent behaviour. bool FunctionComparator::compare() { // We need to recheck everything, but check the things that weren't included // in the hash first. - if (F1->getAttributes() != F2->getAttributes()) - return false; + goto not_mergeable; if (F1->hasGC() != F2->hasGC()) - return false; + goto not_mergeable; if (F1->hasGC() && F1->getGC() != F2->getGC()) - return false; + goto not_mergeable; if (F1->hasSection() != F2->hasSection()) - return false; + goto not_mergeable; if (F1->hasSection() && F1->getSection() != F2->getSection()) - return false; + goto not_mergeable; if (F1->isVarArg() != F2->isVarArg()) - return false; + goto not_mergeable; + + if (F1->size() != F2->size()) + goto not_mergeable; - // TODO: if it's internal and only used in direct calls, we could handle this - // case too. + // TODO: if it's internal and only used in direct calls, we could handle + // this case too. if (F1->getCallingConv() != F2->getCallingConv()) - return false; + goto not_mergeable; if (!isEquivalentType(F1->getFunctionType(), F2->getFunctionType())) - return false; + goto not_mergeable; assert(F1->arg_size() == F2->arg_size() && "Identically typed functions have different numbers of args!"); @@ -517,33 +880,302 @@ // linked list is immaterial. Our walk starts at the entry block for both // functions, then takes each block from each terminator in order. As an // artifact, this also means that unreachable blocks are ignored. - SmallVector F1BBs, F2BBs; - SmallSet VisitedBBs; // in terms of F1. + { + SmallVector F1BBs, F2BBs; + SmallSet VisitedBBs; // in terms of F1. + std::list > PHIsFound; + + F1BBs.push_back(&F1->getEntryBlock()); + F2BBs.push_back(&F2->getEntryBlock()); + + VisitedBBs.insert(F1BBs[0]); + while (!F1BBs.empty()) { + const BasicBlock *F1BB = F1BBs.pop_back_val(); + const BasicBlock *F2BB = F2BBs.pop_back_val(); + + // Check for control flow divergence + if (!enumerate(F1BB, F2BB)) + goto not_mergeable; + + const TerminatorInst *F1TI = F1BB->getTerminator(); + const TerminatorInst *F2TI = F2BB->getTerminator(); + + // TODO: Implement merging of blocks with different numbers of instructions. + if (F1TI->getNumSuccessors() != F2TI->getNumSuccessors() || + F1BB->size() != F2BB->size()) + goto not_mergeable; + + // The actual instruction-by-instruction comparison + if (!compare(F1BB, F2BB, &PHIsFound)) + goto not_mergeable; + + // FIXME: Count this in compare(F1BB,F2BB) so it doesn't include debug + // instructions. + InstructionCount += std::max(F1BB->size(), F2BB->size()); + + assert(F1TI->getNumSuccessors() == F2TI->getNumSuccessors()); + for (unsigned i = 0, e = F1TI->getNumSuccessors(); i != e; ++i) { + if (!VisitedBBs.insert(F1TI->getSuccessor(i))) + continue; - F1BBs.push_back(&F1->getEntryBlock()); - F2BBs.push_back(&F2->getEntryBlock()); + F1BBs.push_back(F1TI->getSuccessor(i)); + F2BBs.push_back(F2TI->getSuccessor(i)); + } + } - VisitedBBs.insert(F1BBs[0]); - while (!F1BBs.empty()) { - const BasicBlock *F1BB = F1BBs.pop_back_val(); - const BasicBlock *F2BB = F2BBs.pop_back_val(); + BasicBlockCount = VisitedBBs.size(); - if (!enumerate(F1BB, F2BB) || !compare(F1BB, F2BB)) - return false; + // After we've seen all values and BBs, compare the PHI nodes + if (!comparePHIs(PHIsFound)) + goto not_mergeable; + } + + if (DifferingInstructions.size()) { + // Currently we can't merge vararg functions with differing instructions. + // TODO: Explore whether this is feasible; the difficult bit is the + // additional argument we need to add. + if (F1->isVarArg()) + goto not_mergeable; + + isDifferent = true; + DifferingInstructionsCount += DifferingInstructions.size(); + + DEBUG(dbgs() << "Similar fns: " << F1->getName() << " and " << F2->getName() + << " bbs=" << BasicBlockCount << " insts=" << InstructionCount + << " different=" << DifferingInstructionsCount << " metric=" + << getSimilarityMetric() << '\n'); + } - const TerminatorInst *F1TI = F1BB->getTerminator(); - const TerminatorInst *F2TI = F2BB->getTerminator(); + return true; - assert(F1TI->getNumSuccessors() == F2TI->getNumSuccessors()); - for (unsigned i = 0, e = F1TI->getNumSuccessors(); i != e; ++i) { - if (!VisitedBBs.insert(F1TI->getSuccessor(i))) - continue; +not_mergeable: + // Fail: cannot merge the two functions + isNotMergeable = true; + return false; +} + +bool FunctionComparator::isExactMatch() { + return (!isNotMergeable && !isDifferent); +} + +bool FunctionComparator::isMergeCandidate() { + if (isNotMergeable) + return false; + + if (!isDifferent) + return true; + + // Heuristic when to attempt merging + if (InstructionCount > MergeDifferingMinInsts && + DifferingInstructionsCount <= MergeMaxDiffering && + getSimilarityMetric() > MergeMinSimilarity*100) { + return true; + } else { + DEBUG(dbgs() << "Candidate excluded by parameters.\n"); + } + + return false; +} + +namespace { + +struct FunctionComparatorOrdering { + bool operator () (FunctionComparator *LHS, FunctionComparator *RHS) { + unsigned MetricLHS = LHS->getSimilarityMetric(), + MetricRHS = RHS->getSimilarityMetric(); + + // Fall back to pointer comparison with identical metrics. + if (MetricLHS == MetricRHS) + return LHS > RHS; + return MetricLHS > MetricRHS; + } +}; + +class MergeRegistry { +public: + typedef MapVector > FnCompareMap; + typedef std::set + FnComparatorSet; + typedef std::map SimilarFnMap; + + ~MergeRegistry(); - F1BBs.push_back(F1TI->getSuccessor(i)); - F2BBs.push_back(F2TI->getSuccessor(i)); + /// Defer a function for consideration in the next round. + void defer(Function *F); + + /// Return true if we have deferred functions that can be enqueued. + bool haveDeferred() { return !Deferred.empty(); } + + /// Move all the deferred functions into buckets to consider them for merging. + /// Returns number of functions that have been added. + unsigned enqueue(); + + /// Add a candidate for merging + void insertCandidate(FunctionComparator *Comp); + + /// Remove a Function from the FnSet and queue it up for a second sweep of + /// analysis if Reanalyze is set. If it is a candidate for merging, remove it + /// from consideration. + void remove(Function *F, bool Reanalyze=true); + + /// Return the similarity metric of the most similar function to F that is + /// not listed in the Ignore set. + unsigned getMaxSimilarity(Function *F, const DenseSet &Ignore); + + /// The collection of buckets that contain functions that may be similar to + /// each other (same hash value). + FnCompareMap FunctionsToCompare; + + std::list FunctionsToMerge; + SimilarFnMap SimilarFunctions; + +private: + typedef std::vector FnDeferredQueue; + + /// A work queue of functions that may have been modified and should be + /// analyzed again. + FnDeferredQueue Deferred; +}; + +} // end anonymous namespace + +MergeRegistry::~MergeRegistry() { + for (std::list::iterator + I = FunctionsToMerge.begin(), E = FunctionsToMerge.end(); + I != E; ++I) + delete *I; +} + +static bool isComparisonCandidate(Function *F) { + // Ignore declarations and tiny functions - no point in merging those + return (!F->isDeclaration() && !F->hasAvailableExternallyLinkage() && + !(F->size() == 1 && F->begin()->size() < MergeMinInsts) && + !F->getName().endswith(MERGED_SUFFIX)); +} + +void MergeRegistry::defer(Function *F) { + if (isComparisonCandidate(F)) + Deferred.push_back(F); +} + +// Move functions from Deferred into buckets. remove() may have been called +// multiple times for the same function, so eliminate duplicates using the +// set. We reverse them because MergeFunctions::insert inserts at the front +// of each bucket. +unsigned MergeRegistry::enqueue() { + DenseSet InsertedFuncs; + + for (std::vector::reverse_iterator + DefI = Deferred.rbegin(), DefE = Deferred.rend(); + DefI != DefE; ++DefI) { + if (!*DefI) continue; + Function *F = cast(*DefI); + if (InsertedFuncs.find(F) != InsertedFuncs.end()) continue; + if (!isComparisonCandidate(F)) continue; + + unsigned Hash = profileFunction(F); + FunctionsToCompare[Hash].push_front(F); + + InsertedFuncs.insert(F); + } + + Deferred.clear(); + + return InsertedFuncs.size(); +} + +static void dumpSimilar(MergeRegistry::SimilarFnMap &Map, raw_ostream &O) { + O << "**** Similar function set dump:\n"; + + for (MergeRegistry::SimilarFnMap::iterator I = Map.begin(), E = Map.end(); + I != E; ++I) { + Function *F = I->first; + MergeRegistry::FnComparatorSet &S = I->second; + + O << " - " << F->getName() << ":\n"; + for (MergeRegistry::FnComparatorSet::iterator II = S.begin(), EE = S.end(); + II != EE; ++II) { + FunctionComparator *C = *II; + O << " " << C->getF1()->getName() << " and " + << C->getF2()->getName() << " metric " << C->getSimilarityMetric() + << "\n"; } } - return true; +} + +void MergeRegistry::insertCandidate(FunctionComparator *Comp) { + FunctionsToMerge.push_back(Comp); + SimilarFunctions[Comp->getF1()].insert(Comp); +} + +static void removeFromBucket(Function *F, + std::list &Bucket) { + for (std::list::iterator + I = Bucket.begin(), E = Bucket.end(); I != E; ++I) { + if (I->getFunc() == F) { + Bucket.erase(I); + return; + } + } +} + +void MergeRegistry::remove(Function *F, bool Reanalyze/*=true*/) { + + // There is no need to remove a function that is not already + // in a bucket. + if (!isComparisonCandidate(F)) + return; + + unsigned Hash = profileFunction(F); + std::list &Bucket = FunctionsToCompare[Hash]; + + removeFromBucket(F, Bucket); + + if (Reanalyze) + defer(F); + + // Check whether we have any existing FunctionComparator objects for this fn. + // If yes, discard them because F has changed. Retry merging for those + // functions by adding them to Deferred. + std::list::iterator I = FunctionsToMerge.begin(); + while (I != FunctionsToMerge.end()) { + FunctionComparator *Comp = *I; + if (Comp->getF1() == F) { + Function *OtherF = Comp->getF2(); + defer(OtherF); + removeFromBucket(OtherF, Bucket); + if (!SimilarFunctions[F].erase(Comp)) + llvm_unreachable("Inconsistent SimilarFunctions set"); + I = FunctionsToMerge.erase(I); + delete Comp; + } else if (Comp->getF2() == F) { + Function *OtherF = Comp->getF1(); + defer(OtherF); + removeFromBucket(OtherF, Bucket); + if (!SimilarFunctions[OtherF].erase(Comp)) + llvm_unreachable("Inconsistent SimilarFunctions set"); + I = FunctionsToMerge.erase(I); + delete Comp; + } else { + ++I; + } + } +} + +unsigned MergeRegistry::getMaxSimilarity(Function *F, + const DenseSet &Ignore) { + FnComparatorSet &Similar = SimilarFunctions[F]; + + for (FnComparatorSet::iterator I = Similar.begin(), E = Similar.end(); + I != E; ++I) { + FunctionComparator *Comp = *I; + if (Ignore.count(Comp->getF2())) + continue; + + return Comp->getSimilarityMetric(); + } + + return 0; } namespace { @@ -555,7 +1187,7 @@ /// class MergeFunctions : public ModulePass { public: - static char ID; + static char ID; MergeFunctions() : ModulePass(ID), HasGlobalAliases(false) { initializeMergeFunctionsPass(*PassRegistry::getPassRegistry()); @@ -564,20 +1196,6 @@ bool runOnModule(Module &M); private: - typedef DenseSet FnSetType; - - /// A work queue of functions that may have been modified and should be - /// analyzed again. - std::vector Deferred; - - /// Insert a ComparableFunction into the FnSet, or merge it away if it's - /// equal to one that's already present. - bool insert(ComparableFunction &NewF); - - /// Remove a Function from the FnSet and queue it up for a second sweep of - /// analysis. - void remove(Function *F); - /// Find the functions that use this Value and remove them from FnSet and /// queue the functions. void removeUsers(Value *V); @@ -586,11 +1204,29 @@ /// necessary to make types match. void replaceDirectCallers(Function *Old, Function *New); + /// Process functions in the specified bucket, by either doing equiv merging + /// marking them for diff merging. Returns false if the bucket needs to be + /// re-scanned after an equiv merge. Sets Changed if the module was changed by + /// equiv merge. + bool mergeBucket(std::list &Fns, bool &Changed); + + /// Exhaustively compare all functions in each bucket and do equiv merging + /// where possible. Functions that have already been compared will not be + /// compared again. Returns true if the module was modified. + bool doExhaustiveCompareMerge(); + + /// Merge all the functions marked for diff merging. Returns true if the + /// module was modified. + bool doDiffMerge(); + /// Merge two equivalent functions. Upon completion, G may be deleted, or may /// be converted into a thunk. In either case, it should never be visited /// again. void mergeTwoFunctions(Function *F, Function *G); + /// Merge a set of functions with differences. + void outlineAndMergeFunctions(SmallVectorImpl &Fns); + /// Replace G with a thunk or an alias to F. Deletes G. void writeThunkOrAlias(Function *F, Function *G); @@ -598,16 +1234,20 @@ /// of G with bitcast(F). Deletes G. void writeThunk(Function *F, Function *G); + /// Replace G with a tail call to F with an additional argument. + /// + void writeThunkWithChoice(Function *NewF, Function *OldF, int Choice); + /// Replace G with an alias to F. Deletes G. void writeAlias(Function *F, Function *G); - /// The set of all distinct functions. Use the insert() and remove() methods - /// to modify it. - FnSetType FnSet; - /// DataLayout for more accurate GEP comparisons. May be NULL. DataLayout *TD; + /// Merge registry. Stores all the information about functions being + /// considered for merging as well as current candidates for merging. + MergeRegistry Registry; + /// Whether or not the target supports global aliases. bool HasGlobalAliases; }; @@ -623,76 +1263,26 @@ bool MergeFunctions::runOnModule(Module &M) { bool Changed = false; + TD = getAnalysisIfAvailable(); - for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) { - if (!I->isDeclaration() && !I->hasAvailableExternallyLinkage()) - Deferred.push_back(WeakVH(I)); - } - FnSet.resize(Deferred.size()); + for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) + Registry.defer(I); do { - std::vector Worklist; - Deferred.swap(Worklist); + unsigned InsertCount = Registry.enqueue(); DEBUG(dbgs() << "size of module: " << M.size() << '\n'); - DEBUG(dbgs() << "size of worklist: " << Worklist.size() << '\n'); + DEBUG(dbgs() << "size of worklist: " << InsertCount << '\n'); - // Insert only strong functions and merge them. Strong function merging - // always deletes one of them. - for (std::vector::iterator I = Worklist.begin(), - E = Worklist.end(); I != E; ++I) { - if (!*I) continue; - Function *F = cast(*I); - if (!F->isDeclaration() && !F->hasAvailableExternallyLinkage() && - !F->mayBeOverridden()) { - ComparableFunction CF = ComparableFunction(F, TD); - Changed |= insert(CF); - } - } - - // Insert only weak functions and merge them. By doing these second we - // create thunks to the strong function when possible. When two weak - // functions are identical, we create a new strong function with two weak - // weak thunks to it which are identical but not mergable. - for (std::vector::iterator I = Worklist.begin(), - E = Worklist.end(); I != E; ++I) { - if (!*I) continue; - Function *F = cast(*I); - if (!F->isDeclaration() && !F->hasAvailableExternallyLinkage() && - F->mayBeOverridden()) { - ComparableFunction CF = ComparableFunction(F, TD); - Changed |= insert(CF); - } - } - DEBUG(dbgs() << "size of FnSet: " << FnSet.size() << '\n'); - } while (!Deferred.empty()); + Changed |= doExhaustiveCompareMerge(); + } while (Registry.haveDeferred()); - FnSet.clear(); + Changed |= doDiffMerge(); return Changed; } -bool DenseMapInfo::isEqual(const ComparableFunction &LHS, - const ComparableFunction &RHS) { - if (LHS.getFunc() == RHS.getFunc() && - LHS.getHash() == RHS.getHash()) - return true; - if (!LHS.getFunc() || !RHS.getFunc()) - return false; - - // One of these is a special "underlying pointer comparison only" object. - if (LHS.getTD() == ComparableFunction::LookupOnly || - RHS.getTD() == ComparableFunction::LookupOnly) - return false; - - assert(LHS.getTD() == RHS.getTD() && - "Comparing functions for different targets"); - - return FunctionComparator(LHS.getTD(), LHS.getFunc(), - RHS.getFunc()).compare(); -} - // Replace direct callers of Old with New. void MergeFunctions::replaceDirectCallers(Function *Old, Function *New) { Constant *BitcastNew = ConstantExpr::getBitCast(New, Old->getType()); @@ -702,7 +1292,7 @@ ++UI; CallSite CS(*TheIter); if (CS && CS.isCallee(TheIter)) { - remove(CS.getInstruction()->getParent()->getParent()); + Registry.remove(CS.getInstruction()->getParent()->getParent()); TheIter.getUse().set(BitcastNew); } } @@ -745,6 +1335,8 @@ // If G was internal then we may have replaced all uses of G with F. If so, // stop here and delete G. There's no need for a thunk. if (G->hasLocalLinkage() && G->use_empty()) { + DEBUG(dbgs() << "All uses of " << G->getName() << " replaced by " + << F->getName() << ". Removing it.\n"); G->eraseFromParent(); return; } @@ -757,9 +1349,14 @@ SmallVector Args; unsigned i = 0; FunctionType *FFTy = F->getFunctionType(); + Type *IntPtrTy = TD ? TD->getIntPtrType(FFTy->getContext()) : NULL; for (Function::arg_iterator AI = NewG->arg_begin(), AE = NewG->arg_end(); AI != AE; ++AI) { - Args.push_back(createCast(Builder, (Value*)AI, FFTy->getParamType(i))); + Value *Orig = AI; + Value *Cast = createCastIfNeeded(AI, FFTy->getParamType(i), NULL, IntPtrTy); + if (Cast != Orig) + Builder.Insert(cast(Cast)); + Args.push_back(Cast); ++i; } @@ -778,10 +1375,57 @@ G->replaceAllUsesWith(NewG); G->eraseFromParent(); - DEBUG(dbgs() << "writeThunk: " << NewG->getName() << '\n'); + DEBUG(dbgs() << "writeThunk: " << NewG->getName() << " calling " + << F->getName() << '\n'); ++NumThunksWritten; } +void MergeFunctions::writeThunkWithChoice(Function *NewF, Function *OldF, + int Choice) { + // Deleting the body of a function sets its linkage to External. Save the old + // one here and restore it at the end. + GlobalValue::LinkageTypes OldFLinkage = OldF->getLinkage(); + + // Delete OldF's body + OldF->deleteBody(); + + // Insert single BB with tail call + BasicBlock *BB = BasicBlock::Create(OldF->getContext(), "", OldF); + IRBuilder Builder(BB); + + SmallVector Args; + unsigned i = 0; + FunctionType *FFTy = NewF->getFunctionType(); + Type *IntPtrTy = TD ? TD->getIntPtrType(FFTy->getContext()) : NULL; + for (Function::arg_iterator AI = OldF->arg_begin(), AE = OldF->arg_end(); + AI != AE; ++AI) { + Value *Orig = AI; + Value *Cast = createCastIfNeeded(AI, FFTy->getParamType(i), NULL, IntPtrTy); + if (Cast != Orig) + Builder.Insert(cast(Cast)); + Args.push_back(Cast); + ++i; + } + Args.push_back(Builder.getInt32(Choice)); + + CallInst *CI = Builder.CreateCall(NewF, Args); + CI->setTailCall(); + CI->setCallingConv(NewF->getCallingConv()); + if (OldF->getReturnType()->isVoidTy()) { + Builder.CreateRetVoid(); + } else { + Type *RetTy = OldF->getReturnType(); + if (CI->getType()->isIntegerTy() && RetTy->isPointerTy()) + Builder.CreateRet(Builder.CreateIntToPtr(CI, RetTy)); + else if (CI->getType()->isPointerTy() && RetTy->isIntegerTy()) + Builder.CreateRet(Builder.CreatePtrToInt(CI, RetTy)); + else + Builder.CreateRet(Builder.CreateBitCast(CI, RetTy)); + } + + OldF->setLinkage(OldFLinkage); +} + // Replace G with an alias to F and delete G. void MergeFunctions::writeAlias(Function *F, Function *G) { Constant *BitcastF = ConstantExpr::getBitCast(F, G->getType()); @@ -833,55 +1477,381 @@ ++NumFunctionsMerged; } -// Insert a ComparableFunction into the FnSet, or merge it away if equal to one -// that was already inserted. -bool MergeFunctions::insert(ComparableFunction &NewF) { - std::pair Result = FnSet.insert(NewF); - if (Result.second) { - DEBUG(dbgs() << "Inserting as unique: " << NewF.getFunc()->getName() << '\n'); - return false; +static void insertCondAndRemapInstructions( + Instruction *F1InstInNewF, const std::vector &F2Insts, + Function *NewF, ValueToValueMapTy &F1toNewF, + const SmallVectorImpl &Comps, + Type *IntPtrTy) { + assert(F2Insts.size() == Comps.size() && + "Mis-match between F2Insts & Comps!"); + + SmallVector F2InstsInNewF; + for (unsigned FnI = 0, FnE = F2Insts.size(); FnI != FnE; ++FnI) { + const Instruction *F2Inst = F2Insts[FnI]; + if (!F2Inst) { + F2InstsInNewF.push_back(NULL); + continue; + } + + Instruction *F2InstInNewF = F2Inst->clone(); + + // Remap F2Inst: F2 values -> F1 values + RemapInstruction(F2InstInNewF, Comps[FnI]->getF2toF1Map(), + RF_NoModuleLevelChanges); + // Remap F2Inst: F1 values -> NewF values + RemapInstruction(F2InstInNewF, F1toNewF, RF_NoModuleLevelChanges); + + F2InstsInNewF.push_back(F2InstInNewF); + } + + SmallVector Terminators; + SplitBlockAndInsertSwitch(&NewF->getArgumentList().back(), F1InstInNewF, + F2InstsInNewF, Terminators, IntPtrTy); + + assert(Terminators.size() == F2InstsInNewF.size() + 1 && + "Not enough terminators returned"); + + // F2InstsInNewF are now hooked up to the correct values in NewF. However, + // some of their operands may be pointers/integers so they could potentially + // have the wrong type in NewF (since we treat all pointers and integers of + // same size as equal). Insert casts if needed. + for (unsigned FnI = 0, FnE = F2InstsInNewF.size(); FnI != FnE; ++FnI) { + Instruction *F2InstInNewF = F2InstsInNewF[FnI]; + if (!F2InstInNewF) + continue; + const Instruction *F2Inst = F2Insts[FnI]; + + for (unsigned OpId=0; OpId < F2InstInNewF->getNumOperands(); ++OpId) { + Value *F2NewFOperand = F2InstInNewF->getOperand(OpId); + Value *F2OrigOperand = F2Inst->getOperand(OpId); + + if (F2NewFOperand->getType() != F2OrigOperand->getType()) { + Value *Cast = createCastIfNeeded(F2NewFOperand, + F2OrigOperand->getType(), + F2InstInNewF, + IntPtrTy); + F2InstInNewF->setOperand(OpId, Cast); + } + } } +} - const ComparableFunction &OldF = *Result.first; +static void mergePHINode(const SmallVectorImpl &Fns, + Function *NewF, + ValueToValueMapTy &VMap, /* F1->FNew */ + const PHINode *F1PhiInst, + std::vector F2PhiInsts) { + PHINode *F1PhiInNewF = dyn_cast(VMap[F1PhiInst]); + assert(F1PhiInNewF && "Cannot find F1Inst in NewF!"); + + // The incoming blocks in any of the F2PhiInsts may be in a different order. + // If this is the case, we have to reorder them. F2PhiInsts is intentionally a + // copy, so we can modify it + SmallVector GCInsts; // so we can delete them later. + for (unsigned FnI = 0, FnE = F2PhiInsts.size(); FnI != FnE; ++FnI) { + const PHINode *F2PhiInst = dyn_cast_or_null(F2PhiInsts[FnI]); + if (!F2PhiInst) + continue; + + for (unsigned I = 0, E = F1PhiInNewF->getNumIncomingValues(); I < E; ++I) { + if (!Fns[FnI]->enumerate(F1PhiInst->getIncomingBlock(I), + F2PhiInst->getIncomingBlock(I))) { + // Non-equivalent blocks in the same position - need to reorder PhiInst + PHINode *ReorderedF2PhiInst = PHINode::Create(F2PhiInst->getType(), E); + + for (unsigned II = 0; II < E; ++II) { + Value *BBVal = + Fns[FnI]->getF1toF2Map()[F1PhiInst->getIncomingBlock(II)]; + BasicBlock *BB = cast(BBVal); + Value *Val = F2PhiInst->getIncomingValueForBlock(BB); + ReorderedF2PhiInst->addIncoming(Val, BB); + } + + F2PhiInsts[FnI] = ReorderedF2PhiInst; + GCInsts.push_back(ReorderedF2PhiInst); + break; + } + } + } - // Don't merge tiny functions, since it can just end up making the function - // larger. - // FIXME: Should still merge them if they are unnamed_addr and produce an - // alias. - if (NewF.getFunc()->size() == 1) { - if (NewF.getFunc()->front().size() <= 2) { - DEBUG(dbgs() << NewF.getFunc()->getName() - << " is to small to bother merging\n"); - return false; + // Now merge the PHI nodes. + for (unsigned i = 0; i < F1PhiInNewF->getNumIncomingValues(); ++i) { + Value *F1InValNewF = F1PhiInNewF->getIncomingValue(i), + *F1InVal = F1PhiInst->getIncomingValue(i); + BasicBlock *F1NewFInBlock = F1PhiInNewF->getIncomingBlock(i); + // If this is a repeat occurrence of the same incoming BasicBlock, we + // will have already dealt with it in a previous iteration. + if (F1PhiInNewF->getBasicBlockIndex(F1PhiInNewF->getIncomingBlock(i)) != + (int)i) + continue; + + Value *NewIncoming = F1InValNewF; + + Instruction *InsertPt = F1NewFInBlock->getTerminator(); + + // Build up a chain of cmps and selects that pick the correct incoming + // value. + for (unsigned FnI = 0, FnE = F2PhiInsts.size(); FnI != FnE; ++FnI) { + if (!F2PhiInsts[FnI]) + continue; + const PHINode *F2PhiInst = cast(F2PhiInsts[FnI]); + Value *F2InVal = F2PhiInst->getIncomingValue(i); + + // If we know these are equivalent, there's no further work to do + if (Fns[FnI]->enumerate(F1InVal, F2InVal,/*NoSelfRef=*/true) && + Fns[FnI]->enumerate(F1PhiInst->getIncomingBlock(i), + F2PhiInst->getIncomingBlock(i))) + continue; + + assert(Fns[FnI]->enumerate(F1PhiInst->getIncomingBlock(i), + F2PhiInst->getIncomingBlock(i)) && + "Non-equivalent incoming BBs in PHI."); + + // We have different incoming values from the same block + // Translate F2's incoming value to NewF if needed + Value *F2InValNewF = F2InVal; + if (!isa(F2InVal)) { + Value *V = Fns[FnI]->getF2toF1Map()[F2InVal]; // F2->F1 + F2InValNewF = VMap[V]; // F1->NewF + assert(V && F2InValNewF && "Cannot map F2InVal to NewF"); + } + + // Cast F2InValNewF to the correct type if needed + LLVMContext &Ctx = F1InValNewF->getType()->getContext(); + F2InValNewF = createCastIfNeeded(F2InValNewF, F1InValNewF->getType(), + InsertPt, Fns[FnI]->getDataLayout()->getIntPtrType(Ctx)); + + // Create compare & select + Value *ChoiceArg = &NewF->getArgumentList().back(); + Value *SelectBit = new ICmpInst(InsertPt, + ICmpInst::ICMP_EQ, + &NewF->getArgumentList().back(), + ConstantInt::get(ChoiceArg->getType(), + FnI+1)); + + // SelectBit true -> F2InValNewF, SelectBit false -> existing NewIncoming. + NewIncoming = SelectInst::Create(SelectBit, F2InValNewF, NewIncoming, "", + InsertPt); + } + + if (NewIncoming == F1InValNewF) + continue; // no change for this incoming value + + // Replace all occurrences of this incoming value/block by the new + // ones (phi nodes can have repeated arguments) + for (unsigned j=i; j < F1PhiInNewF->getNumIncomingValues(); ++j) { + if (F1PhiInNewF->getIncomingBlock(j) == F1NewFInBlock) { + F1PhiInNewF->setIncomingValue(j, NewIncoming); + } } } - // Never thunk a strong function to a weak function. - assert(!OldF.getFunc()->mayBeOverridden() || - NewF.getFunc()->mayBeOverridden()); + // Garbage-collect the reordered PHI nodes we temporarily created. + for (SmallVectorImpl::iterator I = GCInsts.begin(), + E = GCInsts.end(); I != E; ++I) + delete *I; +} + +static bool rewriteRecursiveCall( + const CallInst *F1I, const CallInst *F2I, CallInst *NewFI, + const Function *F1, const Function *F2, Function *NewF) { + if (!(F1I->getCalledFunction() == F1 && F2I->getCalledFunction() == F2) && + !(F1I->getCalledFunction() == F2 && F2I->getCalledFunction() == F1)) + return false; // not a recursive/mutually recursive call - DEBUG(dbgs() << " " << OldF.getFunc()->getName() << " == " - << NewF.getFunc()->getName() << '\n'); + // Replace NewFI by recursive call to NewF with additional choice argument + SmallVector Args; + for (unsigned AI = 0, End = NewFI->getNumArgOperands(); AI < End; ++AI) { + Value *Arg = NewFI->getArgOperand(AI); + + // Check if F1 or F2 is one of the arguments (veeery unusual case, don't + // handle it for now). + if (Arg == F1 || Arg == F2) + return false; + + Args.push_back(Arg); + } + + if (F1I->getCalledFunction() == F1 && F2I->getCalledFunction() == F2) { + Args.push_back(&NewF->getArgumentList().back()); + } else { + // Need to invert the choice argument + Value *ChoiceArg = &NewF->getArgumentList().back(); + Constant *One = ConstantInt::get(ChoiceArg->getType(), 1); + Args.push_back(BinaryOperator::Create(Instruction::Xor, ChoiceArg, One, "", + NewFI)); + } + + CallInst *CI = CallInst::Create(NewF, Args); + CI->setCallingConv(NewF->getCallingConv()); + + ReplaceInstWithInst(NewFI, CI); - Function *DeleteF = NewF.getFunc(); - NewF.release(); - mergeTwoFunctions(OldF.getFunc(), DeleteF); return true; } -// Remove a function from FnSet. If it was already in FnSet, add it to Deferred -// so that we'll look at it in the next round. -void MergeFunctions::remove(Function *F) { - // We need to make sure we remove F, not a function "equal" to F per the - // function equality comparator. +/// Clone F1 into a new function with F1's name + MERGE_SUFFIX. Adds an +/// additional i32 argument to the function. +static Function *cloneAndAddArgument(Function *F1, ValueToValueMapTy &VMap) { + LLVMContext &Context = F1->getContext(); + + std::vector ArgTypes; + for (Function::const_arg_iterator I = F1->arg_begin(), E = F1->arg_end(); + I != E; ++I) + ArgTypes.push_back(I->getType()); + ArgTypes.push_back(Type::getInt32Ty(Context)); + + FunctionType *FTy = FunctionType::get(F1->getFunctionType()->getReturnType(), + ArgTypes, + F1->getFunctionType()->isVarArg()); + Function *NewF = Function::Create(FTy, GlobalValue::InternalLinkage, + F1->getName()+MERGED_SUFFIX); + + insertFunctionAfter(NewF, F1); + + if (F1->hasSection()) + NewF->setSection(F1->getSection()); + + NewF->setCallingConv(CallingConv::Fast); + + Function::arg_iterator DestI = NewF->arg_begin(); + for (Function::const_arg_iterator I = F1->arg_begin(), E = F1->arg_end(); + I != E; ++I) { + DestI->setName(I->getName()); // Copy the name over... + VMap[I] = DestI++; // Add mapping to VMap + } + + // Name the selector argument + DestI->setName("__merge_arg"); + + SmallVector Returns; + CloneFunctionInto(NewF, F1, VMap, false, Returns); + + return NewF; +} + +typedef std::map > + CombinedDiffMap; + +void MergeFunctions::outlineAndMergeFunctions( + SmallVectorImpl &Fns) { + assert(!Fns.empty() && "Cannot merge empty set of functions"); + + // All comparator instances in Fns share the same F1 + Function *F1 = Fns.front()->getF1(); + + // Clone F1 into new function with an additional i32 argument + ValueToValueMapTy VMap; // maps F1 values -> NewF values + Function *NewF = cloneAndAddArgument(F1, VMap); + + // Combine all the DifferingInstructions maps in Fns into one single map of + // lists to aid the merging process. // - // The special "lookup only" ComparableFunction bypasses the expensive - // function comparison in favour of a pointer comparison on the underlying - // Function*'s. - ComparableFunction CF = ComparableFunction(F, ComparableFunction::LookupOnly); - if (FnSet.erase(CF)) { - DEBUG(dbgs() << "Removed " << F->getName() << " from set and deferred it.\n"); - Deferred.push_back(F); + // Map F1 instruction -> list of F2 instructions indexed by position in Fns. + CombinedDiffMap AllDifferingInstructions; + for (unsigned I = 0, E = Fns.size(); I != E; ++I) { + FunctionComparator *Comp = Fns[I]; + for (DenseMap::iterator + DiffI = Comp->DifferingInstructions.begin(), + DiffE = Comp->DifferingInstructions.end(); + DiffI != DiffE; ++DiffI) { + AllDifferingInstructions[DiffI->first].resize(Fns.size()); + AllDifferingInstructions[DiffI->first][I] = DiffI->second; + } + } + + // Merge differing PHI nodes. We need to handle these first because they could + // be affected later on when we split basic blocks, thus making them + // impossible to merge. + for (CombinedDiffMap::const_iterator I = AllDifferingInstructions.begin(), + E = AllDifferingInstructions.end(); + I != E; ++I) { + const PHINode *F1PhiInst = dyn_cast(I->first); + if (!F1PhiInst) + continue; + + const std::vector &F2PhiInsts = I->second; + + mergePHINode(Fns, NewF, VMap, F1PhiInst, F2PhiInsts); + } + + // Merge recursive calls + // + // TODO: We currently only support this optimization for pairs of functions. + // If more than two functions are merged, we mark the recursive calls as + // DifferingInstructions which causes switch statements to be inserted and + // recursive calls going through thunks. It wouldn't be too hard to implement + // self-recursive calls for multi-merges. *Mutually* recursive calls with + // multi-merges are a little trickier - that's why we leave it for now. + if (Fns.size() == 1) { + FunctionComparator *Comp = Fns.front(); + for (DenseMap::const_iterator + I = Comp->SelfRefInstructions.begin(), + E = Comp->SelfRefInstructions.end(); + I != E; ++I) { + const Instruction *F1I = I->first; + if (Comp->DifferingInstructions.find(F1I) != + Comp->DifferingInstructions.end()) + continue; // Differing in other ways too, so deal with it later. + + // Attempt recursive call rewriting + if (isa(F1I)) { + const CallInst *F1Call = cast(F1I); + const CallInst *F2Call = dyn_cast(I->second); + CallInst *NewFCall = dyn_cast(VMap[F1I]); + + if (F1Call && F2Call && NewFCall && + rewriteRecursiveCall(F1Call, F2Call, NewFCall, + Comp->getF1(), Comp->getF2(), NewF)) + continue; + } + + // Can't rewrite it. Mark as differing and insert conditional later + Comp->DifferingInstructions[F1I] = I->second; + } + } else { + for (unsigned I = 0, E = Fns.size(); I != E; ++I) { + FunctionComparator *Comp = Fns[I]; + for (DenseMap::const_iterator + II = Comp->SelfRefInstructions.begin(), + EE = Comp->SelfRefInstructions.end(); + II != EE; ++II) { + const Instruction *F1I = II->first; + if (Comp->DifferingInstructions.find(F1I) != + Comp->DifferingInstructions.end()) + continue; // Differing in other ways too, so deal with it later. + + AllDifferingInstructions[F1I].resize(Fns.size()); + AllDifferingInstructions[F1I][I] = II->second; + } + } + } + + // For each differing instruction, splice basic block, and insert conditional + LLVMContext &Context = NewF->getContext(); + Type *IntPtrType = TD->getIntPtrType(Context); + for (CombinedDiffMap::const_iterator I = AllDifferingInstructions.begin(), + E = AllDifferingInstructions.end(); + I != E; ++I) { + const Instruction *F1Inst = I->first; + const std::vector &F2Insts = I->second; + + assert(VMap.find(F1Inst) != VMap.end() && + "Cannot find differing inst!"); + Instruction *F1InstInNewF = cast(VMap[F1Inst]); + + if (isa(F1InstInNewF)) + continue; // we already handled these above + + insertCondAndRemapInstructions(F1InstInNewF, F2Insts, + NewF, VMap, Fns, IntPtrType); + } + + // Replace functions with thunks + writeThunkWithChoice(NewF, F1, 0); + for (unsigned FnI = 0, FnE = Fns.size(); FnI != FnE; ++FnI) { + Function *F2 = Fns[FnI]->getF2(); + writeThunkWithChoice(NewF, F2, FnI + 1); } } @@ -898,7 +1868,7 @@ UI != UE; ++UI) { Use &U = UI.getUse(); if (Instruction *I = dyn_cast(U.getUser())) { - remove(I->getParent()->getParent()); + Registry.remove(I->getParent()->getParent()); } else if (isa(U.getUser())) { // do nothing } else if (Constant *C = dyn_cast(U.getUser())) { @@ -909,3 +1879,202 @@ } } } + +bool MergeFunctions::mergeBucket(std::list &Fns, + bool &Changed) { + for (std::list::iterator FnI = Fns.begin(), + FnE = Fns.end(); FnI != FnE; ++FnI) { + if (!FnI->isNew()) + continue; + + if (!FnI->getFunc()) + continue; + + SmallVector DiffMergeCandidates; + + std::list::iterator Fn2I = FnI; + for (++Fn2I; Fn2I != FnE; ++Fn2I) { + if (!Fn2I->getFunc()) + continue; + + assert(FnI->getFunc() != Fn2I->getFunc() && + "Duplicate function in list!"); + + FunctionComparator *Comp = new FunctionComparator(TD, FnI->getFunc(), + Fn2I->getFunc()); + + if (!Comp->compare() || !Comp->isMergeCandidate()) { + delete Comp; + continue; + } + + // Never thunk a strong function to a weak function. + assert(!FnI->getFunc()->mayBeOverridden() || + Fn2I->getFunc()->mayBeOverridden()); + + if (Comp->isExactMatch()) { + // Equiv merge the two functions. Throw away any diff merge + // candidate we might have found so far. + delete Comp; + + DEBUG(dbgs() << "- Equiv merge " << FnI->getFunc()->getName() + << " == " << Fn2I->getFunc()->getName() << '\n'); + + Function *DeleteF = Fn2I->getFunc(); + Registry.remove(DeleteF, /*reanalyze=*/false); + + mergeTwoFunctions(FnI->getFunc(), DeleteF); + + Changed = true; + + // mergeTwoFunctions may have removed functions from this bucket and + // invalidated the iterators. Rescan the whole bucket, continuing + // from the current function (previous ones will have been + // markCompared()) + for (SmallVector::iterator + I = DiffMergeCandidates.begin(), E = DiffMergeCandidates.end(); + I != E; ++I) + delete *I; + + return false; + } else { + DiffMergeCandidates.push_back(Comp); + } + } + + if (!DiffMergeCandidates.empty()) { + // Add to our list of candidates for diff merging + for (SmallVector::iterator + I = DiffMergeCandidates.begin(), E = DiffMergeCandidates.end(); + I != E; ++I) { + Registry.insertCandidate(*I); + } + } + + FnI->markCompared(); + } + + return true; +} + +bool MergeFunctions::doExhaustiveCompareMerge() { + bool Changed = false; + + // Process buckets with strong functions first. + for (MergeRegistry::FnCompareMap::iterator + BucketsI = Registry.FunctionsToCompare.begin(), + BucketsE = Registry.FunctionsToCompare.end(); + BucketsI != BucketsE; ++BucketsI) { + std::list &Fns = BucketsI->second; + if (Fns.size() < 2 || Fns.front().getFunc()->mayBeOverridden()) + continue; + + DEBUG(dbgs() << "Processing strong bucket " << BucketsI->first << " with " + << Fns.size() << " functions\n"); + // Repeatedly scan this bucket, until we find no more functions to equiv + // merge. + while (!mergeBucket(Fns, Changed) && Fns.size() > 1) { + DEBUG(dbgs() << "Rescanning bucket.\n"); + } + } + + // Process buckets with weak functions. + for (MergeRegistry::FnCompareMap::iterator + BucketsI = Registry.FunctionsToCompare.begin(), + BucketsE = Registry.FunctionsToCompare.end(); + BucketsI != BucketsE; ++BucketsI) { + std::list &Fns = BucketsI->second; + if (Fns.size() < 2 || !Fns.front().getFunc()->mayBeOverridden()) + continue; + + DEBUG(dbgs() << "Processing weak bucket " << BucketsI->first << " with " + << Fns.size() << " functions\n"); + // Repeatedly scan this bucket, until we find no more functions to equiv + // merge. + while (!mergeBucket(Fns, Changed) && Fns.size() > 1) { + DEBUG(dbgs() << "Rescanning bucket.\n"); + } + } + + return Changed; +} + +static bool orderComparatorsByMetric(FunctionComparator *Cmp1, + FunctionComparator *Cmp2) { + return (Cmp1->getSimilarityMetric() > Cmp2->getSimilarityMetric()); +} + +bool MergeFunctions::doDiffMerge() { + if (Registry.FunctionsToMerge.empty()) + return false; + + bool Changed = false; + DenseSet MergedFns; // Functions that have already been merged + Registry.FunctionsToMerge.sort(orderComparatorsByMetric); + + DEBUG(dumpSimilar(Registry.SimilarFunctions, dbgs())); + + for (std::list::iterator + I = Registry.FunctionsToMerge.begin(), + E = Registry.FunctionsToMerge.end(); + I != E; ++I) { + FunctionComparator *Comp = *I; + Function *F1 = Comp->getF1(); + // Ignore it if we've already merged this fn + if (MergedFns.count(F1) || MergedFns.count(Comp->getF2())) + continue; + + assert(Registry.SimilarFunctions.count(F1) && + "Comparator doesn't exist in SimilarFunctions map"); + + // Look at all functions F that F1 could be merged with. Merge with each F, + // unless there is another function F' that is more similar to F than F1. + MergeRegistry::FnComparatorSet &SimilarFns = Registry.SimilarFunctions[F1]; + SmallVector CurrentMerge; + + for (MergeRegistry::FnComparatorSet::iterator + CandidateI = SimilarFns.begin(), CandidateE = SimilarFns.end(); + CandidateI != CandidateE; ++CandidateI) { + FunctionComparator *Comp2 = *CandidateI; + assert(Comp2->getF1() == F1 && "Inconsistency in SimilarFunctions"); + Function *F2 = Comp2->getF2(); + + // Ignore it if we've already merged this fn + if (MergedFns.count(F2)) + continue; + + // Check whether there is a better merge candidate for F2 + if (Registry.getMaxSimilarity(F2, MergedFns) > + Comp2->getSimilarityMetric()) + continue; + + // Ok, we actually want to merge with F2 + CurrentMerge.push_back(Comp2); + MergedFns.insert(F2); + } + + if (CurrentMerge.empty()) + continue; + + MergedFns.insert(F1); + + DEBUG(dbgs() << "- Multi merge of " << F1->getName() << " with " + << CurrentMerge.size() << " functions.\n"); + + +/* DEBUG(dbgs() << "- Diff merging " << Comp->getF1()->getName() << " and " + * << Comp->getF2()->getName() << " : " + * << " bbs=" << Comp->BasicBlockCount + * << " insts=" << Comp->InstructionCount + * << " failed=" << Comp->DifferingInstructionsCount + * << " metric=" << Comp->getSimilarityMetric() + * << '\n'); + * + */ + Changed = true; + outlineAndMergeFunctions(CurrentMerge); + } + + return Changed; +} + Index: lib/Transforms/IPO/PassManagerBuilder.cpp =================================================================== --- lib/Transforms/IPO/PassManagerBuilder.cpp +++ lib/Transforms/IPO/PassManagerBuilder.cpp @@ -53,6 +53,11 @@ RunLoopRerolling("reroll-loops", cl::Hidden, cl::desc("Run the loop rerolling pass")); +static cl::opt +EnableMergeFunctions("enable-mergefunc", cl::init(false), cl::Hidden, + cl::desc("Do not run MergeFunctions pass at module level")); + + PassManagerBuilder::PassManagerBuilder() { OptLevel = 2; SizeLevel = 0; @@ -258,6 +263,14 @@ MPM.add(createConstantMergePass()); // Merge dup global constants } } + + if (EnableMergeFunctions) { + MPM.add(createMergeFunctionsPass()); + MPM.add(createJumpThreadingPass()); // Merge consecutive conditionals + MPM.add(createInstructionCombiningPass()); + MPM.add(createCFGSimplificationPass()); + } + addExtensionsToPM(EP_OptimizerLast, MPM); } Index: test/Transforms/MergeFunc/inttoptr-address-space.ll =================================================================== --- test/Transforms/MergeFunc/inttoptr-address-space.ll +++ test/Transforms/MergeFunc/inttoptr-address-space.ll @@ -1,4 +1,4 @@ -; RUN: opt -mergefunc -S < %s | FileCheck %s +; RUN: opt -mergefunc -mergefunc-min-insts=1 -S < %s | FileCheck %s target datalayout = "e-p:32:32:32-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-n8:16:32-S128" %.qux.2496 = type { i32, %.qux.2497 } Index: test/Transforms/MergeFunc/inttoptr.ll =================================================================== --- test/Transforms/MergeFunc/inttoptr.ll +++ test/Transforms/MergeFunc/inttoptr.ll @@ -1,4 +1,4 @@ -; RUN: opt -mergefunc -S < %s | FileCheck %s +; RUN: opt -mergefunc -mergefunc-min-insts=1 -S < %s | FileCheck %s ; PR15185 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S128" target triple = "i386-pc-linux-gnu" Index: test/Transforms/MergeFunc/merge-similar.ll =================================================================== --- test/Transforms/MergeFunc/merge-similar.ll +++ test/Transforms/MergeFunc/merge-similar.ll @@ -0,0 +1,160 @@ +; RUN: opt -mergefunc -S < %s | FileCheck %s +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +%struct.s_x2 = type { i32, i32, %struct.s_x2node*, %struct.s_x2node** } +%struct.s_x2node = type { %struct.symbol*, i8*, %struct.s_x2node*, %struct.s_x2node** } +%struct.symbol = type { i8*, i32, i32, %struct.rule*, %struct.symbol*, i32, i32, i8*, i32, i32, i8*, i32, i8*, i32, i32, %struct.symbol** } +%struct.rule = type { %struct.symbol*, i8*, i32, i32, i32, %struct.symbol**, i8**, i32, i8*, %struct.symbol*, i32, i32, %struct.rule*, %struct.rule* } +%struct.s_x3 = type { i32, i32, %struct.s_x3node*, %struct.s_x3node** } +%struct.s_x3node = type { %struct.state*, %struct.config*, %struct.s_x3node*, %struct.s_x3node** } +%struct.state = type { %struct.config*, %struct.config*, i32, %struct.action*, i32, i32, i32, i32, i32 } +%struct.action = type { %struct.symbol*, i32, %union.anon, %struct.action*, %struct.action* } +%union.anon = type { %struct.state* } +%struct.config = type { %struct.rule*, i32, i8*, %struct.plink*, %struct.plink*, %struct.state*, i32, %struct.config*, %struct.config* } +%struct.plink = type { %struct.config*, %struct.plink* } + +@x2a = external hidden unnamed_addr global %struct.s_x2*, align 8 +@x3a = external hidden unnamed_addr global %struct.s_x3*, align 8 + +; Function Attrs: nounwind optsize +declare void @free(i8* nocapture) #0 + +; Function Attrs: nounwind optsize +declare noalias i8* @calloc(i64, i64) #0 + +; Function Attrs: nounwind optsize uwtable +define void @Symbol_init() #1 { +; CHECK-LABEL: @Symbol_init( +; CHECK: tail call fastcc void @Symbol_init.mrg(i32 0) +entry: + %0 = load %struct.s_x2** @x2a, align 8 + %tobool = icmp eq %struct.s_x2* %0, null + br i1 %tobool, label %if.end, label %if.end11 + +if.end: ; preds = %entry + %call = tail call noalias i8* @malloc(i64 24) #2 + %1 = bitcast i8* %call to %struct.s_x2* + store %struct.s_x2* %1, %struct.s_x2** @x2a, align 8 + %tobool1 = icmp eq i8* %call, null + br i1 %tobool1, label %if.end11, label %if.then2 + +if.then2: ; preds = %if.end + %size = bitcast i8* %call to i32* + store i32 128, i32* %size, align 4 + %count = getelementptr inbounds i8* %call, i64 4 + %2 = bitcast i8* %count to i32* + store i32 0, i32* %2, align 4 + %call3 = tail call noalias i8* @calloc(i64 128, i64 40) #2 + %3 = bitcast i8* %call3 to %struct.s_x2node* + %tbl = getelementptr inbounds i8* %call, i64 8 + %4 = bitcast i8* %tbl to %struct.s_x2node** + store %struct.s_x2node* %3, %struct.s_x2node** %4, align 8 + %cmp = icmp eq i8* %call3, null + br i1 %cmp, label %if.then5, label %if.else + +if.then5: ; preds = %if.then2 + tail call void @free(i8* %call) #2 + store %struct.s_x2* null, %struct.s_x2** @x2a, align 8 + br label %if.end11 + +if.else: ; preds = %if.then2 + %arrayidx = getelementptr inbounds i8* %call3, i64 4096 + %5 = bitcast i8* %arrayidx to %struct.s_x2node** + %ht = getelementptr inbounds i8* %call, i64 16 + %6 = bitcast i8* %ht to %struct.s_x2node*** + store %struct.s_x2node** %5, %struct.s_x2node*** %6, align 8 + br label %for.body + +for.body: ; preds = %for.body.for.body_crit_edge, %if.else + %7 = phi %struct.s_x2node** [ %5, %if.else ], [ %.pre15, %for.body.for.body_crit_edge ] + %indvars.iv = phi i64 [ 0, %if.else ], [ %indvars.iv.next, %for.body.for.body_crit_edge ] + %arrayidx9 = getelementptr inbounds %struct.s_x2node** %7, i64 %indvars.iv + store %struct.s_x2node* null, %struct.s_x2node** %arrayidx9, align 8 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 128 + br i1 %exitcond, label %if.end11, label %for.body.for.body_crit_edge + +for.body.for.body_crit_edge: ; preds = %for.body + %.pre = load %struct.s_x2** @x2a, align 8 + %ht8.phi.trans.insert = getelementptr inbounds %struct.s_x2* %.pre, i64 0, i32 3 + %.pre15 = load %struct.s_x2node*** %ht8.phi.trans.insert, align 8 + br label %for.body + +if.end11: ; preds = %for.body, %if.then5, %if.end, %entry + ret void +} + +; CHECK-LABEL: define internal fastcc void @Symbol_init.mrg(i32 +; CHECK: load %struct.s_x2** @x2a +; CHECK: load %struct.s_x3** @x3a + +; Function Attrs: nounwind optsize uwtable +define void @State_init() #1 { +; DHECK-LABEL: @State_init( +; DHECK: tail call fastcc void @Symbol_init.mrg(i32 1) +entry: + %0 = load %struct.s_x3** @x3a, align 8 + %tobool = icmp eq %struct.s_x3* %0, null + br i1 %tobool, label %if.end, label %if.end11 + +if.end: ; preds = %entry + %call = tail call noalias i8* @malloc(i64 24) #2 + %1 = bitcast i8* %call to %struct.s_x3* + store %struct.s_x3* %1, %struct.s_x3** @x3a, align 8 + %tobool1 = icmp eq i8* %call, null + br i1 %tobool1, label %if.end11, label %if.then2 + +if.then2: ; preds = %if.end + %size = bitcast i8* %call to i32* + store i32 128, i32* %size, align 4 + %count = getelementptr inbounds i8* %call, i64 4 + %2 = bitcast i8* %count to i32* + store i32 0, i32* %2, align 4 + %call3 = tail call noalias i8* @calloc(i64 128, i64 40) #2 + %3 = bitcast i8* %call3 to %struct.s_x3node* + %tbl = getelementptr inbounds i8* %call, i64 8 + %4 = bitcast i8* %tbl to %struct.s_x3node** + store %struct.s_x3node* %3, %struct.s_x3node** %4, align 8 + %cmp = icmp eq i8* %call3, null + br i1 %cmp, label %if.then5, label %if.else + +if.then5: ; preds = %if.then2 + tail call void @free(i8* %call) #2 + store %struct.s_x3* null, %struct.s_x3** @x3a, align 8 + br label %if.end11 + +if.else: ; preds = %if.then2 + %arrayidx = getelementptr inbounds i8* %call3, i64 4096 + %5 = bitcast i8* %arrayidx to %struct.s_x3node** + %ht = getelementptr inbounds i8* %call, i64 16 + %6 = bitcast i8* %ht to %struct.s_x3node*** + store %struct.s_x3node** %5, %struct.s_x3node*** %6, align 8 + br label %for.body + +for.body: ; preds = %for.body.for.body_crit_edge, %if.else + %7 = phi %struct.s_x3node** [ %5, %if.else ], [ %.pre15, %for.body.for.body_crit_edge ] + %indvars.iv = phi i64 [ 0, %if.else ], [ %indvars.iv.next, %for.body.for.body_crit_edge ] + %arrayidx9 = getelementptr inbounds %struct.s_x3node** %7, i64 %indvars.iv + store %struct.s_x3node* null, %struct.s_x3node** %arrayidx9, align 8 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 128 + br i1 %exitcond, label %if.end11, label %for.body.for.body_crit_edge + +for.body.for.body_crit_edge: ; preds = %for.body + %.pre = load %struct.s_x3** @x3a, align 8 + %ht8.phi.trans.insert = getelementptr inbounds %struct.s_x3* %.pre, i64 0, i32 3 + %.pre15 = load %struct.s_x3node*** %ht8.phi.trans.insert, align 8 + br label %for.body + +if.end11: ; preds = %for.body, %if.then5, %if.end, %entry + ret void +} + +; Function Attrs: nounwind optsize +declare noalias i8* @malloc(i64) #0 + +attributes #0 = { nounwind optsize "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind optsize uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #2 = { nounwind optsize } +