diff --git a/llvm/include/llvm/IR/ModuleSummaryIndex.h b/llvm/include/llvm/IR/ModuleSummaryIndex.h --- a/llvm/include/llvm/IR/ModuleSummaryIndex.h +++ b/llvm/include/llvm/IR/ModuleSummaryIndex.h @@ -1108,6 +1108,85 @@ return CallGraphRoot; } + bool hasSimilarFunction(unsigned id) { return SimilarFunctions.count(id); } + + void addToSimilarFunctions(unsigned id, GlobalValue::GUID GUID) { + if (id == 0) + return; // invalid id. + if (DuplicateFunctions.count(GUID)) + return; + ValueInfo VI = getValueInfo(GUID); + if (!VI) + return; + // assert(VI.getSummaryList().size() == 1); + if (VI.getSummaryList().size() != 1) + return; + GlobalValueSummary *S = VI.getSummaryList()[0].get(); + if (!isa<FunctionSummary>(S)) + return; + assert(isa<FunctionSummary>(S) && "Not a function summary!"); + if (FunctionSimilarityHashes.count(GUID)) { + // Erase the GUID having multiple visits in the ModuleSummaryIndex. + FunctionSimilarityHashes.erase(GUID); + DuplicateFunctions.insert(GUID); + return; + } + + FunctionSimilarityHashes[GUID] = id; + } + + void populateReverseSimilarityHashMap() { + for (auto &p : FunctionSimilarityHashes) + SimilarFunctions[p.second].push_back(p.first); + } + + void removeSingleEntriesFromSimHashMaps() { + // Iterate over the hash to remove entries with no duplicates. 
+ for (auto I = SimilarFunctions.begin(), E = SimilarFunctions.end(); + I != E;) { + auto Next = std::next(I); + assert(I->second.size() && "Empty Entry!"); + if (I->second.size() == 1) { + FunctionSimilarityHashes.erase(I->second[0]); + SimilarFunctions.erase(I); + } + I = Next; + } + } + + std::map> &getSimilarFunctions() { + return SimilarFunctions; + } + + const std::map> & + getSimilarFunctions() const { + return SimilarFunctions; + } + + unsigned getSimilarityHash(GlobalValue::GUID ID) const { + return FunctionSimilarityHashes.find(ID)->second; + } + + std::map &getSimilarFunctionsHash() { + return FunctionSimilarityHashes; + } + + const std::map &getSimilarFunctionsHash() const { + return FunctionSimilarityHashes; + } + + void addToHostSimilarFunction(GlobalValue::GUID ID) { + HostSimilarFunction.insert(ID); + } + + std::set &getHostSimilarFunction() { + return HostSimilarFunction; + } + + const std::set &getHostSimilarFunction() const { + return HostSimilarFunction; + } + bool withGlobalValueDeadStripping() const { return WithGlobalValueDeadStripping; } diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -292,6 +292,8 @@ void initializeMemorySSAWrapperPassPass(PassRegistry&); void initializeMemorySanitizerLegacyPassPass(PassRegistry&); void initializeMergeFunctionsLegacyPassPass(PassRegistry&); +void initializeMergeFunctionsPass(PassRegistry&); +void initializeMergeSimilarFunctionsPass(PassRegistry&); void initializeMergeICmpsLegacyPassPass(PassRegistry &); void initializeMergedLoadStoreMotionLegacyPassPass(PassRegistry&); void initializeMetaRenamerPass(PassRegistry&); diff --git a/llvm/include/llvm/LinkAllPasses.h b/llvm/include/llvm/LinkAllPasses.h --- a/llvm/include/llvm/LinkAllPasses.h +++ b/llvm/include/llvm/LinkAllPasses.h @@ -197,6 +197,7 @@ (void) llvm::createPostOrderFunctionAttrsLegacyPass(); (void) 
llvm::createReversePostOrderFunctionAttrsPass(); (void) llvm::createMergeFunctionsPass(); + (void) llvm::createMergeSimilarFunctionsPass(); (void) llvm::createMergeICmpsLegacyPass(); (void) llvm::createExpandMemCmpPass(); std::string buf; diff --git a/llvm/include/llvm/Transforms/IPO.h b/llvm/include/llvm/Transforms/IPO.h --- a/llvm/include/llvm/Transforms/IPO.h +++ b/llvm/include/llvm/Transforms/IPO.h @@ -215,6 +215,13 @@ /// function(s). ModulePass *createHotColdSplittingPass(); +//===----------------------------------------------------------------------===// +/// createMergeSimilarFunctionsPass - This pass discovers similar functions and +/// merges them. +/// +ModulePass * +createMergeSimilarFunctionsPass(const ModuleSummaryIndex *S = nullptr); + //===----------------------------------------------------------------------===// /// createPartialInliningPass - This pass inlines parts of functions. /// diff --git a/llvm/include/llvm/Transforms/Utils/Cloning.h b/llvm/include/llvm/Transforms/Utils/Cloning.h --- a/llvm/include/llvm/Transforms/Utils/Cloning.h +++ b/llvm/include/llvm/Transforms/Utils/Cloning.h @@ -126,6 +126,20 @@ Function *CloneFunction(Function *F, ValueToValueMapTy &VMap, ClonedCodeInfo *CodeInfo = nullptr); +/// Used to control @fn CloneFunctionInto. +enum class CloneType { + InvalidCloneType, + /// Cloning will result in module level changes. + ModuleLevelChanges, + /// !ModuleLevelChanges, When no module level changes will be made to the + /// cloned function. + NoModuleLevelChanges, + /// Cloning will be used for extracting functions by passes like function + /// merging, it does not require module level changes but debug info needs + /// special treatment like: DISubprogram is not cloned. + ExtractingFunctions, +}; + /// Clone OldFunc into NewFunc, transforming the old arguments into references /// to VMap values. Note that if NewFunc already has basic blocks, the ones /// cloned into it will be added to the end of the function. 
This function @@ -136,7 +150,7 @@ /// mappings. /// void CloneFunctionInto(Function *NewFunc, const Function *OldFunc, - ValueToValueMapTy &VMap, bool ModuleLevelChanges, + ValueToValueMapTy &VMap, CloneType CT, SmallVectorImpl<ReturnInst *> &Returns, const char *NameSuffix = "", ClonedCodeInfo *CodeInfo = nullptr, diff --git a/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp b/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp --- a/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp +++ b/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp @@ -76,6 +76,89 @@ cl::value_desc("filename"), cl::desc("File to emit dot graph of new summary into.")); +cl::opt<bool> UseGlobalAliases( + "mergesimilarfunc-global-aliases", cl::Hidden, cl::init(false), + cl::desc("Enable writing alias by enabling global aliases")); + +cl::opt<unsigned> MergeMinInsts( + "mergesimilarfunc-min-insts", cl::Hidden, cl::init(4), + cl::desc("Min instructions required to even consider single block fns")); + +// Minimize the name pollution caused by the enum values. +namespace Opt { +cl::opt<MergeLevelEnum> MergeLevel( + "mergesimilarfunc-level", cl::Hidden, cl::ZeroOrMore, + cl::desc("Level of function merging:"), cl::init(size), + cl::values(clEnumVal(none, "function merging disabled"), + clEnumVal(size, "only try to merge functions that are optimized " + "for size"), + clEnumVal(all, "attempt to merge all similar functions"))); +} + +namespace llvm { + +static const char *MERGED_SUFFIX = "__merged"; + +/// Returns the type id for a type to be hashed. We turn pointer types into +/// integers here because the actual compare logic below considers pointers and +/// integers of the same size as equal. 
+static Type::TypeID getTypeIDForHash(Type *Ty) { + if (Ty->isPointerTy()) + return Type::IntegerTyID; + return Ty->getTypeID(); +} + +bool isAliasCapable(const Function* G) { + return + UseGlobalAliases && G->hasGlobalUnnamedAddr() + && (G->hasExternalLinkage() || G->hasLocalLinkage() || G->hasWeakLinkage()); +} + +bool isComparisonCandidate(const Function *F) { + if (Opt::MergeLevel == Opt::size) { + // Only consider functions that are to be optimized for size. + // By default, that is all functions at -Os/-Oz and nothing at -O2. + bool Os = F->getAttributes(). + hasAttribute(AttributeList::FunctionIndex, Attribute::OptimizeForSize); + bool Oz = F->getAttributes(). + hasAttribute(AttributeList::FunctionIndex, Attribute::MinSize); + if (!Os && !Oz) + return false; + } + + // Ignore declarations and tiny functions - no point in merging those + if (F->isDeclaration()) return false; + if (F->getName().endswith(MERGED_SUFFIX)) return false; + if (F->hasAvailableExternallyLinkage()) return false; + if (F->hasFnAttribute(Attribute::AlwaysInline)) return false; + if (F->size() == 1 && F->begin()->size() < MergeMinInsts) + return isAliasCapable(F); + + return true; +} + +unsigned profileFunction(const Function *F) { + FunctionType *FTy = F->getFunctionType(); + if (!isComparisonCandidate(F)) + return 0; + if (F->hasGC() || FTy->isVarArg() || !F->hasExactDefinition()) + return 0; + FoldingSetNodeID ID; + ID.AddInteger(F->size()); + ID.AddInteger(F->getCallingConv()); + // Add pure attribute, has side-effects attribute. 
+ ID.AddBoolean(F->hasFnAttribute(Attribute::NoUnwind)); + ID.AddBoolean(F->hasFnAttribute(Attribute::NoReturn)); + //ID.AddBoolean(F->hasGC()); + //ID.AddBoolean(F->isInterposable()); + //ID.AddBoolean(FTy->isVarArg()); + ID.AddInteger(getTypeIDForHash(FTy->getReturnType())); + for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i) + ID.AddInteger(getTypeIDForHash(FTy->getParamType(i))); + return ID.ComputeHash(); +} +} + // Walk through the operands of a given User via worklist iteration and populate // the set of GlobalValue references encountered. Invoked either on an // Instruction or a GlobalVariable (which walks its initializer). diff --git a/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp b/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp --- a/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp +++ b/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp @@ -318,7 +318,7 @@ "modules."); SmallVector Returns; // Ignore returns cloned. - CloneFunctionInto(NewF, &OrigF, VMap, /*ModuleLevelChanges=*/true, Returns, + CloneFunctionInto(NewF, &OrigF, VMap, CloneType::ModuleLevelChanges, Returns, "", nullptr, nullptr, Materializer); OrigF.deleteBody(); } diff --git a/llvm/lib/Target/AMDGPU/R600OpenCLImageTypeLoweringPass.cpp b/llvm/lib/Target/AMDGPU/R600OpenCLImageTypeLoweringPass.cpp --- a/llvm/lib/Target/AMDGPU/R600OpenCLImageTypeLoweringPass.cpp +++ b/llvm/lib/Target/AMDGPU/R600OpenCLImageTypeLoweringPass.cpp @@ -316,7 +316,7 @@ } } SmallVector Returns; - CloneFunctionInto(NewF, F, VMap, /*ModuleLevelChanges=*/false, Returns); + CloneFunctionInto(NewF, F, VMap, CloneType::NoModuleLevelChanges, Returns); // Build new MDNode. 
SmallVector KernelMDArgs; diff --git a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp --- a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp @@ -667,7 +667,7 @@ auto savedLinkage = NewF->getLinkage(); NewF->setLinkage(llvm::GlobalValue::ExternalLinkage); - CloneFunctionInto(NewF, &OrigF, VMap, /*ModuleLevelChanges=*/true, Returns); + CloneFunctionInto(NewF, &OrigF, VMap, CloneType::ModuleLevelChanges, Returns); NewF->setLinkage(savedLinkage); NewF->setVisibility(savedVisibility); diff --git a/llvm/lib/Transforms/IPO/CMakeLists.txt b/llvm/lib/Transforms/IPO/CMakeLists.txt --- a/llvm/lib/Transforms/IPO/CMakeLists.txt +++ b/llvm/lib/Transforms/IPO/CMakeLists.txt @@ -27,6 +27,7 @@ LowerTypeTests.cpp MergeFunctions.cpp OpenMPOpt.cpp + MergeSimilarFunctions.cpp PartialInlining.cpp PassManagerBuilder.cpp PruneEH.cpp diff --git a/llvm/lib/Transforms/IPO/IPO.cpp b/llvm/lib/Transforms/IPO/IPO.cpp --- a/llvm/lib/Transforms/IPO/IPO.cpp +++ b/llvm/lib/Transforms/IPO/IPO.cpp @@ -45,6 +45,8 @@ initializeSingleLoopExtractorPass(Registry); initializeLowerTypeTestsPass(Registry); initializeMergeFunctionsLegacyPassPass(Registry); + initializeMergeFunctionsPass(Registry); + initializeMergeSimilarFunctionsPass(Registry); initializePartialInlinerLegacyPassPass(Registry); initializeAttributorLegacyPassPass(Registry); initializeAttributorCGSCCLegacyPassPass(Registry); diff --git a/llvm/lib/Transforms/IPO/MergeSimilarFunctions.cpp b/llvm/lib/Transforms/IPO/MergeSimilarFunctions.cpp new file mode 100644 --- /dev/null +++ b/llvm/lib/Transforms/IPO/MergeSimilarFunctions.cpp @@ -0,0 +1,2197 @@ +//===- MergeSimilarFunctions.cpp - Merge similar functions ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This pass merges both equivalent and similar functions to reduce code size. +// +// For a more detailed explanation of the approach, see: +// Edler von Koch et al. "Exploiting Function Similarity for Code Size +// Reduction", LCTES 2014. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "mergesimilarfunc" +#include "llvm/Transforms/IPO.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/FoldingSet.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/MapVector.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/IR/CallSite.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InlineAsm.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Operator.h" +#include "llvm/IR/ValueHandle.h" +#include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/ValueMapper.h" +#include <list> +#include <vector> +using namespace llvm; + +STATISTIC(NumFunctionsMerged, "Number of functions merged"); +STATISTIC(NumThunksWritten, "Number of thunks generated"); +STATISTIC(NumAliasesWritten, "Number of aliases generated"); +STATISTIC(NumDoubleWeak, "Number of new functions created"); +STATISTIC(NumMultiMerged, "Number of multi-merged functions"); + +STATISTIC(NumSimilarFunctionsMerged, "Number of similar functions merged"); + +static cl::opt<unsigned> MergeMinInsts( + "mergesimilarfunc-min-insts", cl::Hidden, cl::init(4), + cl::desc("Min instructions required to even consider single block fns")); + +static cl::opt<unsigned> 
MergeDifferingMinInsts( + "mergesimilarfunc-diff-min-insts", cl::Hidden, cl::init(15), + cl::desc("Min instructions required to try merging differing functions")); + +static cl::opt<unsigned> MergeMaxDiffering( + "mergesimilarfunc-max-diff", cl::Hidden, cl::init(8), + cl::desc("Maximum number of differing instructions allowed")); + +static cl::opt<unsigned> MergeMinSimilarity( + "mergesimilarfunc-min-similarity", cl::Hidden, cl::init(70), + cl::desc("Minimum percentage of similar instructions required")); + +static cl::opt<bool> OptPrintMerges("mergesimilarfunc-print-merges", cl::Hidden, + cl::init(false)); + +static cl::opt<bool> UseGlobalAliases( + "mergesimilarfunc-global-aliases", cl::Hidden, cl::init(false), + cl::desc("Enable writing alias by enabling global aliases")); + +void PrintMerges(const char *Desc, Function *Old, Function *New) { + if (OptPrintMerges) { + dbgs() << "=== [" << Desc << "] replacing " << Old->getName() << " with " + << New->getName() << "\n"; + } +} + +// Minimize the name pollution caused by the enum values. +namespace Opt { +enum MergeLevelEnum { none, size, all }; +static cl::opt<MergeLevelEnum> MergeLevel( + "mergesimilarfunc-level", cl::Hidden, cl::ZeroOrMore, + cl::desc("Level of function merging:"), cl::init(size), + cl::values(clEnumVal(none, "function merging disabled"), + clEnumVal(size, "only try to merge functions that are optimized " + "for size"), + clEnumVal(all, "attempt to merge all similar functions"))); +} + +static const char *MERGED_SUFFIX = "__merged"; + +/// Returns the type id for a type to be hashed. We turn pointer types into +/// integers here because the actual compare logic below considers pointers and +/// integers of the same size as equal. +static Type::TypeID getTypeIDForHash(Type *Ty) { + if (Ty->isPointerTy()) + return Type::IntegerTyID; + return Ty->getTypeID(); +} + +/// Creates a hash-code for the function which is the same for any two +/// functions that will compare equal, without looking at the instructions +/// inside the function. 
+static unsigned profileFunction(const Function *F) { + FunctionType *FTy = F->getFunctionType(); + + FoldingSetNodeID ID; + ID.AddInteger(F->size()); + ID.AddInteger(F->getCallingConv()); + ID.AddBoolean(F->hasGC()); + ID.AddBoolean(F->isInterposable()); + ID.AddBoolean(FTy->isVarArg()); + ID.AddInteger(getTypeIDForHash(FTy->getReturnType())); + for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i) + ID.AddInteger(getTypeIDForHash(FTy->getParamType(i))); + return ID.ComputeHash(); +} + + +/// Replace Inst1 by a switch statement that executes Inst1 or one of Inst2s +/// depending on the value of SwitchVal. If a value in Inst2s is NULL, it +/// defaults to executing Inst1. Returns set of terminator instructions of newly +/// created switch blocks in Ret. +/// +/// For instance, the transformation may look as follows: +/// ...Head... +/// Inst1 with all of Insts2s without parents +/// ...Tail... +/// into +/// ...Head... +/// Switch +/// / | \ . +/// (default) (1) (2) +/// Inst1 Inst2s[0] Inst2s[1] +/// Ret[0] Ret[1] Ret[2] +/// \ | / +/// ...Tail... 
+/// +static void SplitBlockAndInsertSwitch( + Value *SwitchVal, Instruction *Inst1, + SmallVectorImpl &Inst2s, + SmallVectorImpl &Ret) { + // Split block + BasicBlock *Head = Inst1->getParent(); + BasicBlock *Tail = Head->splitBasicBlock(Inst1); + + // Create default block + LLVMContext &C = Head->getContext(); + BasicBlock *DefaultBlock = BasicBlock::Create(C, "", Head->getParent(), Tail); + + // Insert switch instruction at end of Head + Instruction *HeadOldTerm = Head->getTerminator(); + SwitchInst *Switch = SwitchInst::Create(SwitchVal, DefaultBlock, + Inst2s.size()); + ReplaceInstWithInst(HeadOldTerm, Switch); + + // Move instructions into the blocks + if (Inst1->isTerminator()) { + Inst1->removeFromParent(); + DefaultBlock->getInstList().push_back(Inst1); + Ret.push_back(cast(Inst1)); + } else { + Instruction *DefaultTerm = BranchInst::Create(Tail, DefaultBlock); + Inst1->moveBefore(DefaultTerm); + Ret.push_back(DefaultTerm); + } + + for (unsigned InstPos = 0, InstNum = Inst2s.size(); InstPos < InstNum; + ++InstPos) { + Instruction *Inst2 = Inst2s[InstPos]; + if (!Inst2) { + Ret.push_back(NULL); + continue; + } + + BasicBlock *CaseBlock = BasicBlock::Create(C, "", Head->getParent(), Tail); + + // Update the debug information of the merged instruction by marking it as + // 'inlined' at this location. If only Inst1 or Inst2 has debug + // information, we try to do something sensible that won't break the + // verifier. 
+ if (Inst1->getDebugLoc()) { + if (Inst2->getDebugLoc()) { + const DebugLoc &I2Loc = Inst2->getDebugLoc(); + Inst2->setDebugLoc( + DebugLoc::get(I2Loc.getLine(), I2Loc.getCol(), I2Loc.getScope(), + /*InlinedAt*/ Inst1->getDebugLoc().getAsMDNode())); + } else { + Inst2->setDebugLoc(Inst1->getDebugLoc()); + } + } else if (Inst2->getDebugLoc()) { + Inst2->setDebugLoc(DebugLoc()); + } + + if (Inst2->isTerminator()) { + assert(Inst1->isTerminator() && + "Inst1 and Inst2 must both be terminators or non-terminators!"); + CaseBlock->getInstList().push_back(Inst2); + Ret.push_back(cast(Inst2)); + } else { + Instruction *CaseTerm = BranchInst::Create(Tail, CaseBlock); + Inst2->insertBefore(CaseTerm); + Ret.push_back(CaseTerm); + } + + Switch->addCase(ConstantInt::get(cast(SwitchVal->getType()), + InstPos+1), + CaseBlock); + } + + // If Inst1 (and Inst2s) are Terminator Inst's, Tail will be empty and can be + // deleted now. We also need to update PHI nodes to add the additional + // incoming blocks from the SwitchInst. + if (Inst1->isTerminator()) { + for (succ_iterator I = succ_begin(DefaultBlock), E = succ_end(DefaultBlock); + I != E; ++I) { + BasicBlock *Successor = *I; + PHINode *Phi; + + for (BasicBlock::iterator II = Successor->begin(); + (Phi = dyn_cast(II)); ++II) + for (unsigned ValId = 0, ValEnd = Phi->getNumIncomingValues(); + ValId != ValEnd; ++ValId) + if (Phi->getIncomingBlock(ValId) == Tail) { + Phi->setIncomingBlock(ValId, DefaultBlock); + SmallVectorImpl::iterator + SwitchI = Ret.begin(), SwitchE = Ret.end(); + for (++SwitchI; SwitchI != SwitchE; ++SwitchI) { + if (!*SwitchI) + continue; + Phi->addIncoming(Phi->getIncomingValue(ValId), + (*SwitchI)->getParent()); + } + } + } + + Tail->eraseFromParent(); + } +} + +/// Insert function NewF into module, placing it immediately after the +/// existing function PredF. If PredF does not exist, insert at the end. 
+static void insertFunctionAfter(Function *NewF, Function *PredF) { + Module *M = PredF->getParent(); + Module::FunctionListType &FList = M->getFunctionList(); + + for (Module::FunctionListType::iterator I = FList.begin(), E = FList.end(); + I != E; ++I) { + if (PredF == &*I) { + FList.insertAfter(I, NewF); + return; + } + } + + // Couldn't find PredF, insert at end + FList.push_back(NewF); +} + +/// Create a cast instruction if needed to cast V to type DstType. We treat +/// pointer and integer types of the same bitwidth as equivalent, so this can be +/// used to cast them to each other where needed. The function returns the Value +/// itself if no cast is needed, or a new CastInst instance inserted before +/// InsertBefore. The integer type equivalent to pointers must be passed as +/// IntPtrType (get it from DataLayout). This is guaranteed to generate no-op +/// casts, otherwise it will assert. +static Value *createCastIfNeeded(Value *V, Type *DstType, + Value *InstrOrBB, Type *IntPtrType) { + if (V->getType() == DstType) + return V; + + BasicBlock *InsertAtEnd = dyn_cast<BasicBlock>(InstrOrBB); + Instruction *InsertBefore = dyn_cast<Instruction>(InstrOrBB); + BasicBlock *InsertBB = InsertAtEnd ? 
InsertAtEnd : InsertBefore->getParent(); + + CastInst *Result; + Type *OrigType = V->getType(); + + if (OrigType->isStructTy()) { + assert(DstType->isStructTy()); + assert(OrigType->getStructNumElements() == DstType->getStructNumElements()); + + IRBuilder<> Builder(InsertBB); + if (InsertBefore) + Builder.SetInsertPoint(InsertBefore); + Value *Result = UndefValue::get(DstType); + for (unsigned int I = 0, E = OrigType->getStructNumElements(); I < E; ++I) { + Value *ExtractedValue + = Builder.CreateExtractValue(V, ArrayRef(I)); + Value *Element = createCastIfNeeded(ExtractedValue, + DstType->getStructElementType(I), + InstrOrBB, IntPtrType); + Result = + Builder.CreateInsertValue(Result, Element, ArrayRef(I)); + } + return Result; + } + assert(!DstType->isStructTy()); + + if (OrigType->isPointerTy() + && (DstType->isIntegerTy() || DstType->isPointerTy())) { + if (InsertBefore) + Result = CastInst::CreatePointerCast(V, DstType, "", InsertBefore); + else + Result = CastInst::CreatePointerCast(V, DstType, "", InsertAtEnd); + } else if (OrigType->isIntegerTy() && DstType->isPointerTy() + && OrigType == IntPtrType) { + // Int -> Ptr + if (InsertBefore) { + Result = CastInst::Create(CastInst::IntToPtr, V, DstType, "", + InsertBefore); + } else { + Result = CastInst::Create(CastInst::IntToPtr, V, DstType, "", + InsertAtEnd); + } + } else { + llvm_unreachable("Can only cast int -> ptr or ptr -> (ptr or int)"); + } + + assert(cast(Result)->isNoopCast(IntPtrType) && + "Cast is not a no-op cast. Potential loss of precision"); + + return Result; +} + +namespace { + +/// ComparableFunction - A struct that pairs together functions with a +/// DataLayout so that we can keep them together as elements in the DenseSet. 
+class ComparableFunction { +public: + ComparableFunction() : Func(0), IsNew(false) { } + + ComparableFunction(const ComparableFunction &that) + : Func(that.Func), IsNew(that.IsNew) { + } + + ComparableFunction(Function *Func) : Func(Func), IsNew(true) { } + + ~ComparableFunction() { } + + ComparableFunction &operator=(const ComparableFunction &that) { + Func = that.Func; + IsNew = that.IsNew; + return *this; + } + + Function *getFunc() const { return Func; } + bool isNew() const { return IsNew; } + + // Drops AssertingVH reference to the function. Outside of debug mode, this + // does nothing. + void release() { + assert(Func && + "Attempted to release function twice, or release empty/tombstone!"); + Func = NULL; + } + + void markCompared() { + IsNew = false; + } +private: + AssertingVH Func; + bool IsNew; +}; + +} + +namespace { + +/// FunctionComparator - Compares two functions to determine whether or not +/// they will generate machine code with the same behaviour. DataLayout is +/// used if available. The comparator always fails conservatively (erring on the +/// side of claiming that two functions are different). +class FunctionComparator { +public: + FunctionComparator(const DataLayout *DL, Function *F1, Function *F2) + : isDifferent(false), isNotMergeable(false), + BasicBlockCount(0), InstructionCount(0), DifferingInstructionsCount(0), + F1(F1), F2(F2), SimilarityMetric(0), DL(DL), ID(CurID++) {} + + ~FunctionComparator() {} + + /// Test whether the two functions have equivalent behaviour. Returns true if + /// they are equal or can be merged, false if not. + bool compare(); + + /// Indicate whether the two functions are an exact match after comparison + bool isExactMatch(); + + /// Indicate whether the two functions candidates for merging after comparison + bool isMergeCandidate(); + + /// Get a similarity metric between the two functions. Higher means more + /// similar. 
+ unsigned getSimilarityMetric() { + if (!SimilarityMetric) + SimilarityMetric = (unsigned)(((float)InstructionCount - + DifferingInstructionsCount)/InstructionCount*10000); + return SimilarityMetric; + } + + Function *getF1() { return F1; } + Function *getF2() { return F2; } + ValueToValueMapTy &getF1toF2Map() { return id_map; } + ValueToValueMapTy &getF2toF1Map() { return seen_values; } + const DataLayout *getDataLayout() { return DL; } + + /// Assign or look up previously assigned numbers for the two values, and + /// return whether the numbers are equal. Numbers are assigned in the order + /// visited. If NoSelfRef is set, F1 and F2 are not assigned to each other + /// (treated as 'equal'). + bool enumerate(const Value *V1, const Value *V2, bool NoSelfRef=false); + + /// Compare two Types, treating all pointer types as equal. + bool isEquivalentType(Type *Ty1, Type *Ty2) const; + + /// Instructions that differ between the two functions (F1's -> F2's inst). + MapVector DifferingInstructions; + + /// Instructions that reference F1/F2 itself (recursive calls etc.) + /// These may need special treatment when merging differing functions. + MapVector SelfRefInstructions; + + /// Return the unique ID for the object. + unsigned getID() { return ID; } + + bool isDifferent; + bool isNotMergeable; + + // Comparison statistics + unsigned BasicBlockCount; + unsigned InstructionCount; + unsigned DifferingInstructionsCount; + +private: + /// Test whether two basic blocks have equivalent behaviour. Returns true if + /// they are equal or can be merged, false if not. PHINodes are not compared + /// in this function, but added to the PHIsFound list for delayed processing. + bool compare(const BasicBlock *BB1, const BasicBlock *BB2, + std::list > *PHIsFound); + + /// Compare pairs of PHI nodes. Returns true if all pairs are equal or can + /// be merged, false if not. 
+ bool comparePHIs( + const std::list > &PHIs); + + /// Compare two Instructions for equivalence, similar to + /// Instruction::isSameOperationAs but with modifications to the type + /// comparison. + bool isEquivalentOperation(const Instruction *I1, + const Instruction *I2) const; + + /// Compare two GEPs for equivalent pointer arithmetic. + bool isEquivalentGEP(const GEPOperator *GEP1, const GEPOperator *GEP2); + bool isEquivalentGEP(const GetElementPtrInst *GEP1, + const GetElementPtrInst *GEP2) { + return isEquivalentGEP(cast(GEP1), cast(GEP2)); + } + + // The two functions undergoing comparison. + Function *F1, *F2; + + unsigned SimilarityMetric; + + const DataLayout *DL; + + ValueToValueMapTy id_map; + ValueToValueMapTy seen_values; + + // Maintain a unique ID for each object. + static unsigned CurID; + unsigned ID; +}; + +} + +unsigned FunctionComparator::CurID = 0; + +// Any two pointers in the same address space are equivalent, intptr_t and +// pointers are equivalent. Otherwise, standard type equivalence rules apply. +bool FunctionComparator::isEquivalentType(Type *Ty1, Type *Ty2) const { + if (Ty1 == Ty2) + return true; + if (Ty1->getTypeID() != Ty2->getTypeID()) { + LLVMContext &Ctx = Ty1->getContext(); + if (isa(Ty1) && Ty2 == DL->getIntPtrType(Ctx)) return true; + if (isa(Ty2) && Ty1 == DL->getIntPtrType(Ctx)) return true; + return false; + } + + switch (Ty1->getTypeID()) { + default: + llvm_unreachable("Unknown type!"); + // Fall through in Release mode. + case Type::IntegerTyID: + case Type::VectorTyID: + // Ty1 == Ty2 would have returned true earlier. 
+ return false; + + case Type::VoidTyID: + case Type::FloatTyID: + case Type::DoubleTyID: + case Type::X86_FP80TyID: + case Type::FP128TyID: + case Type::PPC_FP128TyID: + case Type::LabelTyID: + case Type::MetadataTyID: + return true; + + case Type::PointerTyID: { + PointerType *PTy1 = cast(Ty1); + PointerType *PTy2 = cast(Ty2); + return PTy1->getAddressSpace() == PTy2->getAddressSpace(); + } + + case Type::StructTyID: { + StructType *STy1 = cast(Ty1); + StructType *STy2 = cast(Ty2); + if (STy1->getNumElements() != STy2->getNumElements()) + return false; + + if (STy1->isPacked() != STy2->isPacked()) + return false; + + for (unsigned i = 0, e = STy1->getNumElements(); i != e; ++i) { + if (!isEquivalentType(STy1->getElementType(i), STy2->getElementType(i))) + return false; + } + return true; + } + + case Type::FunctionTyID: { + FunctionType *FTy1 = cast(Ty1); + FunctionType *FTy2 = cast(Ty2); + if (FTy1->getNumParams() != FTy2->getNumParams() || + FTy1->isVarArg() != FTy2->isVarArg()) + return false; + + if (!isEquivalentType(FTy1->getReturnType(), FTy2->getReturnType())) + return false; + + for (unsigned i = 0, e = FTy1->getNumParams(); i != e; ++i) { + if (!isEquivalentType(FTy1->getParamType(i), FTy2->getParamType(i))) + return false; + } + return true; + } + + case Type::ArrayTyID: { + ArrayType *ATy1 = cast(Ty1); + ArrayType *ATy2 = cast(Ty2); + return ATy1->getNumElements() == ATy2->getNumElements() && + isEquivalentType(ATy1->getElementType(), ATy2->getElementType()); + } + } +} + +// Determine whether the two operations are the same except that pointer-to-A +// and pointer-to-B are equivalent. This should be kept in sync with +// Instruction::isSameOperationAs. +bool FunctionComparator::isEquivalentOperation(const Instruction *I1, + const Instruction *I2) const { + // Differences from Instruction::isSameOperationAs: + // * replace type comparison with calls to isEquivalentType. 
+ // * we test for I->hasSameSubclassOptionalData (nuw/nsw/tail) at the top + // * because of the above, we don't test for the tail bit on calls later on + if (I1->getOpcode() != I2->getOpcode() || + I1->getNumOperands() != I2->getNumOperands() || + !isEquivalentType(I1->getType(), I2->getType()) || + !I1->hasSameSubclassOptionalData(I2)) + return false; + + // We have two instructions of identical opcode and #operands. Check to see + // if all operands are the same type + for (unsigned i = 0, e = I1->getNumOperands(); i != e; ++i) + if (!isEquivalentType(I1->getOperand(i)->getType(), + I2->getOperand(i)->getType())) + return false; + + // Check special state that is a part of some instructions. + if (const LoadInst *LI = dyn_cast(I1)) { + const LoadInst *LI2 = cast(I2); + return LI->isVolatile() == LI2->isVolatile() && + LI->getAlignment() == LI2->getAlignment() && + LI->getOrdering() == LI2->getOrdering() && + LI->getSyncScopeID() == LI2->getSyncScopeID() && + LI->getMetadata(LLVMContext::MD_range) + == LI2->getMetadata(LLVMContext::MD_range); + } + if (const StoreInst *SI = dyn_cast(I1)) + return SI->isVolatile() == cast(I2)->isVolatile() && + SI->getAlignment() == cast(I2)->getAlignment() && + SI->getOrdering() == cast(I2)->getOrdering() && + SI->getSyncScopeID() == cast(I2)->getSyncScopeID(); + if (const AllocaInst *AI = dyn_cast(I1)) { + if (AI->getArraySize() != cast(I2)->getArraySize() || + AI->getAlignment() != cast(I2)->getAlignment()) + return false; + + // If size is known, I2 can be seen as equivalent to I1 if it allocates + // the same or less memory. 
+ if (DL->getTypeAllocSize(AI->getAllocatedType()) + < DL->getTypeAllocSize(cast(I2)->getAllocatedType())) + return false; + + return true; + } + if (const CmpInst *CI = dyn_cast(I1)) + return CI->getPredicate() == cast(I2)->getPredicate(); + if (const CallInst *CI = dyn_cast(I1)) + return CI->getCallingConv() == cast(I2)->getCallingConv() && + CI->getAttributes() == cast(I2)->getAttributes(); + if (const InvokeInst *CI = dyn_cast(I1)) + return CI->getCallingConv() == cast(I2)->getCallingConv() && + CI->getAttributes() == cast(I2)->getAttributes(); + if (const InsertValueInst *IVI = dyn_cast(I1)) + return IVI->getIndices() == cast(I2)->getIndices(); + if (const ExtractValueInst *EVI = dyn_cast(I1)) + return EVI->getIndices() == cast(I2)->getIndices(); + if (const FenceInst *FI = dyn_cast(I1)) + return FI->getOrdering() == cast(I2)->getOrdering() && + FI->getSyncScopeID() == cast(I2)->getSyncScopeID(); + if (const AtomicCmpXchgInst *CXI = dyn_cast(I1)) { + const AtomicCmpXchgInst *CXI2 = cast(I2); + return CXI->isVolatile() == CXI2->isVolatile() && + CXI->isWeak() == CXI2->isWeak() && + CXI->getSuccessOrdering() == CXI2->getSuccessOrdering() && + CXI->getFailureOrdering() == CXI2->getFailureOrdering() && + CXI->getSyncScopeID() == CXI2->getSyncScopeID(); + } + if (const AtomicRMWInst *RMWI = dyn_cast(I1)) + return RMWI->getOperation() == cast(I2)->getOperation() && + RMWI->isVolatile() == cast(I2)->isVolatile() && + RMWI->getOrdering() == cast(I2)->getOrdering() && + RMWI->getSyncScopeID() == cast(I2)->getSyncScopeID(); + + return true; +} + +// Determine whether two GEP operations perform the same underlying arithmetic. +bool FunctionComparator::isEquivalentGEP(const GEPOperator *GEP1, + const GEPOperator *GEP2) { + // When we have target data, we can reduce the GEP down to the value in bytes + // added to the address. 
+ if (GEP1->hasAllConstantIndices() && GEP2->hasAllConstantIndices()) { + SmallVector Indices1(GEP1->idx_begin(), GEP1->idx_end()); + SmallVector Indices2(GEP2->idx_begin(), GEP2->idx_end()); + uint64_t Offset1 = DL->getIndexedOffsetInType(GEP1->getSourceElementType(), + Indices1); + uint64_t Offset2 = DL->getIndexedOffsetInType(GEP2->getSourceElementType(), + Indices2); + return Offset1 == Offset2; + } + + if (GEP1->getPointerOperand()->getType() != + GEP2->getPointerOperand()->getType()) + return false; + + if (GEP1->getNumOperands() != GEP2->getNumOperands()) + return false; + + for (unsigned i = 0, e = GEP1->getNumOperands(); i != e; ++i) { + if (!enumerate(GEP1->getOperand(i), GEP2->getOperand(i))) + return false; + } + + return true; +} + +// Compare two values used by the two functions under pair-wise comparison. If +// this is the first time the values are seen, they're added to the mapping so +// that we will detect mismatches on next use. +bool FunctionComparator::enumerate(const Value *V1, const Value *V2, + bool NoSelfRef/*=false*/) { + // Check for function @f1 referring to itself and function @f2 referring to + // itself. For compatibility with llvm's MergeFunctions, disallow referring to + // each other, or both referring to either of them. + if (!NoSelfRef && V1 == F1 && V2 == F2) + return true; + + // FIXME: This is very conservative for now, but keeping this for thinlto. + if (isa(V1) || isa(V2)) + return false; + if (const Constant *C1 = dyn_cast(V1)) { + if (V1 == V2) return true; + const Constant *C2 = dyn_cast(V2); + if (!C2) return false; + // TODO: constant expressions with GEP or references to F1 or F2. + if (C1->isNullValue() && C2->isNullValue() && + isEquivalentType(C1->getType(), C2->getType())) + return true; + // Try bitcasting C2 to C1's type. If the bitcast is legal and returns C1 + // then they must have equal bit patterns. Aggregate types cannot be + // bitcast. 
+ if (C1->getType()->isAggregateType() || C2->getType()->isAggregateType()) + return false; + return C1->getType()->canLosslesslyBitCastTo(C2->getType()) && + C1 == ConstantExpr::getBitCast(const_cast(C2), C1->getType()); + } + + if (isa(V1) || isa(V2)) + return V1 == V2; + + // Check that V1 maps to V2. If we find a value that V1 maps to then we simply + // check whether it's equal to V2. When there is no mapping then we need to + // ensure that V2 isn't already equivalent to something else. For this + // purpose, we track the V2 values in a set. + + ValueToValueMapTy::iterator I = id_map.find(V1); + if (I != id_map.end()) + return V2 == I->second; + // FIXME: Const casts!!! + if (!seen_values.insert(std::make_pair(V2, const_cast(V1))).second) + return false; + id_map[V1] = const_cast(V2); + return true; +} + +/// Test whether two basic blocks have equivalent behaviour. Returns true if the +/// blocks can be merged, false if they cannot. Differing instructions are +/// recorded in DifferingInstructions. +bool FunctionComparator::compare(const BasicBlock *BB1, const BasicBlock *BB2, + std::list > *PHIsFound) { + BasicBlock::const_iterator F1I, F1E, F2I, F2E; + + for (F1I = BB1->begin(), F1E = BB1->end(), + F2I = BB2->begin(), F2E = BB2->end(); + F1I != F1E && F2I != F2E; ++F1I, ++F2I) { + // Skip debug information + const CallInst *DbgCall; + while (F1I != F1E && (DbgCall = dyn_cast(F1I)) && + DbgCall->getCalledFunction() && + DbgCall->getCalledFunction()->hasName() && + DbgCall->getCalledFunction()->getName().startswith("llvm.dbg.")) + ++F1I; + + while (F2I != F2E && (DbgCall = dyn_cast(F2I)) && + DbgCall->getCalledFunction() && + DbgCall->getCalledFunction()->hasName() && + DbgCall->getCalledFunction()->getName().startswith("llvm.dbg.")) + ++F2I; + + if (F1I == F1E || F2I == F2E) + break; + + // Ok, we're dealing with real instructions. Check a few cases that will + // prevent merging first. 
+ const Instruction *F1In = &*F1I; + const Instruction *F2In = &*F2I; + + // Cannot merge insts that differ in whether they have uses + if (F1In->use_empty() != F2In->use_empty()) { + // TODO: Could implement merging for this case (would need to introduce a + // dummy value in the PHI node etc.) + return false; + } + + // Cannot merge insts whose types are non-equivalent + if (!isEquivalentType(F1In->getType(), F2In->getType())) { + return false; + } + + // TODO: Currently cannot merge InvokeInsts with differing result types + // that have uses. We cannot push up a bitcast into their block after + // them because they are terminators. Would need to insert an + // additional BB. + if (isa(F1In) && !F1In->use_empty() && + F1In->getType() != F2In->getType()) + return false; + + if (!enumerate(F1In, F2In)) + goto differing_instructions; + + if (const GetElementPtrInst *GEP1 = dyn_cast(F1In)) { + const GetElementPtrInst *GEP2 = dyn_cast(F2In); + if (!GEP2) + goto differing_instructions; + + if (!enumerate(GEP1->getPointerOperand(), GEP2->getPointerOperand())) + goto differing_instructions; + + if (!isEquivalentGEP(GEP1, GEP2)) + goto differing_instructions; + } else if (const PHINode *Phi1 = dyn_cast(F1In)) { + const PHINode *Phi2 = dyn_cast(F2In); + // We can't currently merge a PHI and non-PHI instruction + if (!Phi2) + return false; + + // We can't currently merge PHI nodes with different numbers of incoming + // values + if (F1In->getNumOperands() != F2In->getNumOperands()) + return false; + + // We need to treat PHI nodes specially. Their incoming values may be in a + // different order even if they are equivalent. We can't compare them + // until we've seen the incoming blocks and know which values are + // equivalent. Therefore postpone PHINode comparison until the end. 
+ PHIsFound->push_back(std::make_pair(Phi1, Phi2)); + } else { + if (!isEquivalentOperation(F1In, F2In)) + goto differing_instructions; + + bool IsCall = isa(F1In); + assert(F1In->getNumOperands() == F2In->getNumOperands()); + for (unsigned i = 0, e = F1In->getNumOperands(); i != e; ++i) { + Value *OpF1 = F1In->getOperand(i); + Value *OpF2 = F2In->getOperand(i); + + // Allow self-reference if this is a call instruction and the last + // operand which is the called function + bool AllowSelfRef = IsCall && (i + 1) == e; + + if (!enumerate(OpF1, OpF2, !AllowSelfRef)) + goto differing_instructions; + + if (!isEquivalentType(OpF1->getType(), OpF2->getType())) + goto differing_instructions; + + if ((OpF1 == F1 && OpF2 == F2) || (OpF1 == F2 && OpF2 == F1)) + SelfRefInstructions[F1In] = F2In; + } + } + + continue; + +differing_instructions: + // Cannot merge functions with differing landing pad instructions yet. They + // would need special treatment which involves updating the corresponding + // invoke instructions. + if (isa(F1In)) + return false; + if (isa(F1In)) + return false; + + DifferingInstructions[F1In] = F2In; + } + + // We cannot currently merge basic blocks with different instruction counts + return F1I == F1E && F2I == F2E; +} + +bool FunctionComparator::comparePHIs( + const std::list > &PHIs) { + if (PHIs.empty()) + return true; + + for (std::list >::const_iterator + I = PHIs.begin(), E = PHIs.end(); I != E; ++I) { + const PHINode *Phi1 = I->first, *Phi2 = I->second; + + for (unsigned ValId = 0, ValNum = Phi1->getNumIncomingValues(); + ValId < ValNum; ++ValId) { + Value *Phi1Val = Phi1->getIncomingValue(ValId); + + // Get corresponding Phi2Val + Value *BBinPhi2Val = getF1toF2Map()[Phi1->getIncomingBlock(ValId)]; + + if (!BBinPhi2Val) + return false; // Currently can't handle differing predecessor blocks + + BasicBlock *BBinPhi2 = cast(BBinPhi2Val); + Value *Phi2Val = Phi2->getIncomingValueForBlock(BBinPhi2); + + // Enumerate the values. 
If the PHI node references the function itself (a + // very rare case), we mark it as different (NoSelfRef). This is only + // necessary for outline merging, not equiv merging. TODO: Make equal + // merging possible with such PHI nodes. + if (!enumerate(Phi1Val, Phi2Val,/*NoSelfRef=*/true)) { + DifferingInstructions[Phi1] = Phi2; + break; + } + } + } + + return true; +} + +// Test whether the two functions have equivalent behaviour. +bool FunctionComparator::compare() { + // We need to recheck everything, but check the things that weren't included + // in the hash first. + if (F1->getAttributes() != F2->getAttributes()) + goto not_mergeable; + + if (F1->hasGC() != F2->hasGC()) + goto not_mergeable; + + if (F1->hasGC() && F1->getGC() != F2->getGC()) + goto not_mergeable; + + if (!F1->getSection().equals(F2->getSection())) + goto not_mergeable; + + if (F1->isVarArg() != F2->isVarArg()) + goto not_mergeable; + + if (F1->isInterposable() != F2->isInterposable()) + goto not_mergeable; + + if (F1->size() != F2->size()) + goto not_mergeable; + + // TODO: if it's internal and only used in direct calls, we could handle + // this case too. + if (F1->getCallingConv() != F2->getCallingConv()) + goto not_mergeable; + + if (!isEquivalentType(F1->getFunctionType(), F2->getFunctionType())) + goto not_mergeable; + + assert(F1->arg_size() == F2->arg_size() && + "Identically typed functions have different numbers of args!"); + + // Visit the arguments so that they get enumerated in the order they're + // passed in. + for (Function::const_arg_iterator f1i = F1->arg_begin(), + f2i = F2->arg_begin(), f1e = F1->arg_end(); f1i != f1e; ++f1i, ++f2i) { + if (!enumerate(&*f1i, &*f2i)) + llvm_unreachable("Arguments repeat!"); + } + + // We do a CFG-ordered walk since the actual ordering of the blocks in the + // linked list is immaterial. Our walk starts at the entry block for both + // functions, then takes each block from each terminator in order. 
As an + // artifact, this also means that unreachable blocks are ignored. + { + SmallVector F1BBs, F2BBs; + SmallSet VisitedBBs; // in terms of F1. + std::list > PHIsFound; + + F1BBs.push_back(&F1->getEntryBlock()); + F2BBs.push_back(&F2->getEntryBlock()); + + VisitedBBs.insert(F1BBs[0]); + while (!F1BBs.empty()) { + const BasicBlock *F1BB = F1BBs.pop_back_val(); + const BasicBlock *F2BB = F2BBs.pop_back_val(); + + // Check for control flow divergence + if (!enumerate(F1BB, F2BB)) + goto not_mergeable; + + const Instruction *F1TI = F1BB->getTerminator(); + const Instruction *F2TI = F2BB->getTerminator(); + + // TODO: Implement merging of blocks with different numbers of + // instructions. + if (F1TI->getNumSuccessors() != F2TI->getNumSuccessors() || + F1BB->size() != F2BB->size()) + goto not_mergeable; + + // The actual instruction-by-instruction comparison + if (!compare(F1BB, F2BB, &PHIsFound)) + goto not_mergeable; + + // FIXME: Count this in compare(F1BB,F2BB) so it doesn't include debug + // instructions. + InstructionCount += std::max(F1BB->size(), F2BB->size()); + + assert(F1TI->getNumSuccessors() == F2TI->getNumSuccessors()); + for (unsigned i = 0, e = F1TI->getNumSuccessors(); i != e; ++i) { + if (!VisitedBBs.insert(F1TI->getSuccessor(i)).second) + continue; + + F1BBs.push_back(F1TI->getSuccessor(i)); + F2BBs.push_back(F2TI->getSuccessor(i)); + } + } + + BasicBlockCount = VisitedBBs.size(); + + // After we've seen all values and BBs, compare the PHI nodes + if (!comparePHIs(PHIsFound)) + goto not_mergeable; + } + + if (DifferingInstructions.size()) { + // Currently we can't merge vararg functions with differing instructions. + // TODO: Explore whether this is feasible; the difficult bit is the + // additional argument we need to add. 
+ if (F1->isVarArg()) + goto not_mergeable; + + isDifferent = true; + DifferingInstructionsCount += DifferingInstructions.size(); + + LLVM_DEBUG(float Metric = ((float)InstructionCount - DifferingInstructionsCount) + / InstructionCount*100; + dbgs() << "Similar fns: " << F1->getName() << " and " << F2->getName() + << " bbs=" << BasicBlockCount << " insts=" << InstructionCount + << " failed=" << DifferingInstructionsCount << " metric=" + << format("%0.2f", Metric) + << '\n'); + } + + return true; + +not_mergeable: + // Fail: cannot merge the two functions + isNotMergeable = true; + return false; +} + +bool FunctionComparator::isExactMatch() { + return (!isNotMergeable && !isDifferent); +} + +bool FunctionComparator::isMergeCandidate() { + if (isNotMergeable) + return false; + + if (!isDifferent) + return true; + + // Heuristic when to attempt merging + if (InstructionCount > MergeDifferingMinInsts && + DifferingInstructionsCount <= MergeMaxDiffering && + getSimilarityMetric() > MergeMinSimilarity) + return true; + + // Tolerate higher difference with higher similarity. + if (InstructionCount > 100 && + DifferingInstructionsCount <= 60 && + getSimilarityMetric() > 90 ) + return true; + + return false; +} + +namespace { + +struct FunctionComparatorOrdering { + bool operator () (FunctionComparator *LHS, FunctionComparator *RHS) const { + unsigned MetricLHS = LHS->getSimilarityMetric(), + MetricRHS = RHS->getSimilarityMetric(); + + // If the metric is the same, then default to the unique ID. We need + // to use a unique value instead of the object address to ensure + // deterministic ordering. + if (MetricLHS == MetricRHS) + return LHS->getID() > RHS->getID(); + return MetricLHS > MetricRHS; + } +}; + +class MergeRegistry { +public: + typedef MapVector > FnCompareMap; + typedef std::set + FnComparatorSet; + typedef std::map SimilarFnMap; + + ~MergeRegistry(); + + void clear(); + + /// Defer a function for consideration in the next round. 
+ void defer(Function *F); + + /// Return true if we have deferred functions that can be enqueued. + bool haveDeferred() { return !Deferred.empty(); } + + /// Move all the deferred functions into buckets to consider them for merging. + /// Returns number of functions that have been added. + unsigned enqueue(); + + /// Add a candidate for merging + void insertCandidate(FunctionComparator *Comp); + + /// Remove a Function from the FnSet and queue it up for a second sweep of + /// analysis if Reanalyze is set. If it is a candidate for merging, remove it + /// from consideration. + void remove(Function *F, bool Reanalyze=true); + + /// Return the similarity metric of the most similar function to F that is + /// not listed in the Ignore set. + unsigned getMaxSimilarity(Function *F, const DenseSet &Ignore); + + /// The collection of buckets that contain functions that may be similar to + /// each other (same hash value). + FnCompareMap FunctionsToCompare; + + std::list FunctionsToMerge; + SimilarFnMap SimilarFunctions; + +private: + typedef std::vector FnDeferredQueue; + + /// A work queue of functions that may have been modified and should be + /// analyzed again. + FnDeferredQueue Deferred; +}; + +} // end anonymous namespace + +MergeRegistry::~MergeRegistry() { + this->clear(); +} + +void MergeRegistry::clear() { + Deferred.clear(); + SimilarFunctions.clear(); + for (std::list::iterator + I = FunctionsToMerge.begin(), E = FunctionsToMerge.end(); + I != E; ++I) { + FunctionComparator *FC = *I; + delete FC; + } + FunctionsToMerge.clear(); + FunctionsToCompare.clear(); +} + +static bool isAliasCapable(Function* G) { + return + UseGlobalAliases && G->hasGlobalUnnamedAddr() + && (G->hasExternalLinkage() || G->hasLocalLinkage() || G->hasWeakLinkage()); +} + +static bool isComparisonCandidate(Function *F) { + if (Opt::MergeLevel == Opt::size) { + // Only consider functions that are to be optimized for size. 
+ // By default, that is all functions at -Os/-Oz and nothing at -O2. + bool Os = F->getAttributes(). + hasAttribute(AttributeList::FunctionIndex, Attribute::OptimizeForSize); + bool Oz = F->getAttributes(). + hasAttribute(AttributeList::FunctionIndex, Attribute::MinSize); + if (!Os && !Oz) + return false; + } + + // Ignore declarations and tiny functions - no point in merging those + if (F->isDeclaration()) return false; + if (F->getName().endswith(MERGED_SUFFIX)) return false; + if (F->hasAvailableExternallyLinkage()) return false; + if (F->hasFnAttribute(Attribute::AlwaysInline)) return false; + if (F->size() == 1 && F->begin()->size() < MergeMinInsts) + return isAliasCapable(F); + + return true; +} + +void MergeRegistry::defer(Function *F) { + if (isComparisonCandidate(F)) + Deferred.push_back(F); +} + +// Move functions from Deferred into buckets. remove() may have been called +// multiple times for the same function, so eliminate duplicates using the +// set. We reverse them because MergeSimilarFunctions::insert inserts at the +// front of each bucket. 
+unsigned MergeRegistry::enqueue() { + DenseSet InsertedFuncs; + + for (std::vector::reverse_iterator + DefI = Deferred.rbegin(), DefE = Deferred.rend(); + DefI != DefE; ++DefI) { + Value *V = *DefI; + Function *F = dyn_cast_or_null(V); + if (!F) continue; + if (InsertedFuncs.find(F) != InsertedFuncs.end()) continue; + if (!isComparisonCandidate(F)) continue; + + unsigned Hash = profileFunction(F); + FunctionsToCompare[Hash].push_front(F); + + InsertedFuncs.insert(F); + } + + Deferred.clear(); + + return InsertedFuncs.size(); +} + +void MergeRegistry::insertCandidate(FunctionComparator *Comp) { + FunctionsToMerge.push_back(Comp); + SimilarFunctions[Comp->getF1()].insert(Comp); +} + +static void removeFromBucket(Function *F, + std::list &Bucket) { + for (std::list::iterator + I = Bucket.begin(), E = Bucket.end(); I != E; ++I) { + if (I->getFunc() == F) { + Bucket.erase(I); + return; + } + } +} + +void MergeRegistry::remove(Function *F, bool Reanalyze/*=true*/) { + // There is no need to remove a function that is not already + // in a bucket. + if (!isComparisonCandidate(F)) + return; + + unsigned Hash = profileFunction(F); + std::list &Bucket = FunctionsToCompare[Hash]; + + removeFromBucket(F, Bucket); + + if (Reanalyze) + Deferred.push_back(F); + + // Check whether we have any existing FunctionComparator objects for this fn. + // If yes, discard them because F has changed. Retry merging for those + // functions by adding them to Deferred. 
+ std::list::iterator I = FunctionsToMerge.begin(); + while (I != FunctionsToMerge.end()) { + FunctionComparator *Comp = *I; + if (Comp->getF1() == F) { + Function *OtherF = Comp->getF2(); + Deferred.push_back(OtherF); + removeFromBucket(OtherF, Bucket); + if (!SimilarFunctions[F].erase(Comp)) + llvm_unreachable("Inconsistent SimilarFunctions set"); + I = FunctionsToMerge.erase(I); + delete Comp; + } else if (Comp->getF2() == F) { + Function *OtherF = Comp->getF1(); + Deferred.push_back(OtherF); + removeFromBucket(OtherF, Bucket); + if (!SimilarFunctions[OtherF].erase(Comp)) + llvm_unreachable("Inconsistent SimilarFunctions set"); + I = FunctionsToMerge.erase(I); + delete Comp; + } else { + ++I; + } + } +} + +unsigned MergeRegistry::getMaxSimilarity(Function *F, + const DenseSet &Ignore) { + FnComparatorSet &Similar = SimilarFunctions[F]; + + for (FnComparatorSet::iterator I = Similar.begin(), E = Similar.end(); + I != E; ++I) { + FunctionComparator *Comp = *I; + if (Ignore.count(Comp->getF2())) + continue; + + return Comp->getSimilarityMetric(); + } + + return 0; +} + +namespace { + +class MergeSimilarFunctions : public ModulePass { +public: + static char ID; + MergeSimilarFunctions(const ModuleSummaryIndex *Summary = nullptr) + : ModulePass(ID) { + initializeMergeSimilarFunctionsPass(*PassRegistry::getPassRegistry()); + } + + bool runOnModule(Module &M); + +private: + /// Find the functions that use this Value and remove them from FnSet and + /// queue the functions. + void removeUsers(Value *V); + + /// Replace all direct calls of Old with calls of New. Will bitcast New if + /// necessary to make types match. + void replaceDirectCallers(Function *Old, Function *New); + + /// Process functions in the specified bucket, by either doing equiv merging + /// marking them for diff merging. Returns false if the bucket needs to be + /// re-scanned after an equiv merge. Sets Changed if the module was changed by + /// equiv merge. 
+ bool mergeBucket(std::list &Fns, bool &Changed); + + /// Exhaustively compare all functions in each bucket and do equiv merging + /// where possible. Functions that have already been compared will not be + /// compared again. Returns true if the module was modified. + bool doExhaustiveCompareMerge(); + + /// Merge all the functions marked for diff merging. Returns true if the + /// module was modified. + bool doDiffMerge(); + + /// Merge two equivalent functions. Upon completion, G may be deleted, or may + /// be converted into a thunk. In either case, it should never be visited + /// again. + void mergeTwoFunctions(Function *F, Function *G); + + /// Merge a set of functions with differences. + void outlineAndMergeFunctions(SmallVectorImpl &Fns); + + /// Replace G with a thunk or an alias to F. Deletes G. + void writeThunkOrAlias(Function *F, Function *G); + + /// Replace G with a simple tail call to bitcast(F). Also replace direct uses + /// of G with bitcast(F). Deletes G. + void writeThunk(Function *F, Function *G); + + /// Replace G with a tail call to F with an additional argument. + /// + void writeThunkWithChoice(Function *NewF, Function *OldF, int Choice); + + /// Replace G with an alias to F. Deletes G. + void writeAlias(Function *F, Function *G); + + /// DataLayout for more accurate GEP comparisons. May be NULL. + const DataLayout *DL; + + /// Merge registry. Stores all the information about functions being + /// considered for merging as well as current candidates for merging. 
+ MergeRegistry Registry; + +}; + +} // end anonymous namespace + +char MergeSimilarFunctions::ID = 0; +INITIALIZE_PASS(MergeSimilarFunctions, "mergesimilarfunc", + "Merge Similar Functions", false, false) + +ModulePass * +llvm::createMergeSimilarFunctionsPass(const ModuleSummaryIndex *S) { + return new MergeSimilarFunctions(S); +} + +bool MergeSimilarFunctions::runOnModule(Module &M) { + if (Opt::MergeLevel == Opt::none) + return false; + + bool Changed = false; + + DL = &M.getDataLayout(); + + for (auto &I : M) + Registry.defer(&I); + + do { + unsigned InsertCount = Registry.enqueue(); + + LLVM_DEBUG(dbgs() << "size of module: " << M.size() << '\n'); + LLVM_DEBUG(dbgs() << "size of worklist: " << InsertCount << '\n'); + (void)InsertCount; + + Changed |= doExhaustiveCompareMerge(); + } while (Registry.haveDeferred()); + + Changed |= doDiffMerge(); + + Registry.clear(); + return Changed; +} + +// Replace direct callers of Old with New. +void MergeSimilarFunctions::replaceDirectCallers(Function *Old, Function *New) { + Constant *BitcastNew = ConstantExpr::getBitCast(New, Old->getType()); + for (Value::use_iterator UI = Old->use_begin(), UE = Old->use_end(); + UI != UE;) { + Use *U = &*UI; + ++UI; + CallSite CS(U->getUser()); + if (CS && CS.isCallee(U)) { + Registry.remove(CS.getInstruction()->getParent()->getParent()); + U->set(BitcastNew); + } + } +} + +// Replace G with an alias to F if possible, or else a thunk to F. Deletes G. 
+void MergeSimilarFunctions::writeThunkOrAlias(Function *F, Function *G) { + if (isAliasCapable(G)) { + writeAlias(F, G); + return; + } + + writeThunk(F, G); +} + +static void writeThunkBody(Function *Thunk, Function *F, + ConstantInt *Choice, const DataLayout *DL) { + BasicBlock *BB = &Thunk->getEntryBlock(); + IRBuilder<> Builder(BB); + + SmallVector Args; + unsigned i = 0; + FunctionType *FFTy = F->getFunctionType(); + Type *IntPtrTy = DL->getIntPtrType(FFTy->getContext()); + for (auto &AI : Thunk->args()) { + Value *Cast = createCastIfNeeded(&AI, FFTy->getParamType(i), BB, IntPtrTy); + Args.push_back(Cast); + ++i; + } + if (Choice) + Args.push_back(Choice); + + CallInst *CI = Builder.CreateCall(F, Args); + CI->setTailCall(); + CI->setCallingConv(F->getCallingConv()); + CI->setAttributes(F->getAttributes()); + CI->setIsNoInline(); + if (Thunk->getReturnType()->isVoidTy()) { + Builder.CreateRetVoid(); + } else { + Type *RetTy = Thunk->getReturnType(); + if (CI->getType()->isIntegerTy() && RetTy->isPointerTy()) + Builder.CreateRet(Builder.CreateIntToPtr(CI, RetTy)); + else if (CI->getType()->isPointerTy() && RetTy->isIntegerTy()) + Builder.CreateRet(Builder.CreatePtrToInt(CI, RetTy)); + else { + Value *Cast = createCastIfNeeded(CI, RetTy, BB, IntPtrTy); + Builder.CreateRet(Cast); + } + } +} + +// Replace G with a simple tail call to bitcast(F). Also replace direct uses +// of G with bitcast(F). Deletes G. +void MergeSimilarFunctions::writeThunk(Function *F, Function *G) { + if (!G->isInterposable()) { + // Redirect direct callers of G to F. + replaceDirectCallers(G, F); + } + + // If G was internal then we may have replaced all uses of G with F. If so, + // stop here and delete G. There's no need for a thunk. + if (G->hasLocalLinkage() && G->use_empty()) { + LLVM_DEBUG(dbgs() << "All uses of " << G->getName() << " replaced by " + << F->getName() << ". 
Removing it.\n"); + G->eraseFromParent(); + return; + } + + Function *NewG = Function::Create(G->getFunctionType(), G->getLinkage(), "", + G->getParent()); + BasicBlock::Create(F->getContext(), "", NewG); + + writeThunkBody(NewG, F, nullptr, DL); + + NewG->copyAttributesFrom(G); + NewG->takeName(G); + removeUsers(G); + G->replaceAllUsesWith(NewG); + G->eraseFromParent(); + + LLVM_DEBUG(dbgs() << "writeThunk: " << NewG->getName() << " calling " + << F->getName() << '\n'); + ++NumThunksWritten; +} + +void MergeSimilarFunctions::writeThunkWithChoice(Function *NewF, Function *OldF, + int Choice) { + // Deleting the body of a function sets its linkage to External. Save the old + // one here and restore it at the end. + GlobalValue::LinkageTypes OldFLinkage = OldF->getLinkage(); + + // Delete OldF's body + OldF->deleteBody(); + BasicBlock::Create(OldF->getContext(), "", OldF); + + // Insert single BB with tail call + IntegerType *Int32Ty = Type::getInt32Ty(OldF->getContext()); + ConstantInt *ChoiceConst = ConstantInt::get(Int32Ty, Choice); + writeThunkBody(OldF, NewF, ChoiceConst, DL); + OldF->setLinkage(OldFLinkage); +} + +// Replace G with an alias to F and delete G. +void MergeSimilarFunctions::writeAlias(Function *F, Function *G) { + + // Replace all current uses of G in constants with F. This handles virtual + // table and other references. Do this first so that we don't modify thge + // global alias we're about to create. 
+ SmallVector Uses; + for (auto I = G->use_begin(), E = G->use_end(); I != E; ++I) { + Use *U = I.operator->(); + Constant *CV = dyn_cast(U->getUser()); + if (!CV) continue; + Uses.push_back(U); + } + for (auto I = Uses.begin(), E= Uses.end(); I != E; ++I) { + Use *U = *I; + U->set(F); + } + + PointerType *PTy = G->getType(); + auto *GA = GlobalAlias::create(PTy->getElementType(), PTy->getAddressSpace(), + G->getLinkage(), "", F); + F->setAlignment(std::max(F->getAlignment(), G->getAlignment())); + GA->takeName(G); + GA->setVisibility(G->getVisibility()); + removeUsers(G); + G->replaceAllUsesWith(GA); + G->eraseFromParent(); + + LLVM_DEBUG(dbgs() << "writeAlias: " << GA->getName() << '\n'); + ++NumAliasesWritten; +} + +// Merge two equivalent functions. Upon completion, Function G is deleted. +void MergeSimilarFunctions::mergeTwoFunctions(Function *F, Function *G) { + if (F->isInterposable()) { + assert(G->isInterposable()); + + if (UseGlobalAliases) { + // Make them both thunks to the same internal function. + Function *H = Function::Create(F->getFunctionType(), F->getLinkage(), "", + F->getParent()); + H->copyAttributesFrom(F); + H->takeName(F); + removeUsers(F); + F->replaceAllUsesWith(H); + + unsigned MaxAlignment = std::max(G->getAlignment(), H->getAlignment()); + + writeAlias(F, G); + writeAlias(F, H); + + F->setAlignment(MaxAlignment); + F->setLinkage(GlobalValue::PrivateLinkage); + } else { + // We can't merge them. Instead, pick one and update all direct callers + // to call it and hope that we improve the instruction cache hit rate. 
+ replaceDirectCallers(G, F); + } + + ++NumDoubleWeak; + } else { + writeThunkOrAlias(F, G); + } + + ++NumFunctionsMerged; +} + +static Value *getLastArg(Function *F) { + auto it = F->arg_begin(); + std::advance(it, F->arg_size()-1); + return it; +} + +static void insertCondAndRemapInstructions( + Instruction *F1InstInNewF, const std::vector &F2Insts, + Function *NewF, ValueToValueMapTy &F1toNewF, + const SmallVectorImpl &Comps, + Type *IntPtrTy) { + assert(F2Insts.size() == Comps.size() && + "Mis-match between F2Insts & Comps!"); + + SmallVector F2InstsInNewF; + for (unsigned FnI = 0, FnE = F2Insts.size(); FnI != FnE; ++FnI) { + const Instruction *F2Inst = F2Insts[FnI]; + if (!F2Inst) { + F2InstsInNewF.push_back(NULL); + continue; + } + + Instruction *F2InstInNewF = F2Inst->clone(); + + // Remap F2Inst: F2 values -> F1 values + RemapInstruction(F2InstInNewF, Comps[FnI]->getF2toF1Map(), + RF_NoModuleLevelChanges); + // Remap F2Inst: F1 values -> NewF values + RemapInstruction(F2InstInNewF, F1toNewF, RF_NoModuleLevelChanges); + + F2InstsInNewF.push_back(F2InstInNewF); + } + + SmallVector Terminators; + SplitBlockAndInsertSwitch(getLastArg(NewF), F1InstInNewF, + F2InstsInNewF, Terminators); + + assert(Terminators.size() == F2InstsInNewF.size() + 1 && + "Not enough terminators returned"); + + // F2InstsInNewF are now hooked up to the correct values in NewF. However, + // some of their operands may be pointers/integers so they could potentially + // have the wrong type in NewF (since we treat all pointers and integers of + // same size as equal). Insert casts if needed. 
+ for (unsigned FnI = 0, FnE = F2InstsInNewF.size(); FnI != FnE; ++FnI) { + Instruction *F2InstInNewF = F2InstsInNewF[FnI]; + if (!F2InstInNewF) + continue; + const Instruction *F2Inst = F2Insts[FnI]; + + for (unsigned OpId=0; OpId < F2InstInNewF->getNumOperands(); ++OpId) { + Value *F2NewFOperand = F2InstInNewF->getOperand(OpId); + Value *F2OrigOperand = F2Inst->getOperand(OpId); + + if (F2NewFOperand->getType() != F2OrigOperand->getType()) { + Value *Cast = createCastIfNeeded(F2NewFOperand, + F2OrigOperand->getType(), + F2InstInNewF, + IntPtrTy); + F2InstInNewF->setOperand(OpId, Cast); + } + } + } + + if (ReturnInst *F1Ret = dyn_cast(F1InstInNewF)) { + // If we're handling differing return instructions, we need to ensure that + // they all return the same type. Since we treat pointer types as equal, we + // may need to insert a bitcast. + for (Instruction *F2Inst : F2InstsInNewF) { + if (!F2Inst) + continue; + + // F2Inst must also be a return instruction due to control flow + // isomorphism. + ReturnInst *F2Ret = cast(F2Inst); + + if (F2Ret->getReturnValue()->getType() != + F1Ret->getReturnValue()->getType()) + F2Ret->setOperand(0, + createCastIfNeeded(F2Ret->getReturnValue(), + F1Ret->getReturnValue()->getType(), + F2Ret, IntPtrTy)); + } + } else if (!F1InstInNewF->use_empty()) { + // If the instructions have uses, we need to insert a PHI node. 
+ // + // We treat all pointer types as equal, so we may need to insert + // a bitcast to ensure that all incoming values of the PHI node have the + // same type + Type *F1IType = F1InstInNewF->getType(); + BasicBlock *TailBB = Terminators[0]->getSuccessor(0); + PHINode *Phi = + PHINode::Create(F1IType, F2InstsInNewF.size(), "", &TailBB->front()); + F1InstInNewF->replaceAllUsesWith(Phi); + + Phi->addIncoming(F1InstInNewF, F1InstInNewF->getParent()); + for (unsigned FnI = 0, FnE = F2InstsInNewF.size(); FnI != FnE; ++FnI) { + Instruction *F2InstInNewF = F2InstsInNewF[FnI]; + if (!F2InstInNewF) + continue; + + if (F2InstInNewF->getType() != F1IType) { + assert(!F2InstInNewF->isTerminator() && + "Cannot cast result of terminator instruction"); + + F2InstInNewF = cast( + createCastIfNeeded(F2InstInNewF, + F1IType, + Terminators[FnI+1], + IntPtrTy)); + } + + Phi->addIncoming(F2InstInNewF, F2InstInNewF->getParent()); + } + } +} + +static void mergePHINode(const SmallVectorImpl &Fns, + Function *NewF, + ValueToValueMapTy &VMap, /* F1->FNew */ + const PHINode *F1PhiInst, + std::vector F2PhiInsts) { + PHINode *F1PhiInNewF = dyn_cast(VMap[F1PhiInst]); + assert(F1PhiInNewF && "Cannot find F1Inst in NewF!"); + + // The incoming blocks in any of the F2PhiInsts may be in a different order. + // If this is the case, we have to reorder them. F2PhiInsts is intentionally a + // copy, so we can modify it + SmallVector GCInsts; // so we can delete them later. 
+ for (unsigned FnI = 0, FnE = F2PhiInsts.size(); FnI != FnE; ++FnI) { + const PHINode *F2PhiInst = dyn_cast_or_null(F2PhiInsts[FnI]); + if (!F2PhiInst) + continue; + + for (unsigned I = 0, E = F1PhiInNewF->getNumIncomingValues(); I < E; ++I) { + if (!Fns[FnI]->enumerate(F1PhiInst->getIncomingBlock(I), + F2PhiInst->getIncomingBlock(I))) { + // Non-equivalent blocks in the same position - need to reorder PhiInst + PHINode *ReorderedF2PhiInst = PHINode::Create(F2PhiInst->getType(), E); + + for (unsigned II = 0; II < E; ++II) { + Value *BBVal = + Fns[FnI]->getF1toF2Map()[F1PhiInst->getIncomingBlock(II)]; + BasicBlock *BB = cast(BBVal); + Value *Val = F2PhiInst->getIncomingValueForBlock(BB); + ReorderedF2PhiInst->addIncoming(Val, BB); + } + + F2PhiInsts[FnI] = ReorderedF2PhiInst; + GCInsts.push_back(ReorderedF2PhiInst); + break; + } + } + } + + // Now merge the PHI nodes. + for (unsigned i = 0; i < F1PhiInNewF->getNumIncomingValues(); ++i) { + Value *F1InValNewF = F1PhiInNewF->getIncomingValue(i), + *F1InVal = F1PhiInst->getIncomingValue(i); + BasicBlock *F1NewFInBlock = F1PhiInNewF->getIncomingBlock(i); + // If this is a repeat occurrence of the same incoming BasicBlock, we + // will have already dealt with it in a previous iteration. + if (F1PhiInNewF->getBasicBlockIndex(F1PhiInNewF->getIncomingBlock(i)) != + (int)i) + continue; + + Value *NewIncoming = F1InValNewF; + + Instruction *InsertPt = F1NewFInBlock->getTerminator(); + + // Build up a chain of cmps and selects that pick the correct incoming + // value. 
+ for (unsigned FnI = 0, FnE = F2PhiInsts.size(); FnI != FnE; ++FnI) { + if (!F2PhiInsts[FnI]) + continue; + const PHINode *F2PhiInst = cast(F2PhiInsts[FnI]); + Value *F2InVal = F2PhiInst->getIncomingValue(i); + + // If we know these are equivalent, there's no further work to do + if (Fns[FnI]->enumerate(F1InVal, F2InVal,/*NoSelfRef=*/true) && + Fns[FnI]->enumerate(F1PhiInst->getIncomingBlock(i), + F2PhiInst->getIncomingBlock(i))) + continue; + + assert(Fns[FnI]->enumerate(F1PhiInst->getIncomingBlock(i), + F2PhiInst->getIncomingBlock(i)) && + "Non-equivalent incoming BBs in PHI."); + + // We have different incoming values from the same block + // Translate F2's incoming value to NewF if needed + Value *F2InValNewF = F2InVal; + if (!isa(F2InVal)) { + Value *V = Fns[FnI]->getF2toF1Map()[F2InVal]; // F2->F1 + F2InValNewF = VMap[V]; // F1->NewF + assert(V && F2InValNewF && "Cannot map F2InVal to NewF"); + } + + // Cast F2InValNewF to the correct type if needed + LLVMContext &Ctx = F1InValNewF->getType()->getContext(); + const DataLayout *FTD = Fns[FnI]->getDataLayout(); + Type *IntPtrTy = FTD ? FTD->getIntPtrType(Ctx) : NULL; + F2InValNewF = createCastIfNeeded(F2InValNewF, F1InValNewF->getType(), + InsertPt, IntPtrTy); + + // Create compare & select + Value *ChoiceArg = getLastArg(NewF); + Value *SelectBit = new ICmpInst(InsertPt, + ICmpInst::ICMP_EQ, + getLastArg(NewF), + ConstantInt::get(ChoiceArg->getType(), + FnI+1)); + + // SelectBit true -> F2InValNewF, SelectBit false -> existing NewIncoming. 
+ NewIncoming = SelectInst::Create(SelectBit, F2InValNewF, NewIncoming, "", + InsertPt); + } + + if (NewIncoming == F1InValNewF) + continue; // no change for this incoming value + + // Replace all occurrences of this incoming value/block by the new + // ones (phi nodes can have repeated arguments) + for (unsigned j=i; j < F1PhiInNewF->getNumIncomingValues(); ++j) { + if (F1PhiInNewF->getIncomingBlock(j) == F1NewFInBlock) { + F1PhiInNewF->setIncomingValue(j, NewIncoming); + } + } + } + + // Garbage-collect the reordered PHI nodes we temporarily created. + for (SmallVectorImpl::iterator I = GCInsts.begin(), + E = GCInsts.end(); I != E; ++I) + delete *I; +} + +static bool rewriteRecursiveCall( + const CallInst *F1I, const CallInst *F2I, CallInst *NewFI, + const Function *F1, const Function *F2, Function *NewF) { + if (!(F1I->getCalledFunction() == F1 && F2I->getCalledFunction() == F2) && + !(F1I->getCalledFunction() == F2 && F2I->getCalledFunction() == F1)) + return false; // not a recursive/mutually recursive call + + // Replace NewFI by recursive call to NewF with additional choice argument + SmallVector Args; + for (unsigned AI = 0, End = NewFI->getNumArgOperands(); AI < End; ++AI) { + Value *Arg = NewFI->getArgOperand(AI); + + // Check if F1 or F2 is one of the arguments (veeery unusual case, don't + // handle it for now). + if (Arg == F1 || Arg == F2) + return false; + + Args.push_back(Arg); + } + + if (F1I->getCalledFunction() == F1 && F2I->getCalledFunction() == F2) { + Args.push_back(getLastArg(NewF)); + } else { + // Need to invert the choice argument + Value *ChoiceArg = getLastArg(NewF); + Constant *One = ConstantInt::get(ChoiceArg->getType(), 1); + Args.push_back(BinaryOperator::Create(Instruction::Xor, ChoiceArg, One, "", + NewFI)); + } + + CallInst *CI = CallInst::Create(NewF, Args); + CI->setCallingConv(NewF->getCallingConv()); + + ReplaceInstWithInst(NewFI, CI); + + return true; +} + +/// Clone F1 into a new function with F1's name + MERGE_SUFFIX. 
Adds an +/// additional i32 argument to the function. +static Function *cloneAndAddArgument(Function *F1, ValueToValueMapTy &VMap) { + LLVMContext &Context = F1->getContext(); + + std::vector ArgTypes; + for (const auto &Arg : F1->args()) + ArgTypes.push_back(Arg.getType()); + ArgTypes.push_back(Type::getInt32Ty(Context)); + + FunctionType *FTy = FunctionType::get(F1->getFunctionType()->getReturnType(), + ArgTypes, + F1->getFunctionType()->isVarArg()); + Function *NewF = Function::Create(FTy, F1->getLinkage(), + F1->getName()+MERGED_SUFFIX); + + insertFunctionAfter(NewF, F1); + + if (F1->hasSection()) + NewF->setSection(F1->getSection()); + + if (F1->getFunctionType()->isVarArg()) + NewF->setCallingConv(CallingConv::C); + else + NewF->setCallingConv(CallingConv::Fast); + + Function::arg_iterator DestI = NewF->arg_begin(); + for (auto &Arg : F1->args()) { + Argument *DestIn = &*DestI; + DestIn->setName(Arg.getName()); // Copy the name over... + VMap[&Arg] = DestIn; // Add mapping to VMap + ++DestI; + } + + // Name the selector argument + (*DestI).setName("__merge_arg"); + + SmallVector Returns; + CloneFunctionInto(NewF, F1, VMap, CloneType::ExtractingFunctions, Returns); + // Set linkage to set visibility to default. + NewF->setLinkage(GlobalValue::InternalLinkage); + + return NewF; +} + +typedef MapVector > + CombinedDiffMap; + +void MergeSimilarFunctions::outlineAndMergeFunctions( + SmallVectorImpl &Fns) { + assert(!Fns.empty() && "Cannot merge empty set of functions"); + + // All comparator instances in Fns share the same F1 + Function *F1 = Fns.front()->getF1(); + + // Clone F1 into new function with an additional i32 argument + ValueToValueMapTy VMap; // maps F1 values -> NewF values + Function *NewF = cloneAndAddArgument(F1, VMap); + + // Combine all the DifferingInstructions maps in Fns into one single map of + // lists to aid the merging process. + // + // Map F1 instruction -> list of F2 instructions indexed by position in Fns. 
+ CombinedDiffMap AllDifferingInstructions; + for (unsigned I = 0, E = Fns.size(); I != E; ++I) { + FunctionComparator *Comp = Fns[I]; + for (MapVector::iterator + DiffI = Comp->DifferingInstructions.begin(), + DiffE = Comp->DifferingInstructions.end(); + DiffI != DiffE; ++DiffI) { + AllDifferingInstructions[DiffI->first].resize(Fns.size()); + AllDifferingInstructions[DiffI->first][I] = DiffI->second; + } + } + + // Merge differing PHI nodes. We need to handle these first because they could + // be affected later on when we split basic blocks, thus making them + // impossible to merge. + for (CombinedDiffMap::const_iterator I = AllDifferingInstructions.begin(), + E = AllDifferingInstructions.end(); + I != E; ++I) { + const PHINode *F1PhiInst = dyn_cast(I->first); + if (!F1PhiInst) + continue; + + const std::vector &F2PhiInsts = I->second; + + mergePHINode(Fns, NewF, VMap, F1PhiInst, F2PhiInsts); + } + + // Merge recursive calls + // + // TODO: We currently only support this optimization for pairs of functions. + // If more than two functions are merged, we mark the recursive calls as + // DifferingInstructions which causes switch statements to be inserted and + // recursive calls going through thunks. It wouldn't be too hard to implement + // self-recursive calls for multi-merges. *Mutually* recursive calls with + // multi-merges are a little trickier - that's why we leave it for now. + if (Fns.size() == 1) { + FunctionComparator *Comp = Fns.front(); + for (MapVector::const_iterator + I = Comp->SelfRefInstructions.begin(), + E = Comp->SelfRefInstructions.end(); + I != E; ++I) { + const Instruction *F1I = I->first; + if (Comp->DifferingInstructions.count(F1I)) + continue; // Differing in other ways too, so deal with it later. 
+ + // Attempt recursive call rewriting + if (isa(F1I)) { + const CallInst *F1Call = cast(F1I); + const CallInst *F2Call = dyn_cast(I->second); + CallInst *NewFCall = dyn_cast(VMap[F1I]); + + if (F1Call && F2Call && NewFCall && + rewriteRecursiveCall(F1Call, F2Call, NewFCall, + Comp->getF1(), Comp->getF2(), NewF)) + continue; + } + + // Can't rewrite it. Mark as differing and insert conditional later + Comp->DifferingInstructions[F1I] = I->second; + } + } else { + for (unsigned I = 0, E = Fns.size(); I != E; ++I) { + FunctionComparator *Comp = Fns[I]; + for (MapVector::const_iterator + II = Comp->SelfRefInstructions.begin(), + EE = Comp->SelfRefInstructions.end(); + II != EE; ++II) { + const Instruction *F1I = II->first; + if (Comp->DifferingInstructions.count(F1I)) + continue; // Differing in other ways too, so deal with it later. + + AllDifferingInstructions[F1I].resize(Fns.size()); + AllDifferingInstructions[F1I][I] = II->second; + } + } + } + + // For each differing instruction, splice basic block, and insert conditional + LLVMContext &Context = NewF->getContext(); + Type *IntPtrType = DL->getIntPtrType(Context); + for (CombinedDiffMap::const_iterator I = AllDifferingInstructions.begin(), + E = AllDifferingInstructions.end(); + I != E; ++I) { + const Instruction *F1Inst = I->first; + const std::vector &F2Insts = I->second; + + assert(VMap.find(F1Inst) != VMap.end() && + "Cannot find differing inst!"); + Instruction *F1InstInNewF = cast(VMap[F1Inst]); + + if (isa(F1InstInNewF)) + continue; // we already handled these above + + insertCondAndRemapInstructions(F1InstInNewF, F2Insts, + NewF, VMap, Fns, IntPtrType); + } + + // Replace functions with thunks + PrintMerges("FNSM", F1, NewF); + writeThunkWithChoice(NewF, F1, 0); + for (unsigned FnI = 0, FnE = Fns.size(); FnI != FnE; ++FnI) { + Function *F2 = Fns[FnI]->getF2(); + PrintMerges("FNSM", F2, NewF); + writeThunkWithChoice(NewF, F2, FnI + 1); + } + NumSimilarFunctionsMerged += Fns.size() + 1; +} + +// For each 
instruction used by the value, remove() the function that contains +// the instruction. This should happen right before a call to RAUW. +void MergeSimilarFunctions::removeUsers(Value *V) { + std::vector Worklist; + Worklist.push_back(V); + while (!Worklist.empty()) { + Value *V = Worklist.back(); + Worklist.pop_back(); + + for (User *U : V->users()) { + if (Instruction *I = dyn_cast(U)) { + Registry.remove(I->getParent()->getParent()); + } else if (isa(U)) { + // do nothing + } else if (Constant *C = dyn_cast(U)) { + for (User *UU : C->users()) + Worklist.push_back(UU); + } + } + } +} + +bool MergeSimilarFunctions::mergeBucket(std::list &Fns, + bool &Changed) { + for (std::list::iterator FnI = Fns.begin(), + FnE = Fns.end(); FnI != FnE; ++FnI) { + if (!FnI->isNew()) + continue; + + if (!FnI->getFunc()) + continue; + + SmallVector DiffMergeCandidates; + + std::list::iterator Fn2I = FnI; + for (++Fn2I; Fn2I != FnE; ++Fn2I) { + if (!Fn2I->getFunc()) + continue; + + assert(FnI->getFunc() != Fn2I->getFunc() && + "Duplicate function in list!"); + + FunctionComparator *Comp = new FunctionComparator(DL, FnI->getFunc(), + Fn2I->getFunc()); + + if (!Comp->compare() || !Comp->isMergeCandidate()) { + delete Comp; + continue; + } + + // Never thunk a strong function to a weak function. + assert(!FnI->getFunc()->isInterposable() || + Fn2I->getFunc()->isInterposable()); + + if (Comp->isExactMatch()) { + // Equiv merge the two functions. Throw away any diff merge + // candidate we might have found so far. + delete Comp; + + LLVM_DEBUG(dbgs() << "- Equiv merge " << FnI->getFunc()->getName() + << " == " << Fn2I->getFunc()->getName() << '\n'); + + PrintMerges("FNEQ", FnI->getFunc(), Fn2I->getFunc()); + + Function *DeleteF = Fn2I->getFunc(); + Registry.remove(DeleteF, /*reanalyze=*/false); + + mergeTwoFunctions(FnI->getFunc(), DeleteF); + + Changed = true; + + // mergeTwoFunctions may have removed functions from this bucket and + // invalidated the iterators. 
Rescan the whole bucket, continuing + // from the current function (previous ones will have been + // markCompared()) + for (SmallVector::iterator + I = DiffMergeCandidates.begin(), E = DiffMergeCandidates.end(); + I != E; ++I) + delete *I; + + return false; + } else { + DiffMergeCandidates.push_back(Comp); + } + } + + if (!DiffMergeCandidates.empty()) { + // Add to our list of candidates for diff merging + for (SmallVector::iterator + I = DiffMergeCandidates.begin(), E = DiffMergeCandidates.end(); + I != E; ++I) { + Registry.insertCandidate(*I); + } + } + + FnI->markCompared(); + } + + return true; +} + +bool MergeSimilarFunctions::doExhaustiveCompareMerge() { + bool Changed = false; + + // Process buckets with strong functions first. + for (MergeRegistry::FnCompareMap::iterator + BucketsI = Registry.FunctionsToCompare.begin(), + BucketsE = Registry.FunctionsToCompare.end(); + BucketsI != BucketsE; ++BucketsI) { + std::list &Fns = BucketsI->second; + if (Fns.size() < 2 || Fns.front().getFunc()->isInterposable()) + continue; + + LLVM_DEBUG(dbgs() << "Processing strong bucket " << BucketsI->first << " with " + << Fns.size() << " functions\n"); + // Repeatedly scan this bucket, until we find no more functions to equiv + // merge. + while (!mergeBucket(Fns, Changed) && Fns.size() > 1) { + LLVM_DEBUG(dbgs() << "Rescanning bucket.\n"); + } + } + + // Process buckets with weak functions. + for (MergeRegistry::FnCompareMap::iterator + BucketsI = Registry.FunctionsToCompare.begin(), + BucketsE = Registry.FunctionsToCompare.end(); + BucketsI != BucketsE; ++BucketsI) { + std::list &Fns = BucketsI->second; + if (Fns.size() < 2 || !Fns.front().getFunc()->isInterposable()) + continue; + + LLVM_DEBUG(dbgs() << "Processing weak bucket " << BucketsI->first << " with " + << Fns.size() << " functions\n"); + // Repeatedly scan this bucket, until we find no more functions to equiv + // merge. 
+ while (!mergeBucket(Fns, Changed) && Fns.size() > 1) { + LLVM_DEBUG(dbgs() << "Rescanning bucket.\n"); + } + } + + return Changed; +} + +static bool orderComparatorsByMetric(FunctionComparator *Cmp1, + FunctionComparator *Cmp2) { + return (Cmp1->getSimilarityMetric() > Cmp2->getSimilarityMetric()); +} + +bool MergeSimilarFunctions::doDiffMerge() { + if (Registry.FunctionsToMerge.empty()) + return false; + + bool Changed = false; + DenseSet MergedFns; // Functions that have already been merged + Registry.FunctionsToMerge.sort(orderComparatorsByMetric); + + for (std::list::iterator + I = Registry.FunctionsToMerge.begin(), + E = Registry.FunctionsToMerge.end(); + I != E; ++I) { + FunctionComparator *Comp = *I; + Function *F1 = Comp->getF1(); + // Ignore it if we've already merged this fn + if (MergedFns.count(F1) || MergedFns.count(Comp->getF2())) + continue; + + assert(Registry.SimilarFunctions.count(F1) && + "Comparator doesn't exist in SimilarFunctions map"); + + // Look at all functions F that F1 could be merged with. Merge with each F, + // unless there is another function F' that is more similar to F than F1. 
+ MergeRegistry::FnComparatorSet &SimilarFns = Registry.SimilarFunctions[F1]; + SmallVector CurrentMerge; + + for (MergeRegistry::FnComparatorSet::iterator + CandidateI = SimilarFns.begin(), CandidateE = SimilarFns.end(); + CandidateI != CandidateE; ++CandidateI) { + FunctionComparator *Comp2 = *CandidateI; + assert(Comp2->getF1() == F1 && "Inconsistency in SimilarFunctions"); + Function *F2 = Comp2->getF2(); + + // Ignore it if we've already merged this fn + if (MergedFns.count(F2)) + continue; + + // Check whether there is a better merge candidate for F2 + if (Registry.getMaxSimilarity(F2, MergedFns) > + Comp2->getSimilarityMetric()) + continue; + + // Ok, we actually want to merge with F2 + CurrentMerge.push_back(Comp2); + MergedFns.insert(F2); + } + + if (CurrentMerge.empty()) + continue; + + MergedFns.insert(F1); + + NumMultiMerged += CurrentMerge.size(); + + LLVM_DEBUG(dbgs() << "- Multi merge of " << F1->getName() << " with " + << CurrentMerge.size() << " functions.\n"); + + Changed = true; + outlineAndMergeFunctions(CurrentMerge); + } + + return Changed; +} diff --git a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp --- a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -153,6 +153,10 @@ EnableMatrix("enable-matrix", cl::init(false), cl::Hidden, cl::desc("Enable lowering of the matrix intrinsics")); +static cl::opt EnableMergeSimilarFunctions( + "enable-merge-sim-functions", cl::init(false), cl::Hidden, + cl::desc("Enable the Function merging pass (default = on)")); + PassManagerBuilder::PassManagerBuilder() { OptLevel = 2; SizeLevel = 0; @@ -609,6 +613,11 @@ MPM.add(createOpenMPOptLegacyPass()); MPM.add(createPostOrderFunctionAttrsLegacyPass()); + if (EnableMergeSimilarFunctions) { + auto *Summary = (ImportSummary ? 
ImportSummary : ExportSummary); + MPM.add(createMergeSimilarFunctionsPass(Summary)); + } + if (OptLevel > 2) MPM.add(createArgumentPromotionPass()); // Scalarize uninlined fn args @@ -820,6 +829,9 @@ if (MergeFunctions) MPM.add(createMergeFunctionsPass()); + if (EnableMergeSimilarFunctions) + MPM.add(createMergeSimilarFunctionsPass()); + // LoopSink pass sinks instructions hoisted by LICM, which serves as a // canonicalization pass that enables other optimizations. As a result, // LoopSink pass needs to be a very late IR pass to avoid undoing LICM @@ -1047,6 +1059,8 @@ // currently it damages debug info. if (MergeFunctions) PM.add(createMergeFunctionsPass()); + if (EnableMergeSimilarFunctions) + PM.add(createMergeSimilarFunctionsPass()); } void PassManagerBuilder::populateThinLTOPassManager( diff --git a/llvm/lib/Transforms/Utils/CloneFunction.cpp b/llvm/lib/Transforms/Utils/CloneFunction.cpp --- a/llvm/lib/Transforms/Utils/CloneFunction.cpp +++ b/llvm/lib/Transforms/Utils/CloneFunction.cpp @@ -80,10 +80,8 @@ // Clone OldFunc into NewFunc, transforming the old arguments into references to // VMap values. -// void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc, - ValueToValueMapTy &VMap, - bool ModuleLevelChanges, + ValueToValueMapTy &VMap, CloneType CT, SmallVectorImpl &Returns, const char *NameSuffix, ClonedCodeInfo *CodeInfo, ValueMapTypeRemapper *TypeMapper, @@ -101,12 +99,12 @@ NewFunc->copyAttributesFrom(OldFunc); NewFunc->setAttributes(NewAttrs); + RemapFlags RF = + (CT == CloneType::ModuleLevelChanges) ? RF_None : RF_NoModuleLevelChanges; // Fix up the personality function that got copied over. if (OldFunc->hasPersonalityFn()) - NewFunc->setPersonalityFn( - MapValue(OldFunc->getPersonalityFn(), VMap, - ModuleLevelChanges ? 
RF_None : RF_NoModuleLevelChanges, - TypeMapper, Materializer)); + NewFunc->setPersonalityFn(MapValue(OldFunc->getPersonalityFn(), VMap, RF, + TypeMapper, Materializer)); SmallVector NewArgAttrs(NewFunc->arg_size()); AttributeList OldAttrs = OldFunc->getAttributes(); @@ -123,11 +121,11 @@ AttributeList::get(NewFunc->getContext(), OldAttrs.getFnAttributes(), OldAttrs.getRetAttributes(), NewArgAttrs)); - bool MustCloneSP = - OldFunc->getParent() && OldFunc->getParent() == NewFunc->getParent(); + bool MustCloneSP = CT != CloneType::ExtractingFunctions && + OldFunc->getParent() && OldFunc->getParent() == NewFunc->getParent(); DISubprogram *SP = OldFunc->getSubprogram(); if (SP) { - assert(!MustCloneSP || ModuleLevelChanges); + assert(!MustCloneSP || CT == CloneType::ModuleLevelChanges); // Add mappings for some DebugInfo nodes that we don't want duplicated // even if they're distinct. auto &MD = VMap.MD(); @@ -144,10 +142,7 @@ OldFunc->getAllMetadata(MDs); for (auto MD : MDs) { NewFunc->addMetadata( - MD.first, - *MapMetadata(MD.second, VMap, - ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges, - TypeMapper, Materializer)); + MD.first, *MapMetadata(MD.second, VMap, RF, TypeMapper, Materializer)); } // When we remap instructions, we want to avoid duplicating inlined @@ -207,9 +202,7 @@ BB != BE; ++BB) // Loop over all instructions, fixing each one as we find it... for (Instruction &II : *BB) - RemapInstruction(&II, VMap, - ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges, - TypeMapper, Materializer); + RemapInstruction(&II, VMap, RF, TypeMapper, Materializer); // Register all DICompileUnits of the old parent module in the new parent module auto* OldModule = OldFunc->getParent(); @@ -262,8 +255,9 @@ } SmallVector Returns; // Ignore returns cloned. - CloneFunctionInto(NewF, F, VMap, F->getSubprogram() != nullptr, Returns, "", - CodeInfo); + CloneType CT = F->getSubprogram() != nullptr ? 
CloneType::ModuleLevelChanges + : CloneType::InvalidCloneType; + CloneFunctionInto(NewF, F, VMap, CT, Returns, "", CodeInfo); return NewF; } diff --git a/llvm/lib/Transforms/Utils/CloneModule.cpp b/llvm/lib/Transforms/Utils/CloneModule.cpp --- a/llvm/lib/Transforms/Utils/CloneModule.cpp +++ b/llvm/lib/Transforms/Utils/CloneModule.cpp @@ -161,7 +161,7 @@ } SmallVector Returns; // Ignore returns cloned. - CloneFunctionInto(F, &I, VMap, /*ModuleLevelChanges=*/true, Returns); + CloneFunctionInto(F, &I, VMap, CloneType::ModuleLevelChanges, Returns); if (I.hasPersonalityFn()) F->setPersonalityFn(MapValue(I.getPersonalityFn(), VMap)); diff --git a/llvm/test/Transforms/MergeSimilarFunc/merge-alloca.ll b/llvm/test/Transforms/MergeSimilarFunc/merge-alloca.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/MergeSimilarFunc/merge-alloca.ll @@ -0,0 +1,94 @@ +;RUN: opt -mergesimilarfunc -mergesimilarfunc-level=all -S < %s | FileCheck %s +; +; Test whether mergefunc merges allocas of different sizes correctly +; + +target datalayout = "e-m:e-p:32:32-i1:32-i64:64-a:0-v32:32-n16:32" + +%struct.A = type { i32, i32 } +%struct.B = type { i32, i32, i32 } + +; Function Attrs: nounwind optsize +define void @f1() #0 { +; CHECK-LABEL: @f1__merged( +; CHECK: alloca %struct.A +; CHECK: alloca %struct.B +entry: + %a = alloca %struct.A, align 4 + %0 = bitcast %struct.A* %a to i8* + call void @externalFun(i8* %0) #2 + call void @externalFun(i8* %0) #2 + call void @externalFun(i8* %0) #2 + call void @externalFun(i8* %0) #2 + call void @externalFun(i8* %0) #2 + call void @externalFun(i8* %0) #2 + call void @externalFun(i8* %0) #2 + call void @externalFun(i8* %0) #2 + call void @externalFun(i8* %0) #2 + call void @externalFun(i8* %0) #2 + call void @externalFun(i8* %0) #2 + call void @externalFun(i8* %0) #2 + call void @externalFun(i8* %0) #2 + call void @externalFun(i8* %0) #2 + call void @externalFun(i8* %0) #2 + call void @externalFun(i8* %0) #2 + call void @externalFun(i8* %0) #2 
+ ret void +} + +; Function Attrs: optsize +declare void @externalFun(i8*) #1 + +; Function Attrs: nounwind optsize +define void @f2() #0 { +entry: + %a = alloca %struct.B, align 4 + %0 = bitcast %struct.B* %a to i8* + call void @externalFun(i8* %0) #2 + call void @externalFun(i8* %0) #2 + call void @externalFun(i8* %0) #2 + call void @externalFun(i8* %0) #2 + call void @externalFun(i8* %0) #2 + call void @externalFun(i8* %0) #2 + call void @externalFun(i8* %0) #2 + call void @externalFun(i8* %0) #2 + call void @externalFun(i8* %0) #2 + call void @externalFun(i8* %0) #2 + call void @externalFun(i8* %0) #2 + call void @externalFun(i8* %0) #2 + call void @externalFun(i8* %0) #2 + call void @externalFun(i8* %0) #2 + call void @externalFun(i8* %0) #2 + call void @externalFun(i8* %0) #2 + call void @externalFun(i8* %0) #2 + ret void +} + +; Function Attrs: nounwind optsize +define void @f3() #0 { +entry: + %a = alloca i8, align 1 + call void @externalFun(i8* %a) #2 + call void @externalFun(i8* %a) #2 + call void @externalFun(i8* %a) #2 + call void @externalFun(i8* %a) #2 + call void @externalFun(i8* %a) #2 + call void @externalFun(i8* %a) #2 + call void @externalFun(i8* %a) #2 + call void @externalFun(i8* %a) #2 + call void @externalFun(i8* %a) #2 + call void @externalFun(i8* %a) #2 + call void @externalFun(i8* %a) #2 + call void @externalFun(i8* %a) #2 + call void @externalFun(i8* %a) #2 + call void @externalFun(i8* %a) #2 + call void @externalFun(i8* %a) #2 + call void @externalFun(i8* %a) #2 + call void @externalFun(i8* %a) #2 + ret void +} + +attributes #0 = { nounwind optsize "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { optsize "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" 
"no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #2 = { nounwind optsize } + diff --git a/llvm/test/Transforms/MergeSimilarFunc/merge-debug-info-2.ll b/llvm/test/Transforms/MergeSimilarFunc/merge-debug-info-2.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/MergeSimilarFunc/merge-debug-info-2.ll @@ -0,0 +1,101 @@ +; RUN: opt -S -mergesimilarfunc -mergesimilarfunc-diff-min-insts=5 < %s | FileCheck %s +; This used to fail with assertion in CloneFunction +; REQUIRES: asserts +; CHECK-LABEL: @foo__merged( + +target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" +target triple = "arm64-apple-ios8.0.0" + +%struct.wibble = type { %struct.wibble.0*, %struct.wibble* } +%struct.wibble.0 = type { i64*, i8*, i64*, i64*, %struct.eggs*, %struct.wibble* } +%struct.eggs = type { %struct.wombat*, %struct.eggs* } +%struct.wombat = type { i8*, %struct.blam*, %struct.blam* } +%struct.blam = type { i8*, %struct.blam* } +%struct.snork = type { %struct.bar*, %struct.snork* } +%struct.bar = type { i64*, i8* } + +; Function Attrs: argmemonly nounwind +declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) #0 + +; Function Attrs: minsize nounwind optsize ssp uwtable +define hidden void @foo(%struct.wibble* %arg) #1 align 2 !dbg !4 { +bb: + %tmp = alloca %struct.wibble*, align 8 + %tmp2 = load %struct.wibble*, %struct.wibble** %tmp, align 8, !dbg !12 + %tmp3 = icmp ne %struct.wibble* %tmp2, null, !dbg !13 + br i1 %tmp3, label %bb4, label %bb13, !dbg !14 + +bb4: ; preds = %bb + call void @foo.1() #3, !dbg !26 + unreachable + +bb13: ; preds = %bb + ret void, !dbg !27 +} + +; Function Attrs: minsize nounwind optsize ssp uwtable +declare hidden void @foo.1() #1 align 2 + +; Function Attrs: minsize nounwind optsize ssp uwtable +define void @quux() unnamed_addr #1 align 2 !dbg !28 { +bb: + ret void +} + +; Function Attrs: minsize nounwind optsize ssp uwtable +define hidden void @baz(%struct.snork* 
%arg) #1 align 2 !dbg !30 { +bb: + %tmp = alloca %struct.snork*, align 8 + %tmp2 = load %struct.snork*, %struct.snork** %tmp, align 8, !dbg !31 + %tmp3 = icmp ne %struct.snork* %tmp2, null, !dbg !32 + br i1 %tmp3, label %bb4, label %bb13, !dbg !33 + +bb4: ; preds = %bb + call void @blam() #3, !dbg !42 + unreachable + +bb13: ; preds = %bb + ret void, !dbg !43 +} + +; Function Attrs: minsize nounwind optsize ssp uwtable +declare hidden void @blam() #1 align 2 + +attributes #0 = { argmemonly nounwind } +attributes #1 = { minsize nounwind optsize ssp uwtable } +attributes #2 = { nounwind } +attributes #3 = { minsize optsize } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!2, !3} + +!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "(based on LLVM 5.0.0)", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly) +!1 = !DIFile(filename: "foo.cpp", directory: "/") +!2 = !{i32 2, !"Debug Info Version", i32 3} +!3 = !{i32 7, !"PIC Level", i32 2} +!4 = distinct !DISubprogram(name: "delete", scope: !5, file: !5, line: 31, type: !6, isLocal: false, isDefinition: true, scopeLine: 32, flags: DIFlagPrototyped, isOptimized: true, unit: !0) +!5 = !DIFile(filename: "foo.h", directory: "/") +!6 = !DISubroutineType(types: !7) +!7 = !{} +!9 = !{!"any pointer", !10, i64 0} +!10 = !{!"omnipotent char", !11, i64 0} +!11 = !{!"Simple C++ TBAA"} +!12 = !DILocation(line: 33, column: 11, scope: !4) +!13 = !DILocation(line: 33, column: 16, scope: !4) +!14 = !DILocation(line: 33, column: 5, scope: !4) +!21 = !{!"_ZTSN", !9, i64 0, !9, i64 8} +!23 = !DILocation(line: 37, column: 25, scope: !4) +!24 = !DILocation(line: 37, column: 31, scope: !4) +!25 = !{!21, !9, i64 0} +!26 = !DILocation(line: 37, column: 7, scope: !4) +!27 = !DILocation(line: 41, column: 3, scope: !4) +!28 = distinct !DISubprogram(name: "~destruct", scope: !29, file: !29, line: 31, type: !6, isLocal: false, isDefinition: true, scopeLine: 32, flags: DIFlagPrototyped, isOptimized: true, 
unit: !0) +!29 = !DIFile(filename: "bar.h", directory: "/") +!30 = distinct !DISubprogram(name: "delete", scope: !5, file: !5, line: 31, type: !6, isLocal: false, isDefinition: true, scopeLine: 32, flags: DIFlagPrototyped, isOptimized: true, unit: !0) +!31 = !DILocation(line: 33, column: 11, scope: !30) +!32 = !DILocation(line: 33, column: 16, scope: !30) +!33 = !DILocation(line: 33, column: 5, scope: !30) +!40 = !DILocation(line: 37, column: 25, scope: !30) +!41 = !DILocation(line: 37, column: 31, scope: !30) +!42 = !DILocation(line: 37, column: 7, scope: !30) +!43 = !DILocation(line: 41, column: 3, scope: !30) diff --git a/llvm/test/Transforms/MergeSimilarFunc/merge-debug-info.ll b/llvm/test/Transforms/MergeSimilarFunc/merge-debug-info.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/MergeSimilarFunc/merge-debug-info.ll @@ -0,0 +1,243 @@ +; This used to fail the verifier with the following error: +; "dbg attachment points at wrong subprogram for function" +; RUN: opt -S -mergesimilarfunc < %s | FileCheck %s +; REQUIRES: asserts +; CHECK-LABEL: @bar__merged( +target datalayout = "e-m:e-p:32:32:32-a:0-n16:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048" + +%struct.str_type = type { i8*, i32, i32 } + +; Function Attrs: nounwind optsize +define i32 @bar(i8* %b) #0 !dbg !14 { +entry: + %retval = alloca i32, align 4 + %b.addr = alloca i8*, align 4 + %res = alloca i32, align 4 + %ee = alloca %struct.str_type*, align 4 + %cleanup.dest.slot = alloca i32 + store i8* %b, i8** %b.addr, align 4, !tbaa !31 + call void @llvm.dbg.declare(metadata i8** %b.addr, metadata !18, metadata !35), !dbg !36 + %0 = bitcast i32* %res to i8*, !dbg !37 + call void @llvm.lifetime.start(i64 4, i8* %0) #4, !dbg !37 + call void @llvm.dbg.declare(metadata i32* %res, metadata !19, metadata !35), !dbg !38 + %1 = bitcast %struct.str_type** %ee to i8*, !dbg !39 + call void @llvm.lifetime.start(i64 4, i8* %1) 
#4, !dbg !39 + call void @llvm.dbg.declare(metadata %struct.str_type** %ee, metadata !20, metadata !35), !dbg !40 + %2 = load i8*, i8** %b.addr, align 4, !dbg !41, !tbaa !31 + %3 = bitcast i8* %2 to %struct.str_type*, !dbg !42 + store %struct.str_type* %3, %struct.str_type** %ee, align 4, !dbg !40, !tbaa !31 + %4 = load %struct.str_type*, %struct.str_type** %ee, align 4, !dbg !43, !tbaa !31 + %set = getelementptr inbounds %struct.str_type, %struct.str_type* %4, i32 0, i32 1, !dbg !45 + %5 = load i32, i32* %set, align 4, !dbg !45, !tbaa !46 + %tobool = icmp ne i32 %5, 0, !dbg !43 + br i1 %tobool, label %if.end4, label %if.then, !dbg !49 + +if.then: ; preds = %entry + %6 = load %struct.str_type*, %struct.str_type** %ee, align 4, !dbg !50, !tbaa !31 + %set1 = getelementptr inbounds %struct.str_type, %struct.str_type* %6, i32 0, i32 1, !dbg !52 + store i32 1, i32* %set1, align 4, !dbg !53, !tbaa !46 + %7 = load %struct.str_type*, %struct.str_type** %ee, align 4, !dbg !54, !tbaa !31 + %x = getelementptr inbounds %struct.str_type, %struct.str_type* %7, i32 0, i32 0, !dbg !55 + %8 = load i8*, i8** %x, align 4, !dbg !55, !tbaa !56 + %call = call i32 @foo(i8* %8) #5, !dbg !57 + store i32 %call, i32* %res, align 4, !dbg !58, !tbaa !59 + %9 = load i32, i32* %res, align 4, !dbg !60, !tbaa !59 + %tobool2 = icmp ne i32 %9, 0, !dbg !60 + br i1 %tobool2, label %if.then3, label %if.end, !dbg !62 + +if.then3: ; preds = %if.then + store i32 1, i32* %retval, align 4, !dbg !63 + store i32 1, i32* %cleanup.dest.slot, align 4 + br label %cleanup, !dbg !63 + +if.end: ; preds = %if.then + br label %if.end4, !dbg !64 + +if.end4: ; preds = %if.end, %entry + store i32 0, i32* %retval, align 4, !dbg !65 + store i32 1, i32* %cleanup.dest.slot, align 4 + br label %cleanup, !dbg !65 + +cleanup: ; preds = %if.end4, %if.then3 + %10 = bitcast %struct.str_type** %ee to i8*, !dbg !66 + call void @llvm.lifetime.end(i64 4, i8* %10) #4, !dbg !66 + %11 = bitcast i32* %res to i8*, !dbg !66 + call void 
@llvm.lifetime.end(i64 4, i8* %11) #4, !dbg !66 + %12 = load i32, i32* %retval, align 4, !dbg !66 + ret i32 %12, !dbg !66 +} + +; Function Attrs: nounwind readnone +declare void @llvm.dbg.declare(metadata, metadata, metadata) #1 + +; Function Attrs: argmemonly nounwind +declare void @llvm.lifetime.start(i64, i8* nocapture) #2 + +; Function Attrs: optsize +declare i32 @foo(i8*) #3 + +; Function Attrs: argmemonly nounwind +declare void @llvm.lifetime.end(i64, i8* nocapture) #2 + +; Function Attrs: nounwind optsize +define i32 @bar1(i8* %b) #0 !dbg !21 { +entry: + %retval = alloca i32, align 4 + %b.addr = alloca i8*, align 4 + %res = alloca i32, align 4 + %ee = alloca %struct.str_type*, align 4 + %cleanup.dest.slot = alloca i32 + store i8* %b, i8** %b.addr, align 4, !tbaa !31 + call void @llvm.dbg.declare(metadata i8** %b.addr, metadata !23, metadata !35), !dbg !67 + %0 = bitcast i32* %res to i8*, !dbg !68 + call void @llvm.lifetime.start(i64 4, i8* %0) #4, !dbg !68 + call void @llvm.dbg.declare(metadata i32* %res, metadata !24, metadata !35), !dbg !69 + %1 = bitcast %struct.str_type** %ee to i8*, !dbg !70 + call void @llvm.lifetime.start(i64 4, i8* %1) #4, !dbg !70 + call void @llvm.dbg.declare(metadata %struct.str_type** %ee, metadata !25, metadata !35), !dbg !71 + %2 = load i8*, i8** %b.addr, align 4, !dbg !72, !tbaa !31 + %3 = bitcast i8* %2 to %struct.str_type*, !dbg !73 + store %struct.str_type* %3, %struct.str_type** %ee, align 4, !dbg !71, !tbaa !31 + %4 = load %struct.str_type*, %struct.str_type** %ee, align 4, !dbg !74, !tbaa !31 + %get = getelementptr inbounds %struct.str_type, %struct.str_type* %4, i32 0, i32 2, !dbg !76 + %5 = load i32, i32* %get, align 4, !dbg !76, !tbaa !77 + %tobool = icmp ne i32 %5, 0, !dbg !74 + br i1 %tobool, label %if.end4, label %if.then, !dbg !78 + +if.then: ; preds = %entry + %6 = load %struct.str_type*, %struct.str_type** %ee, align 4, !dbg !79, !tbaa !31 + %get1 = getelementptr inbounds %struct.str_type, %struct.str_type* %6, 
i32 0, i32 2, !dbg !81 + store i32 1, i32* %get1, align 4, !dbg !82, !tbaa !77 + %7 = load %struct.str_type*, %struct.str_type** %ee, align 4, !dbg !83, !tbaa !31 + %x = getelementptr inbounds %struct.str_type, %struct.str_type* %7, i32 0, i32 0, !dbg !84 + %8 = load i8*, i8** %x, align 4, !dbg !84, !tbaa !56 + %call = call i32 @foo(i8* %8) #5, !dbg !85 + store i32 %call, i32* %res, align 4, !dbg !86, !tbaa !59 + %9 = load i32, i32* %res, align 4, !dbg !87, !tbaa !59 + %tobool2 = icmp ne i32 %9, 0, !dbg !87 + br i1 %tobool2, label %if.then3, label %if.end, !dbg !89 + +if.then3: ; preds = %if.then + store i32 1, i32* %retval, align 4, !dbg !90 + store i32 1, i32* %cleanup.dest.slot, align 4 + br label %cleanup, !dbg !90 + +if.end: ; preds = %if.then + br label %if.end4, !dbg !91 + +if.end4: ; preds = %if.end, %entry + store i32 0, i32* %retval, align 4, !dbg !92 + store i32 1, i32* %cleanup.dest.slot, align 4 + br label %cleanup, !dbg !92 + +cleanup: ; preds = %if.end4, %if.then3 + %10 = bitcast %struct.str_type** %ee to i8*, !dbg !93 + call void @llvm.lifetime.end(i64 4, i8* %10) #4, !dbg !93 + %11 = bitcast i32* %res to i8*, !dbg !93 + call void @llvm.lifetime.end(i64 4, i8* %11) #4, !dbg !93 + %12 = load i32, i32* %retval, align 4, !dbg !93 + ret i32 %12, !dbg !93 +} + +attributes #0 = { nounwind optsize } +attributes #1 = { nounwind readnone } +attributes #2 = { argmemonly nounwind } +attributes #3 = { optsize } +attributes #4 = { nounwind } +attributes #5 = { optsize } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!26, !27} +!llvm.ident = !{!28} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "Clang $LLVM_VERSION_MAJOR.$LLVM_VERSION_MINOR (based on LLVM 3.9.0)", isOptimized: true, runtimeVersion: 0, emissionKind: 1, enums: !2, retainedTypes: !3) +!1 = !DIFile(filename: "test.i", directory: "/local/mnt/") +!2 = !{} +!3 = !{!4} +!4 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !5, size: 32, align: 32) +!5 = !DIDerivedType(tag: 
DW_TAG_typedef, name: "str_type", file: !1, line: 8, baseType: !6) +!6 = !DICompositeType(tag: DW_TAG_structure_type, name: "str_type", file: !1, line: 3, size: 96, align: 32, elements: !7) +!7 = !{!8, !10, !12} +!8 = !DIDerivedType(tag: DW_TAG_member, name: "x", scope: !6, file: !1, line: 5, baseType: !9, size: 32, align: 32) +!9 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: null, size: 32, align: 32) +!10 = !DIDerivedType(tag: DW_TAG_member, name: "set", scope: !6, file: !1, line: 6, baseType: !11, size: 32, align: 32, offset: 32) +!11 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed) +!12 = !DIDerivedType(tag: DW_TAG_member, name: "get", scope: !6, file: !1, line: 7, baseType: !11, size: 32, align: 32, offset: 64) +!14 = distinct !DISubprogram(name: "bar", scope: !1, file: !1, line: 10, type: !15, isLocal: false, isDefinition: true, scopeLine: 10, flags: DIFlagPrototyped, isOptimized: true, unit: !0, variables: !17) +!15 = !DISubroutineType(types: !16) +!16 = !{!11, !9} +!17 = !{!18, !19, !20} +!18 = !DILocalVariable(name: "b", arg: 1, scope: !14, file: !1, line: 10, type: !9) +!19 = !DILocalVariable(name: "res", scope: !14, file: !1, line: 11, type: !11) +!20 = !DILocalVariable(name: "ee", scope: !14, file: !1, line: 12, type: !4) +!21 = distinct !DISubprogram(name: "bar1", scope: !1, file: !1, line: 24, type: !15, isLocal: false, isDefinition: true, scopeLine: 24, flags: DIFlagPrototyped, isOptimized: true, unit: !0, variables: !22) +!22 = !{!23, !24, !25} +!23 = !DILocalVariable(name: "b", arg: 1, scope: !21, file: !1, line: 24, type: !9) +!24 = !DILocalVariable(name: "res", scope: !21, file: !1, line: 25, type: !11) +!25 = !DILocalVariable(name: "ee", scope: !21, file: !1, line: 26, type: !4) +!26 = !{i32 2, !"Dwarf Version", i32 4} +!27 = !{i32 2, !"Debug Info Version", i32 3} +!28 = !{!"Clang $LLVM_VERSION_MAJOR.$LLVM_VERSION_MINOR (based on LLVM 3.9.0)"} +!31 = !{!32, !32, i64 0} +!32 = !{!"any pointer", !33, i64 0} +!33 = 
!{!"omnipotent char", !34, i64 0} +!34 = !{!"Simple C/C++ TBAA"} +!35 = !DIExpression() +!36 = !DILocation(line: 10, column: 23, scope: !14) +!37 = !DILocation(line: 11, column: 3, scope: !14) +!38 = !DILocation(line: 11, column: 7, scope: !14) +!39 = !DILocation(line: 12, column: 3, scope: !14) +!40 = !DILocation(line: 12, column: 13, scope: !14) +!41 = !DILocation(line: 12, column: 31, scope: !14) +!42 = !DILocation(line: 12, column: 19, scope: !14) +!43 = !DILocation(line: 14, column: 8, scope: !44) +!44 = distinct !DILexicalBlock(scope: !14, file: !1, line: 14, column: 7) +!45 = !DILocation(line: 14, column: 12, scope: !44) +!46 = !{!47, !48, i64 4} +!47 = !{!"str_type", !32, i64 0, !48, i64 4, !48, i64 8} +!48 = !{!"int", !33, i64 0} +!49 = !DILocation(line: 14, column: 7, scope: !14) +!50 = !DILocation(line: 15, column: 5, scope: !51) +!51 = distinct !DILexicalBlock(scope: !44, file: !1, line: 14, column: 18) +!52 = !DILocation(line: 15, column: 9, scope: !51) +!53 = !DILocation(line: 15, column: 13, scope: !51) +!54 = !DILocation(line: 17, column: 16, scope: !51) +!55 = !DILocation(line: 17, column: 20, scope: !51) +!56 = !{!47, !32, i64 0} +!57 = !DILocation(line: 17, column: 11, scope: !51) +!58 = !DILocation(line: 17, column: 9, scope: !51) +!59 = !{!48, !48, i64 0} +!60 = !DILocation(line: 18, column: 9, scope: !61) +!61 = distinct !DILexicalBlock(scope: !51, file: !1, line: 18, column: 9) +!62 = !DILocation(line: 18, column: 9, scope: !51) +!63 = !DILocation(line: 19, column: 7, scope: !61) +!64 = !DILocation(line: 20, column: 3, scope: !51) +!65 = !DILocation(line: 21, column: 3, scope: !14) +!66 = !DILocation(line: 22, column: 1, scope: !14) +!67 = !DILocation(line: 24, column: 24, scope: !21) +!68 = !DILocation(line: 25, column: 3, scope: !21) +!69 = !DILocation(line: 25, column: 7, scope: !21) +!70 = !DILocation(line: 26, column: 3, scope: !21) +!71 = !DILocation(line: 26, column: 13, scope: !21) +!72 = !DILocation(line: 26, column: 31, scope: !21) 
+!73 = !DILocation(line: 26, column: 19, scope: !21) +!74 = !DILocation(line: 28, column: 8, scope: !75) +!75 = distinct !DILexicalBlock(scope: !21, file: !1, line: 28, column: 7) +!76 = !DILocation(line: 28, column: 12, scope: !75) +!77 = !{!47, !48, i64 8} +!78 = !DILocation(line: 28, column: 7, scope: !21) +!79 = !DILocation(line: 29, column: 5, scope: !80) +!80 = distinct !DILexicalBlock(scope: !75, file: !1, line: 28, column: 17) +!81 = !DILocation(line: 29, column: 9, scope: !80) +!82 = !DILocation(line: 29, column: 13, scope: !80) +!83 = !DILocation(line: 31, column: 16, scope: !80) +!84 = !DILocation(line: 31, column: 20, scope: !80) +!85 = !DILocation(line: 31, column: 11, scope: !80) +!86 = !DILocation(line: 31, column: 9, scope: !80) +!87 = !DILocation(line: 32, column: 9, scope: !88) +!88 = distinct !DILexicalBlock(scope: !80, file: !1, line: 32, column: 9) +!89 = !DILocation(line: 32, column: 9, scope: !80) +!90 = !DILocation(line: 33, column: 7, scope: !88) +!91 = !DILocation(line: 34, column: 3, scope: !80) +!92 = !DILocation(line: 35, column: 3, scope: !21) +!93 = !DILocation(line: 36, column: 1, scope: !21) diff --git a/llvm/test/Transforms/MergeSimilarFunc/merge-equivalent-template.ll b/llvm/test/Transforms/MergeSimilarFunc/merge-equivalent-template.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/MergeSimilarFunc/merge-equivalent-template.ll @@ -0,0 +1,138 @@ +; RUN: opt -S -mergesimilarfunc %s -o - | FileCheck %s + +; CHECK: define linkonce_odr void @_ZN11FooTemplateIPvE7method1EP9FooStruct(%class.FooTemplate.0* nocapture readnone %this, %struct.FooStruct* %arg0) #1 align 2 { +; CHECK-NEXT: entry: +; CHECK-NEXT: %field27 = getelementptr inbounds %struct.FooStruct, %struct.FooStruct* %arg0, i32 0, i32 2 +; CHECK-NEXT: %0 = load i8, i8* %field27, align 1 +; CHECK-NEXT: %lnot8 = icmp eq i8 %0, 0 +; CHECK-NEXT: br i1 %lnot8, label %for.body, label %for.end + +; CHECK: for.body: ; preds = %for.body, %entry +; CHECK-NEXT: %arg0.addr.09 
= phi %struct.FooStruct* [ %2, %for.body ], [ %arg0, %entry ] +; CHECK-NEXT: %field1 = getelementptr inbounds %struct.FooStruct, %struct.FooStruct* %arg0.addr.09, i32 0, i32 1 +; CHECK-NEXT: %1 = load %struct.FooStruct*, %struct.FooStruct** %field1, align 4 +; CHECK-NEXT: tail call void @_ZN11FooTemplateIPvE7method1EP9FooStruct(%class.FooTemplate.0* %this, %struct.FooStruct* %1) #2 +; CHECK-NEXT: %field0 = getelementptr inbounds %struct.FooStruct, %struct.FooStruct* %arg0.addr.09, i32 0, i32 0 +; CHECK-NEXT: %2 = load %struct.FooStruct*, %struct.FooStruct** %field0, align 4 +; CHECK-NEXT: %3 = bitcast %struct.FooStruct* %arg0.addr.09 to i8* +; CHECK-NEXT: tail call void @_Z4bar0Pv(i8* %3) #3 +; CHECK-NEXT: tail call void @_Z4bar1Pv(i8* %3) #3 +; CHECK-NEXT: %field2 = getelementptr inbounds %struct.FooStruct, %struct.FooStruct* %2, i32 0, i32 2 +; CHECK-NEXT: %4 = load i8, i8* %field2, align 1 +; CHECK-NEXT: %lnot = icmp eq i8 %4, 0 +; CHECK-NEXT: br i1 %lnot, label %for.body, label %for.end + +; CHECK: for.end: ; preds = %for.body, %entry +; CHECK-NEXT: ret void +; CHECK-NEXT: } + +; CHECK: define linkonce_odr void @_ZN11FooTemplateIiE7method1EP9FooStruct(%class.FooTemplate* nocapture readnone, %struct.FooStruct*) #1 align 2 { +; CHECK-NEXT: %3 = bitcast %class.FooTemplate* %0 to %class.FooTemplate.0* +; CHECK-NEXT: tail call void @_ZN11FooTemplateIPvE7method1EP9FooStruct(%class.FooTemplate.0* nocapture readnone %3, %struct.FooStruct* %1) +; CHECK-NEXT: ret void +; CHECK-NEXT: } + +; CHECK: attributes #0 = { optsize "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +; CHECK-NEXT: attributes #1 = { nounwind optsize ssp "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" 
"stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +; CHECK-NEXT: attributes #2 = { optsize } +; CHECK-NEXT: attributes #3 = { nounwind optsize } + +target datalayout = "e-m:e-p:32:32-i1:32-i64:64-a:0-v32:32-n16:32" + +%class.Foo = type { %class.FooTemplate, %class.FooTemplate.0 } +%class.FooTemplate = type { i8 } +%class.FooTemplate.0 = type { i8 } +%struct.FooStruct = type { %struct.FooStruct*, %struct.FooStruct*, i8 } + +@_ZN3FooD1Ev = alias void(%class.Foo*), void (%class.Foo*)* @_ZN3FooD2Ev + +declare %struct.FooStruct* @_ZNK11FooTemplateIPvE7method2Ev(%class.FooTemplate.0*) #1 +declare void @_Z4bar0Pv(i8*) #1 +declare void @_Z4bar1Pv(i8*) #1 +declare %struct.FooStruct* @_ZNK11FooTemplateIiE7method2Ev(%class.FooTemplate*) #1 + +; Function Attrs: nounwind optsize ssp +define void @_ZN3FooD2Ev(%class.Foo* %this) unnamed_addr #0 align 2 { +entry: + %bar_things = getelementptr inbounds %class.Foo, %class.Foo* %this, i32 0, i32 0 + tail call void @_ZN11FooTemplateIiE7method0Ev(%class.FooTemplate* %bar_things) #2 + %baz_things = getelementptr inbounds %class.Foo, %class.Foo* %this, i32 0, i32 1 + tail call void @_ZN11FooTemplateIPvE7method0Ev(%class.FooTemplate.0* %baz_things) #2 + ret void +} + +; Function Attrs: nounwind optsize ssp +define linkonce_odr void @_ZN11FooTemplateIiE7method0Ev(%class.FooTemplate* %this) #0 align 2 { +entry: + %call = tail call %struct.FooStruct* @_ZNK11FooTemplateIiE7method2Ev(%class.FooTemplate* %this) #3 + tail call void @_ZN11FooTemplateIiE7method1EP9FooStruct(%class.FooTemplate* %this, %struct.FooStruct* %call) #2 + ret void +} + +; Function Attrs: nounwind optsize ssp +define linkonce_odr void @_ZN11FooTemplateIPvE7method0Ev(%class.FooTemplate.0* %this) #0 align 2 { +entry: + %call = tail call %struct.FooStruct* @_ZNK11FooTemplateIPvE7method2Ev(%class.FooTemplate.0* %this) #3 + tail call void @_ZN11FooTemplateIPvE7method1EP9FooStruct(%class.FooTemplate.0* %this, %struct.FooStruct* %call) #2 + ret 
void +} + +; Function Attrs: nounwind optsize ssp +define linkonce_odr void @_ZN11FooTemplateIPvE7method1EP9FooStruct(%class.FooTemplate.0* nocapture readnone %this, %struct.FooStruct* %arg0) #0 align 2 { +entry: + %field27 = getelementptr inbounds %struct.FooStruct, %struct.FooStruct* %arg0, i32 0, i32 2 + %0 = load i8, i8* %field27, align 1 + %lnot8 = icmp eq i8 %0, 0 + br i1 %lnot8, label %for.body, label %for.end + +for.body: ; preds = %entry, %for.body + %arg0.addr.09 = phi %struct.FooStruct* [ %2, %for.body ], [ %arg0, %entry ] + %field1 = getelementptr inbounds %struct.FooStruct, %struct.FooStruct* %arg0.addr.09, i32 0, i32 1 + %1 = load %struct.FooStruct*, %struct.FooStruct** %field1, align 4 + tail call void @_ZN11FooTemplateIPvE7method1EP9FooStruct(%class.FooTemplate.0* %this, %struct.FooStruct* %1) #2 + %field0 = getelementptr inbounds %struct.FooStruct, %struct.FooStruct* %arg0.addr.09, i32 0, i32 0 + %2 = load %struct.FooStruct*, %struct.FooStruct** %field0, align 4 + %3 = bitcast %struct.FooStruct* %arg0.addr.09 to i8* + tail call void @_Z4bar0Pv(i8* %3) #3 + tail call void @_Z4bar1Pv(i8* %3) #3 + %field2 = getelementptr inbounds %struct.FooStruct, %struct.FooStruct* %2, i32 0, i32 2 + %4 = load i8, i8* %field2, align 1 + %lnot = icmp eq i8 %4, 0 + br i1 %lnot, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + +; Function Attrs: nounwind optsize ssp +define linkonce_odr void @_ZN11FooTemplateIiE7method1EP9FooStruct(%class.FooTemplate* nocapture readnone %this, %struct.FooStruct* %arg0) #0 align 2 { +entry: + %field27 = getelementptr inbounds %struct.FooStruct, %struct.FooStruct* %arg0, i32 0, i32 2 + %0 = load i8, i8* %field27, align 1 + %lnot8 = icmp eq i8 %0, 0 + br i1 %lnot8, label %for.body, label %for.end + +for.body: ; preds = %entry, %for.body + %arg0.addr.09 = phi %struct.FooStruct* [ %2, %for.body ], [ %arg0, %entry ] + %field1 = getelementptr inbounds %struct.FooStruct, %struct.FooStruct* %arg0.addr.09, 
i32 0, i32 1 + %1 = load %struct.FooStruct*, %struct.FooStruct** %field1, align 4 + tail call void @_ZN11FooTemplateIiE7method1EP9FooStruct(%class.FooTemplate* %this, %struct.FooStruct* %1) #2 + %field0 = getelementptr inbounds %struct.FooStruct, %struct.FooStruct* %arg0.addr.09, i32 0, i32 0 + %2 = load %struct.FooStruct*, %struct.FooStruct** %field0, align 4 + %3 = bitcast %struct.FooStruct* %arg0.addr.09 to i8* + tail call void @_Z4bar0Pv(i8* %3) #3 + tail call void @_Z4bar1Pv(i8* %3) #3 + %field2 = getelementptr inbounds %struct.FooStruct, %struct.FooStruct* %2, i32 0, i32 2 + %4 = load i8, i8* %field2, align 1 + %lnot = icmp eq i8 %4, 0 + br i1 %lnot, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + +attributes #0 = { nounwind optsize ssp "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { optsize "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #2 = { optsize } +attributes #3 = { nounwind optsize } + diff --git a/llvm/test/Transforms/MergeSimilarFunc/merge-equivalent.ll b/llvm/test/Transforms/MergeSimilarFunc/merge-equivalent.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/MergeSimilarFunc/merge-equivalent.ll @@ -0,0 +1,226 @@ +; RUN: opt -mergesimilarfunc -S %s -o - | FileCheck %s +; +; CHECK: define i8* @foo_a(%struct.a_type* %arg0) #1 { +; CHECK-NEXT: entry: +; CHECK-NEXT: br label %for.cond + +; CHECK: for.cond: ; preds = %for.inc24, %entry +; CHECK-NEXT: %i.0 = phi i8 [ 0, %entry ], [ %inc25, %for.inc24 ] +; CHECK-NEXT: %ptr0.0 = phi i8* [ null, %entry ], [ %ptr0.3, %for.inc24 ] +; CHECK-NEXT: %conv = zext i8 %i.0 
to i32 +; CHECK-NEXT: %cmp = icmp slt i32 %conv, 16 +; CHECK-NEXT: br i1 %cmp, label %for.body, label %for.end26 + +; CHECK: for.body: ; preds = %for.cond +; CHECK-NEXT: %call = call i8* @bar4(i32 %conv) #2 +; CHECK-NEXT: %call4 = call i8* @bar0(i32 %conv) #2 +; CHECK-NEXT: %0 = bitcast i8* %call4 to %struct.a_type* +; CHECK-NEXT: %call5 = call i32 @bar1(i8* %call) #2 +; CHECK-NEXT: %tobool = icmp ne i32 %call5, 0 +; CHECK-NEXT: br i1 %tobool, label %for.cond6, label %for.inc24 + +; CHECK: for.cond6: ; preds = %for.inc, %for.body +; CHECK-NEXT: %k.0 = phi i8 [ %inc, %for.inc ], [ 0, %for.body ] +; CHECK-NEXT: %ptr0.1 = phi i8* [ %ptr0.2, %for.inc ], [ %call, %for.body ] +; CHECK-NEXT: %idxprom = zext i8 %k.0 to i32 +; CHECK-NEXT: %field4 = getelementptr inbounds %struct.a_type, %struct.a_type* %arg0, i32 0, i32 4 +; CHECK-NEXT: %arrayidx = getelementptr inbounds [2 x i8*], [2 x i8*]* %field4, i32 0, i32 %idxprom +; CHECK-NEXT: %1 = load i8*, i8** %arrayidx, align 4 +; CHECK-NEXT: %cmp7 = icmp ne i8* %1, null +; CHECK-NEXT: br i1 %cmp7, label %land.rhs, label %for.inc24 + +; CHECK: land.rhs: ; preds = %for.cond6 +; CHECK-NEXT: %field410 = getelementptr inbounds %struct.a_type, %struct.a_type* %0, i32 0, i32 4 +; CHECK-NEXT: %arrayidx11 = getelementptr inbounds [2 x i8*], [2 x i8*]* %field410, i32 0, i32 %idxprom +; CHECK-NEXT: %2 = load i8*, i8** %arrayidx11, align 4 +; CHECK-NEXT: %cmp12 = icmp ne i8* %2, null +; CHECK-NEXT: br i1 %cmp12, label %for.body14, label %for.inc24 + +; CHECK: for.body14: ; preds = %land.rhs +; CHECK-NEXT: %3 = bitcast %struct.a_type* %0 to i8* +; CHECK-NEXT: %4 = bitcast %struct.a_type* %arg0 to i8* +; CHECK-NEXT: %call15 = call i32 @bar2(i8* %3, i8* %4) #2 +; CHECK-NEXT: %tobool16 = icmp ne i32 %call15, 0 +; CHECK-NEXT: br i1 %tobool16, label %if.then17, label %for.inc + +; CHECK: if.then17: ; preds = %for.body14 +; CHECK-NEXT: %call18 = call i32 @bar3(i8* %3, i8* %4) #2 +; CHECK-NEXT: %tobool19 = icmp ne i32 %call18, 0 +; CHECK-NEXT: br 
i1 %tobool19, label %if.then20, label %for.inc + +; CHECK: if.then20: ; preds = %if.then17 +; CHECK-NEXT: br label %for.inc + +; CHECK: for.inc: ; preds = %if.then20, %if.then17, %for.body14 +; CHECK-NEXT: %ptr0.2 = phi i8* [ null, %if.then20 ], [ %ptr0.1, %if.then17 ], [ null, %for.body14 ] +; CHECK-NEXT: %inc = add i8 %k.0, 1 +; CHECK-NEXT: br label %for.cond6 + +; CHECK: for.inc24: ; preds = %land.rhs, %for.cond6, %for.body +; CHECK-NEXT: %ptr0.3 = phi i8* [ %ptr0.1, %land.rhs ], [ %ptr0.1, %for.cond6 ], [ null, %for.body ] +; CHECK-NEXT: %inc25 = add i8 %i.0, 1 +; CHECK-NEXT: br label %for.cond + +; CHECK: for.end26: ; preds = %for.cond +; CHECK-NEXT: ret i8* %ptr0.0 +; CHECK-NEXT: } + +; CHECK: ; Function Attrs: nounwind optsize ssp +; CHECK-NEXT: define i8* @foo_b(%struct.b_type*) #1 { +; CHECK-NEXT: %2 = bitcast %struct.b_type* %0 to %struct.a_type* +; CHECK-NEXT: %3 = tail call i8* @foo_a(%struct.a_type* %2) +; CHECK-NEXT: ret i8* %3 +; CHECK-NEXT: } + +; CHECK: attributes #0 = { optsize "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +; CHECK-NEXT: attributes #1 = { nounwind optsize ssp "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +; CHECK-NEXT: attributes #2 = { optsize } + +target datalayout = "e-m:e-p:32:32-i1:32-i64:64-a:0-v32:32-n16:32" + +%struct.a_type = type { i32, i32, i32, i8*, [2 x i8*] } +%struct.b_type = type { i32, i32, i32, i8*, [2 x i8*], i32 } + +; Function Attrs: optsize +declare i8* @bar4(i32) #1 +declare i8* @bar0(i32) #1 +declare i32 @bar1(i8*) #1 +declare i32 @bar2(i8*, i8*) #1 +declare i32 @bar3(i8*, i8*) #1 + +; Function Attrs: nounwind optsize ssp +define i8* 
@foo_a(%struct.a_type* %arg0) #0 { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc24, %entry + %i.0 = phi i8 [ 0, %entry ], [ %inc25, %for.inc24 ] + %ptr0.0 = phi i8* [ null, %entry ], [ %ptr0.3, %for.inc24 ] + %conv = zext i8 %i.0 to i32 + %cmp = icmp slt i32 %conv, 16 + br i1 %cmp, label %for.body, label %for.end26 + +for.body: ; preds = %for.cond + %call = call i8* @bar4(i32 %conv) #2 + %call4 = call i8* @bar0(i32 %conv) #2 + %0 = bitcast i8* %call4 to %struct.a_type* + %call5 = call i32 @bar1(i8* %call) #2 + %tobool = icmp ne i32 %call5, 0 + br i1 %tobool, label %for.cond6, label %for.inc24 + +for.cond6: ; preds = %for.body, %for.inc + %k.0 = phi i8 [ %inc, %for.inc ], [ 0, %for.body ] + %ptr0.1 = phi i8* [ %ptr0.2, %for.inc ], [ %call, %for.body ] + %idxprom = zext i8 %k.0 to i32 + %field4 = getelementptr inbounds %struct.a_type, %struct.a_type* %arg0, i32 0, i32 4 + %arrayidx = getelementptr inbounds [2 x i8*], [2 x i8*]* %field4, i32 0, i32 %idxprom + %1 = load i8*, i8** %arrayidx, align 4 + %cmp7 = icmp ne i8* %1, null + br i1 %cmp7, label %land.rhs, label %for.inc24 + +land.rhs: ; preds = %for.cond6 + %field410 = getelementptr inbounds %struct.a_type, %struct.a_type* %0, i32 0, i32 4 + %arrayidx11 = getelementptr inbounds [2 x i8*], [2 x i8*]* %field410, i32 0, i32 %idxprom + %2 = load i8*, i8** %arrayidx11, align 4 + %cmp12 = icmp ne i8* %2, null + br i1 %cmp12, label %for.body14, label %for.inc24 + +for.body14: ; preds = %land.rhs + %3 = bitcast %struct.a_type* %0 to i8* + %4 = bitcast %struct.a_type* %arg0 to i8* + %call15 = call i32 @bar2(i8* %3, i8* %4) #2 + %tobool16 = icmp ne i32 %call15, 0 + br i1 %tobool16, label %if.then17, label %for.inc + +if.then17: ; preds = %for.body14 + %call18 = call i32 @bar3(i8* %3, i8* %4) #2 + %tobool19 = icmp ne i32 %call18, 0 + br i1 %tobool19, label %if.then20, label %for.inc + +if.then20: ; preds = %if.then17 + br label %for.inc + +for.inc: ; preds = %for.body14, %if.then17, %if.then20 + %ptr0.2 = phi 
i8* [ null, %if.then20 ], [ %ptr0.1, %if.then17 ], [ null, %for.body14 ] + %inc = add i8 %k.0, 1 + br label %for.cond6 + +for.inc24: ; preds = %for.body, %for.cond6, %land.rhs + %ptr0.3 = phi i8* [ %ptr0.1, %land.rhs ], [ %ptr0.1, %for.cond6 ], [ null, %for.body ] + %inc25 = add i8 %i.0, 1 + br label %for.cond + +for.end26: ; preds = %for.cond + ret i8* %ptr0.0 +} + +; Function Attrs: nounwind optsize ssp +define i8* @foo_b(%struct.b_type* %arg0) #0 { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc24, %entry + %i.0 = phi i8 [ 0, %entry ], [ %inc25, %for.inc24 ] + %ptr0.0 = phi i8* [ null, %entry ], [ %ptr0.3, %for.inc24 ] + %conv = zext i8 %i.0 to i32 + %cmp = icmp slt i32 %conv, 16 + br i1 %cmp, label %for.body, label %for.end26 + +for.body: ; preds = %for.cond + %call = call i8* @bar4(i32 %conv) #2 + %call4 = call i8* @bar0(i32 %conv) #2 + %0 = bitcast i8* %call4 to %struct.b_type* + %call5 = call i32 @bar1(i8* %call) #2 + %tobool = icmp ne i32 %call5, 0 + br i1 %tobool, label %for.cond6, label %for.inc24 + +for.cond6: ; preds = %for.body, %for.inc + %k.0 = phi i8 [ %inc, %for.inc ], [ 0, %for.body ] + %ptr0.1 = phi i8* [ %ptr0.2, %for.inc ], [ %call, %for.body ] + %idxprom = zext i8 %k.0 to i32 + %field4 = getelementptr inbounds %struct.b_type, %struct.b_type* %arg0, i32 0, i32 4 + %arrayidx = getelementptr inbounds [2 x i8*], [2 x i8*]* %field4, i32 0, i32 %idxprom + %1 = load i8*, i8** %arrayidx, align 4 + %cmp7 = icmp ne i8* %1, null + br i1 %cmp7, label %land.rhs, label %for.inc24 + +land.rhs: ; preds = %for.cond6 + %field410 = getelementptr inbounds %struct.b_type, %struct.b_type* %0, i32 0, i32 4 + %arrayidx11 = getelementptr inbounds [2 x i8*], [2 x i8*]* %field410, i32 0, i32 %idxprom + %2 = load i8*, i8** %arrayidx11, align 4 + %cmp12 = icmp ne i8* %2, null + br i1 %cmp12, label %for.body14, label %for.inc24 + +for.body14: ; preds = %land.rhs + %3 = bitcast %struct.b_type* %0 to i8* + %4 = bitcast %struct.b_type* %arg0 to i8* + %call15 = 
call i32 @bar2(i8* %3, i8* %4) #2 + %tobool16 = icmp ne i32 %call15, 0 + br i1 %tobool16, label %if.then17, label %for.inc + +if.then17: ; preds = %for.body14 + %call18 = call i32 @bar3(i8* %3, i8* %4) #2 + %tobool19 = icmp ne i32 %call18, 0 + br i1 %tobool19, label %if.then20, label %for.inc + +if.then20: ; preds = %if.then17 + br label %for.inc + +for.inc: ; preds = %for.body14, %if.then17, %if.then20 + %ptr0.2 = phi i8* [ null, %if.then20 ], [ %ptr0.1, %if.then17 ], [ null, %for.body14 ] + %inc = add i8 %k.0, 1 + br label %for.cond6 + +for.inc24: ; preds = %for.body, %for.cond6, %land.rhs + %ptr0.3 = phi i8* [ %ptr0.1, %land.rhs ], [ %ptr0.1, %for.cond6 ], [ null, %for.body ] + %inc25 = add i8 %i.0, 1 + br label %for.cond + +for.end26: ; preds = %for.cond + ret i8* %ptr0.0 +} + +attributes #0 = { nounwind optsize ssp "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { optsize "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #2 = { optsize } + diff --git a/llvm/test/Transforms/MergeSimilarFunc/merge-noinline.ll b/llvm/test/Transforms/MergeSimilarFunc/merge-noinline.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/MergeSimilarFunc/merge-noinline.ll @@ -0,0 +1,25 @@ +; RUN: opt -S -mergesimilarfunc -mergesimilarfunc-level=all < %s | FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" + +declare void @stuff() + +; CHECK-LABEL: @f0( +define void @f0(i64 %p0) { +entry: + call void @stuff() + call void @stuff() + call void @stuff() + ret 
void +} + +; CHECK-LABEL: @f1( +; CHECK: call void @f0{{.*}} #[[ATTR:[0-9]+]] +; CHECK: attributes #[[ATTR]] = { {{.*}}noinline +define void @f1(i64 %p0) { +entry: + call void @stuff() + call void @stuff() + call void @stuff() + ret void +} + diff --git a/llvm/test/Transforms/MergeSimilarFunc/merge-ret.ll b/llvm/test/Transforms/MergeSimilarFunc/merge-ret.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/MergeSimilarFunc/merge-ret.ll @@ -0,0 +1,207 @@ +; Check that the ret instructions are merged correctly. A bug caused +; an incorrect merge and a verifier failure for this input. +; +; RUN: opt -S -mergesimilarfunc < %s | FileCheck %s +; +; CHECK-LABEL: define internal %0* @LLVMGetReturnType__merged +; CHECK: phi %0* [ +; CHECK-NEXT: ret %0* +; CHECK-NEXT: } +; + +target datalayout = "e-m:e-p:32:32:32-a:0-n16:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048" + +%0 = type opaque +%1 = type { %2*, i32, i32, %1** } +%2 = type { %3* } +%3 = type opaque +%4 = type { %1 } +%5 = type opaque +%6 = type { i32 (...)**, %1*, %7*, i8, i8, i16, i32 } +%7 = type { %6*, %7*, %8 } +%8 = type { i32 } +%9 = type { %10, %1*, i32, %12, i32, i8, %18* } +%10 = type { %11 } +%11 = type { %6 } +%12 = type { %13 } +%13 = type { %14 } +%14 = type { %15 } +%15 = type { %16 } +%16 = type { %17 } +%17 = type { i32, i32, i8* } +%18 = type <{ %2*, %19, %30, %70, %79, %87, %12, %93*, %94, %98, %12, %12, %12, i8*, %102, i8, [3 x i8] }> +%19 = type { %20 } +%20 = type { %21, %26* } +%21 = type { %22 } +%22 = type { %23 } +%23 = type { %24 } +%24 = type { %25, %26* } +%25 = type { %26* } +%26 = type <{ %27, [3 x i8], %24, i8, [3 x i8] }> +%27 = type <{ %9, %12, %28*, %12, %12, i8 }> +%28 = type <{ %29*, i8, [3 x i8] }> +%29 = type opaque +%30 = type { %31 } +%31 = type { %32, %37* } +%32 = type { %33 } +%33 = type { %34 } +%34 = type { %35 } +%35 = type { %36, %37* } +%36 = type { %37* } +%37 = type { %27, %35, 
%38, %60, %93*, %68, %4* } +%38 = type { %39 } +%39 = type { %40, %44* } +%40 = type { %41 } +%41 = type { %42 } +%42 = type { %43 } +%43 = type { %44* } +%44 = type { %6, %45, i32, %47, %37* } +%45 = type { %46 } +%46 = type { %43, %44* } +%47 = type { %48 } +%48 = type { %49, %53* } +%49 = type { %50 } +%50 = type { %51 } +%51 = type { %52 } +%52 = type { %53* } +%53 = type { %11, %54, %44*, %56 } +%54 = type { %55 } +%55 = type { %52, %53* } +%56 = type { %57 } +%57 = type { %58 } +%58 = type { %59* } +%59 = type { i8, i8, i16, i32 } +%60 = type { %61 } +%61 = type { %62, %66* } +%62 = type { %63 } +%63 = type { %64 } +%64 = type { %65 } +%65 = type { %66* } +%66 = type { %6, %67, %37* } +%67 = type { %65, %66* } +%68 = type { %69* } +%69 = type opaque +%70 = type { %71 } +%71 = type { %72, %77* } +%72 = type { %73 } +%73 = type { %74 } +%74 = type { %75 } +%75 = type { %76, %77* } +%76 = type { %77* } +%77 = type { %78, %75 } +%78 = type { %9 } +%79 = type { %80 } +%80 = type { %81, %86* } +%81 = type { %82 } +%82 = type { %83 } +%83 = type { %84 } +%84 = type { %85, %86* } +%85 = type { %86* } +%86 = type { %78, %84 } +%87 = type { %88 } +%88 = type { %89, %92* } +%89 = type { %90 } +%90 = type { %91, %92* } +%91 = type { %92* } +%92 = type { %90, %12, %18*, i8* } +%93 = type opaque +%94 = type <{ %95, %97, [3 x i8] }> +%95 = type { %96**, i32, i32, i32, i32 } +%96 = type { i32 } +%97 = type { i8 } +%98 = type { %99 } +%99 = type { %100 } +%100 = type { %101* } +%101 = type opaque +%102 = type { i8, i32, i8, [3 x i8], %103, %111, %12, %118, i8* } +%103 = type { %104, %110 } +%104 = type { %105 } +%105 = type { %106 } +%106 = type <{ %107, %108 }> +%107 = type { i8*, i8*, i8* } +%108 = type { %109 } +%109 = type { [1 x i8] } +%110 = type { [7 x %108] } +%111 = type { %112, %117 } +%112 = type { %113 } +%113 = type { %114 } +%114 = type { %107, %115 } +%115 = type { %116 } +%116 = type { [8 x i8] } +%117 = type { [15 x %115] } +%118 = type { %119, %124 } +%119 = 
type { %120 } +%120 = type { %121 } +%121 = type { %107, %122 } +%122 = type { %123 } +%123 = type { [16 x i8] } +%124 = type { [7 x %122] } + +; Function Attrs: argmemonly nounwind +declare void @llvm.lifetime.start(i64, i8* nocapture) #0 + +; Function Attrs: argmemonly nounwind +declare void @llvm.lifetime.end(i64, i8* nocapture) #0 + +; Function Attrs: optsize +define %0* @LLVMGetReturnType(%0*) #1 { + %2 = alloca %1*, align 4 + %3 = bitcast %1** %2 to i8* + call void @llvm.lifetime.start(i64 4, i8* %3) + %4 = bitcast %1** %2 to %0** + store %0* %0, %0** %4, align 4 + %5 = call zeroext i1 @_ZN4llvm13isa_impl_wrapINS_12FunctionTypeEKPNS_4TypeEPKS2_E4doitERS4_(%1** nonnull dereferenceable(4) %2) #3 + %6 = bitcast %0* %0 to %4* + br i1 %5, label %8, label %7 + +;