Index: include/llvm/CodeGen/ParallelCG.h =================================================================== --- include/llvm/CodeGen/ParallelCG.h +++ include/llvm/CodeGen/ParallelCG.h @@ -36,7 +36,8 @@ Reloc::Model RM = Reloc::Default, CodeModel::Model CM = CodeModel::Default, CodeGenOpt::Level OL = CodeGenOpt::Default, - TargetMachine::CodeGenFileType FT = TargetMachine::CGFT_ObjectFile); + TargetMachine::CodeGenFileType FT = TargetMachine::CGFT_ObjectFile, + bool PreserveLocals = false); } // namespace llvm Index: include/llvm/Transforms/Utils/SplitModule.h =================================================================== --- include/llvm/Transforms/Utils/SplitModule.h +++ include/llvm/Transforms/Utils/SplitModule.h @@ -36,7 +36,8 @@ /// each partition. void SplitModule( std::unique_ptr M, unsigned N, - std::function MPart)> ModuleCallback); + std::function MPart)> ModuleCallback, + bool PreserveLocals = false); } // End llvm namespace Index: lib/CodeGen/ParallelCG.cpp =================================================================== --- lib/CodeGen/ParallelCG.cpp +++ lib/CodeGen/ParallelCG.cpp @@ -44,7 +44,8 @@ ArrayRef OSs, StringRef CPU, StringRef Features, const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL, - TargetMachine::CodeGenFileType FileType) { + TargetMachine::CodeGenFileType FileType, + bool PreserveLocals) { StringRef TripleStr = M->getTargetTriple(); std::string ErrMsg; const Target *TheTarget = TargetRegistry::lookupTarget(TripleStr, ErrMsg); @@ -87,7 +88,7 @@ // Pass BC using std::move to ensure that it get moved rather than // copied into the thread's context. 
std::move(BC)); - }); + }, PreserveLocals); for (thread &T : Threads) T.join(); Index: lib/Transforms/Utils/SplitModule.cpp =================================================================== --- lib/Transforms/Utils/SplitModule.cpp +++ lib/Transforms/Utils/SplitModule.cpp @@ -13,19 +13,171 @@ // //===----------------------------------------------------------------------===// +#define DEBUG_TYPE "split-module" + #include "llvm/Transforms/Utils/SplitModule.h" +#include "llvm/ADT/EquivalenceClasses.h" #include "llvm/ADT/Hashing.h" +#include "llvm/ADT/MapVector.h" +#include "llvm/ADT/SetVector.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalAlias.h" #include "llvm/IR/GlobalObject.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/Module.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/MD5.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/Cloning.h" +#include using namespace llvm; +namespace { +typedef EquivalenceClasses ClusterMapType; +typedef DenseMap ComdatMembersType; +typedef DenseMap ClusterIDMapType; +} + +static void addNonConstUser(ClusterMapType &GVtoClusterMap, + const GlobalValue *GV, const User *U) { + assert((!isa(U) || isa(U)) && "Bad user"); + + if (const Instruction *I = dyn_cast(U)) { + const GlobalValue *F = I->getParent()->getParent(); + GVtoClusterMap.unionSets(GV, F); + } else if (isa(U) || isa(U) || + isa(U)) { + GVtoClusterMap.unionSets(GV, cast(U)); + } else { + llvm_unreachable("Underimplemented use case"); + } +} + +// Find partitions for the module in such a way that no locals need to be +// globalized. +// Try to pack those partitions evenly into N files, since this roughly equals +// thread balancing for the backend codegen step. +static void findPartitions(Module *M, ClusterIDMapType &ClusterIDMap, + unsigned N) { + // At this point module should have the proper mix of globals and locals.
+ // As we attempt to partition this module, we must not change any + // locals to globals. + + DEBUG(dbgs() << "Partition module with (" << M->size() << ")functions\n"); + ClusterMapType GVtoClusterMap; + ComdatMembersType ComdatMembers; + + auto recordGVSet = [&GVtoClusterMap, &ComdatMembers](GlobalValue &GV) { + if (GV.isDeclaration()) + return; + + if (!GV.hasName()) + GV.setName("__llvmsplit_unnamed"); + + // Comdat groups must not be partitioned. For comdat groups that contain + // locals, record all their members here so we can keep them together. + // Comdat groups that only contain external globals are already handled by + // the MD5-based partitioning. + if (const Comdat *C = GV.getComdat()) { + auto &Member = ComdatMembers[C]; + if (Member) + GVtoClusterMap.unionSets(Member, &GV); + else + Member = &GV; + } + + // Further only iterate over local GVs. + if (!GV.hasLocalLinkage()) + return; + + for (auto *U : GV.users()) { + // For each constant that is not a GV (a pure const): + if (isa(U) && !isa(U)) { + SmallVector Worklist; + Worklist.push_back(U); + while (!Worklist.empty()) { + const User *UU = Worklist.pop_back_val(); + if (isa(UU) && !isa(UU)) { + Worklist.append(UU->user_begin(), UU->user_end()); + continue; + } + addNonConstUser(GVtoClusterMap, &GV, UU); + } + } else { + // User is an instruction, alias or GlobalValue. + addNonConstUser(GVtoClusterMap, &GV, U); + } + } + }; + + std::for_each(M->begin(), M->end(), recordGVSet); + std::for_each(M->global_begin(), M->global_end(), recordGVSet); + std::for_each(M->alias_begin(), M->alias_end(), recordGVSet); + + // Assign all GVs to merged clusters while balancing the number of objects in + // each.
+ auto CompareClusters = [](const std::pair &a, + const std::pair &b) { + if (a.second || b.second) + return a.second > b.second; + else + return a.first > b.first; + }; + + std::priority_queue, + std::vector>, + decltype(CompareClusters)> + BalancinQueue(CompareClusters); + // Pre-populate priority queue with N slot blanks. + for (unsigned i = 0; i < N; ++i) + BalancinQueue.push(std::make_pair(i, 0)); + + typedef std::pair SortType; + SmallVector Sets; + SmallPtrSet Visited; + + // To guarantee determinism, we have to sort SCC according to size. + // When size is the same, use leader's name. + for (ClusterMapType::iterator I = GVtoClusterMap.begin(), + E = GVtoClusterMap.end(); I != E; ++I) + if (I->isLeader()) + Sets.push_back( + std::make_pair(std::distance(GVtoClusterMap.member_begin(I), + GVtoClusterMap.member_end()), I)); + + std::sort(Sets.begin(), Sets.end(), [](const SortType &a, const SortType &b) { + if (a.first == b.first) + return a.second->getData()->getName() > b.second->getData()->getName(); + else + return a.first > b.first; + }); + + for (auto &I : Sets) { + unsigned CurrentClusterID = BalancinQueue.top().first; + unsigned CurrentClusterSize = BalancinQueue.top().second; + BalancinQueue.pop(); + + DEBUG(dbgs() << "Root[" << CurrentClusterID << "] cluster_size(" << I.first + << ") ----> " << I.second->getData()->getName() << "\n"); + + for (ClusterMapType::member_iterator MI = + GVtoClusterMap.findLeader(I.second); + MI != GVtoClusterMap.member_end(); ++MI) { + if (!Visited.insert(*MI).second) + continue; + DEBUG(dbgs() << "----> " << (*MI)->getName() + << ((*MI)->hasLocalLinkage() ? " l " : " e ") << "\n"); + Visited.insert(*MI); + ClusterIDMap[*MI] = CurrentClusterID; + CurrentClusterSize++; + } + // Add this set size to the number of entries in this cluster. 
+ BalancinQueue.push(std::make_pair(CurrentClusterID, CurrentClusterSize)); + } +} + static void externalize(GlobalValue *GV) { if (GV->hasLocalLinkage()) { GV->setLinkage(GlobalValue::ExternalLinkage); @@ -62,24 +214,46 @@ void llvm::SplitModule( std::unique_ptr M, unsigned N, - std::function MPart)> ModuleCallback) { - for (Function &F : *M) - externalize(&F); - for (GlobalVariable &GV : M->globals()) - externalize(&GV); - for (GlobalAlias &GA : M->aliases()) - externalize(&GA); - - // FIXME: We should be able to reuse M as the last partition instead of - // cloning it. - for (unsigned I = 0; I != N; ++I) { - ValueToValueMapTy VMap; - std::unique_ptr MPart( - CloneModule(M.get(), VMap, [=](const GlobalValue *GV) { - return isInPartition(GV, I, N); - })); - if (I != 0) - MPart->setModuleInlineAsm(""); - ModuleCallback(std::move(MPart)); + std::function MPart)> ModuleCallback, + bool PreserveLocals) { + if (!PreserveLocals) { + for (Function &F : *M) + externalize(&F); + for (GlobalVariable &GV : M->globals()) + externalize(&GV); + for (GlobalAlias &GA : M->aliases()) + externalize(&GA); + + // FIXME: We should be able to reuse M as the last partition instead of + // cloning it. + for (unsigned I = 0; I != N; ++I) { + ValueToValueMapTy VMap; + std::unique_ptr MPart( + CloneModule(M.get(), VMap, [=](const GlobalValue *GV) { + return isInPartition(GV, I, N); + })); + if (I != 0) + MPart->setModuleInlineAsm(""); + ModuleCallback(std::move(MPart)); + } + } else { + // This performs splitting without a need for externalization, which might not + // always be possible. 
+ ClusterIDMapType ClusterIDMap; + findPartitions(M.get(), ClusterIDMap, N); + + for (unsigned I = 0; I < N; ++I) { + ValueToValueMapTy VMap; + std::unique_ptr MPart( + CloneModule(M.get(), VMap, [&](const GlobalValue *GV) { + if (ClusterIDMap.count(GV)) + return (ClusterIDMap[GV] == I); + else + return isInPartition(GV, I, N); + })); + if (I != 0) + MPart->setModuleInlineAsm(""); + ModuleCallback(std::move(MPart)); + } } } Index: test/tools/llvm-split/preserve-locals.ll =================================================================== --- /dev/null +++ test/tools/llvm-split/preserve-locals.ll @@ -0,0 +1,65 @@ +; RUN: llvm-split -preserve-locals -o %t %s +; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 %s +; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 %s + +; The local_var and local_func must not be separated. +; CHECK0: @local_var +; CHECK0: define internal fastcc void @local_func +; The main and a must not be separated. +; The main and local_func must not be together. 
+; CHECK1: @a +; CHECK1: define i32 @main +; CHECK1: declare fastcc void @local_func + +@a = internal global i32 0, align 4 +@global_storage = common global i32 0, align 4 +@local_var = internal global i32 0, align 4 + +; Function Attrs: nounwind +define i32 @main(i32 %x) { +entry: + %call = call fastcc i32 @foo(i32 %x, i32* nonnull @a) + %call1 = call fastcc i32 @baz(i32 %x) + %add = add nsw i32 %call, %call1 + ret i32 %add +} + +; Function Attrs: nounwind +define fastcc i32 @bar(i32 %b) { +entry: + %call = call fastcc i32 @baz(i32 %b) + ret i32 %call +} + +; Function Attrs: nounwind +define fastcc i32 @baz(i32 %x) { +entry: + store i32 %x, i32* @global_storage, align 4 + %shl = shl i32 %x, %x + ret i32 %shl +} + +; Function Attrs: noinline nounwind +define fastcc i32 @foo(i32 %a, i32* nocapture %b) { +entry: + call fastcc void @local_func() + %call = call fastcc i32 @bar(i32 %a) + %0 = load i32, i32* @global_storage, align 4 + %call1 = call fastcc i32 @baz(i32 %0) + %add = add nsw i32 %call, %call1 + store i32 %add, i32* %b, align 4 + %call.i = call fastcc i32 @baz(i32 %add) #2 + %add.i = add nsw i32 %call.i, 2 + %1 = load volatile i32, i32* @local_var, align 4 + %add3 = add nsw i32 %add.i, %1 + ret i32 %add3 +} + +; Function Attrs: noinline nounwind +define internal fastcc void @local_func() section ".text" { +entry: + %0 = load i32, i32* @global_storage, align 4 + %call = call fastcc i32 @foo(i32 %0, i32* null) + store volatile i32 %call, i32* @local_var, align 4 + ret void +} Index: test/tools/llvm-split/scc-alias.ll =================================================================== --- /dev/null +++ test/tools/llvm-split/scc-alias.ll @@ -0,0 +1,48 @@ +; All of the functions in this module must end up +; in the same partition without change of scope. 
+; RUN: llvm-split -j=2 -preserve-locals -o %t %s +; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK1 %s +; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK0 %s + +; CHECK0: declare i32 @funInternal +; CHECK0: declare i32 @funExternal +; CHECK0: declare i32 @funInternal2 +; CHECK0: declare i32 @funExternal2 + +; All functions are in the same file. +; Local functions are still local. +; CHECK1: define internal i32 @funInternal +; CHECK1: define i32 @funExternal +; CHECK1: define internal i32 @funInternal2 +; CHECK1: define i32 @funExternal2 + + +@funInternalAlias = internal alias i32 (), i32 ()* @funInternal + +define internal i32 @funInternal() { +entry: + ret i32 0 +} + +; Direct call to local alias + +define i32 @funExternal() { +entry: + %x = call i32 @funInternalAlias() + ret i32 %x +} + +; Call to local function that calls local alias + +define internal i32 @funInternal2() { +entry: + %x = call i32 @funInternalAlias() + ret i32 %x +} + +define i32 @funExternal2() { +entry: + %x = call i32 @funInternal2() + ret i32 %x +} + Index: test/tools/llvm-split/scc-callchain.ll =================================================================== --- /dev/null +++ test/tools/llvm-split/scc-callchain.ll @@ -0,0 +1,48 @@ +; All of the functions in this module must end up +; in the same partition. + +; RUN: llvm-split -j=2 -preserve-locals -o %t %s +; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK1 %s +; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK0 %s + +; CHECK0: declare i32 @funInternal0 +; CHECK0: declare i32 @funInternal1 +; CHECK0: declare i32 @funInternal2 +; CHECK0: declare i32 @funExternal + +; All functions are in the same file. +; Local functions are still local. 
+; CHECK1: define internal i32 @funInternal0 +; CHECK1: define internal i32 @funInternal1 +; CHECK1: define internal i32 @funInternal2 +; CHECK1: define i32 @funExternal +; CHECK1: define i32 @funExternal2 + +define internal i32 @funInternal0() { +entry: + ret i32 0 +} + +define internal i32 @funInternal1() { +entry: + %x = call i32 @funInternal0() + ret i32 %x +} + +define internal i32 @funInternal2() { +entry: + %x = call i32 @funInternal1() + ret i32 %x +} + +define i32 @funExternal() { +entry: + %x = call i32 @funInternal2() + ret i32 %x +} + +define i32 @funExternal2() { +entry: + %x = call i32 @funInternal0() + ret i32 %x +} Index: test/tools/llvm-split/scc-comdat.ll =================================================================== --- /dev/null +++ test/tools/llvm-split/scc-comdat.ll @@ -0,0 +1,32 @@ +; All functions in the same comdat group must +; be in the same module + +; RUN: llvm-split -j=2 -preserve-locals -o %t %s +; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK1 %s +; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK0 %s + +; CHECK0: declare i32 @fun1 +; CHECK0: declare i32 @fun2 +; CHECK0: declare i32 @fun3 + +; CHECK1: define internal i32 @fun1 +; CHECK1: define internal i32 @fun2 +; CHECK1: define i32 @fun3 + +$fun = comdat any + +define internal i32 @fun1() section ".text.funs" comdat($fun) { +entry: + ret i32 0 +} + +define internal i32 @fun2() section ".text.funs" comdat($fun) { +entry: + ret i32 0 +} + +define i32 @fun3() section ".text.funs" comdat($fun) { +entry: + ret i32 0 +} + Index: test/tools/llvm-split/scc-constants.ll =================================================================== --- /dev/null +++ test/tools/llvm-split/scc-constants.ll @@ -0,0 +1,48 @@ +; All of the functions in this module must end up +; in the same partition. 
+ +; RUN: llvm-split -j=2 -preserve-locals -o %t %s +; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK1 %s +; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK0 %s + +; CHECK0: declare i32 @foo +; CHECK0: declare i32 @baz +; CHECK0: declare i32 @bar +; CHECK0: declare i32 @bar2 + +; CHECK1: @bla +; CHECK1: @bla2 +; CHECK1: define internal i32 @foo +; CHECK1: define internal i32 @baz +; CHECK1: define i32 @bar +; CHECK1: define i32 @bar2 + +%struct.anon = type { i64, i64 } + +@bla = internal global %struct.anon { i64 1, i64 2 }, align 8 +@bla2 = internal global %struct.anon { i64 1, i64 2 }, align 8 + +define internal i32 @foo() { +entry: + store i64 5, i64* getelementptr inbounds (%struct.anon, %struct.anon* @bla, i32 0, i32 0), align 8 + store i32 -1, i32* bitcast (i64* getelementptr inbounds (%struct.anon, %struct.anon* @bla2, i32 0, i32 1) to i32*), align 8 + ret i32 0 +} + +define internal i32 @baz() { +entry: + store i64 5, i64* getelementptr inbounds (%struct.anon, %struct.anon* @bla, i32 0, i32 0), align 8 + store i32 -1, i32* bitcast (i64* getelementptr inbounds (%struct.anon, %struct.anon* @bla2, i32 0, i32 1) to i32*), align 8 + ret i32 0 +} + +define i32 @bar() { + %call = call i32 @foo() + ret i32 0 +} + +define i32 @bar2() { + %call = call i32 @baz() + ret i32 0 +} + Index: test/tools/llvm-split/scc-cycle.ll =================================================================== --- /dev/null +++ test/tools/llvm-split/scc-cycle.ll @@ -0,0 +1,44 @@ +; All of the functions in this module must end up +; in the same partition. 
+ +; Mutually recursive calls +; RUN: llvm-split -j=2 -preserve-locals -o %t %s +; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK1 %s +; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK0 %s + +; CHECK0: declare i32 @funInternal0 +; CHECK0: declare i32 @funInternal1 +; CHECK0: declare i32 @funExternal0 +; CHECK0: declare i32 @funExternal1 + +; CHECK1: define internal i32 @funInternal0 +; CHECK1: define internal i32 @funInternal1 +; CHECK1: define i32 @funExternal0 +; CHECK1: define i32 @funExternal1 + +define internal i32 @funInternal0() { +entry: + %x = call i32 @funInternal1() + ret i32 %x +} + +define internal i32 @funInternal1() { +entry: + %x = call i32 @funInternal0() + ret i32 %x +} + +; External functions + +define i32 @funExternal0() { +entry: + %x = call i32 @funInternal0() + ret i32 %x +} + +define i32 @funExternal1() { +entry: + %x = call i32 @funInternal1() + ret i32 %x +} + Index: test/tools/llvm-split/scc-global2global.ll =================================================================== --- /dev/null +++ test/tools/llvm-split/scc-global2global.ll @@ -0,0 +1,28 @@ +; All of the functions and globals in this module must end up +; in the same partition.
+ +; RUN: llvm-split -j=2 -preserve-locals -o %t %s +; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK1 %s +; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK0 %s + +; CHECK0: declare %struct.anon* @local0 +; CHECK0: declare i8** @local1 + +; CHECK1: @bla +; CHECK1: @ptr +; CHECK1: define internal %struct.anon* @local0 +; CHECK1: define internal i8** @local1 + +%struct.anon = type { i64, i64 } + +@bla = internal global %struct.anon { i64 1, i64 2 }, align 8 +@ptr = internal global i8* bitcast (%struct.anon* @bla to i8*), align 4 + +define internal %struct.anon* @local0() { + ret %struct.anon* @bla +} + +define internal i8** @local1() { + ret i8** @ptr +} + Index: tools/llvm-split/llvm-split.cpp =================================================================== --- tools/llvm-split/llvm-split.cpp +++ tools/llvm-split/llvm-split.cpp @@ -35,6 +35,10 @@ static cl::opt NumOutputs("j", cl::Prefix, cl::init(2), cl::desc("Number of output files")); +static cl::opt + PreserveLocals("preserve-locals", cl::Prefix, cl::init(false), + cl::desc("Split without externalizing locals")); + int main(int argc, char **argv) { LLVMContext &Context = getGlobalContext(); SMDiagnostic Err; @@ -61,7 +65,7 @@ // Declare success. Out->keep(); - }); + }, PreserveLocals); return 0; }