diff --git a/llvm/test/tools/llvm-reduce/operands-skip.ll b/llvm/test/tools/llvm-reduce/operands-skip.ll --- a/llvm/test/tools/llvm-reduce/operands-skip.ll +++ b/llvm/test/tools/llvm-reduce/operands-skip.ll @@ -1,6 +1,13 @@ ; RUN: llvm-reduce %s -o %t --delta-passes=operands-skip --test FileCheck --test-arg %s --test-arg --match-full-lines --test-arg --check-prefix=INTERESTING --test-arg --input-file ; RUN: FileCheck %s --input-file %t --check-prefixes=REDUCED +; RUN: llvm-reduce -j 2 %s -o %t.1 --delta-passes=operands-skip --test FileCheck --test-arg %s --test-arg --match-full-lines --test-arg --check-prefix=INTERESTING --test-arg --input-file +; RUN: FileCheck %s --input-file %t.1 --check-prefixes=REDUCED + +; RUN: llvm-reduce -j 4 %s -o %t.2 --delta-passes=operands-skip --test FileCheck --test-arg %s --test-arg --match-full-lines --test-arg --check-prefix=INTERESTING --test-arg --input-file +; RUN: FileCheck %s --input-file %t.2 --check-prefixes=REDUCED + + ; INTERESTING: store i32 43, i32* {{(%imm|%indirect)}}, align 4 ; REDUCED: store i32 43, i32* %imm, align 4 diff --git a/llvm/tools/llvm-reduce/CMakeLists.txt b/llvm/tools/llvm-reduce/CMakeLists.txt --- a/llvm/tools/llvm-reduce/CMakeLists.txt +++ b/llvm/tools/llvm-reduce/CMakeLists.txt @@ -3,6 +3,7 @@ AllTargetsCodeGens AllTargetsDescs AllTargetsInfos + BitReader BitWriter CodeGen Core diff --git a/llvm/tools/llvm-reduce/deltas/Delta.cpp b/llvm/tools/llvm-reduce/deltas/Delta.cpp --- a/llvm/tools/llvm-reduce/deltas/Delta.cpp +++ b/llvm/tools/llvm-reduce/deltas/Delta.cpp @@ -15,9 +15,11 @@ #include "Delta.h" #include "ReducerWorkItem.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/Bitcode/BitcodeReader.h" #include "llvm/Bitcode/BitcodeWriter.h" #include "llvm/IR/Verifier.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/ThreadPool.h" #include "llvm/Support/ToolOutputFile.h" #include #include @@ -37,6 +39,16 @@ cl::desc("Write temporary files as bitcode, instead of textual IR"), cl::init(false)); +#ifdef LLVM_ENABLE_THREADS +static cl::opt NumJobs( + "j", + cl::desc("Maximum number of threads to use to process chunks. Set to 1 to " + "disables parallelism."), + cl::init(1)); +#else +unsigned NumJobs = 1; +#endif + void writeOutput(ReducerWorkItem &M, llvm::StringRef Message); bool isReduced(ReducerWorkItem &M, TestRunner &Test, @@ -120,7 +132,8 @@ // modified module if the chunk resulted in a reduction. template static std::unique_ptr -CheckChunk(Chunk &ChunkToCheckForUninterestingness, TestRunner &Test, +CheckChunk(Chunk &ChunkToCheckForUninterestingness, + std::unique_ptr Clone, TestRunner &Test, function_ref ExtractChunksFromModule, std::set &UninterestingChunks, std::vector &ChunksStillConsideredInteresting) { @@ -137,9 +150,6 @@ C != ChunkToCheckForUninterestingness; }); - // Clone module before hacking it up.. - std::unique_ptr Clone = - cloneReducerWorkItem(Test.getProgram()); // Generate Module with only Targets inside Current Chunks Oracle O(CurrentChunks); ExtractChunksFromModule(O, *Clone); @@ -169,6 +179,36 @@ return Clone; } +template +SmallString<0> ProcessChunkFromSerializedBitcode( + Chunk &ChunkToCheckForUninterestingness, TestRunner &Test, + function_ref ExtractChunksFromModule, + std::set &UninterestingChunks, + std::vector &ChunksStillConsideredInteresting, + SmallString<0> &OriginalBC, std::atomic &AnyReduced) { + LLVMContext Ctx; + Expected> MOrErr = parseBitcodeFile( + MemoryBufferRef(StringRef(OriginalBC.data(), OriginalBC.size()), + ""), + Ctx); + if (!MOrErr) + report_fatal_error("Failed to read bitcode"); + auto CloneMMM = std::make_unique(); + CloneMMM->M = std::move(MOrErr.get()); + + SmallString<0> Result; + if (std::unique_ptr ChunkResult = + CheckChunk(ChunkToCheckForUninterestingness, std::move(CloneMMM), + Test, ExtractChunksFromModule, UninterestingChunks, + ChunksStillConsideredInteresting)) { + raw_svector_ostream BCOS(Result); + WriteBitcodeToFile(*ChunkResult->M, BCOS); + // Communicate that the task reduced a chunk. + AnyReduced = true; + } + return Result; +} + /// Runs the Delta Debugging algorithm, splits the code into chunks and /// reduces the amount of chunks that are considered interesting by the /// given test. @@ -207,19 +247,112 @@ increaseGranularity(ChunksStillConsideredInteresting); } + std::atomic AnyReduced; + std::unique_ptr ChunkThreadPool; + if (NumJobs > 1) + ChunkThreadPool = + std::make_unique(hardware_concurrency(NumJobs)); + bool FoundAtLeastOneNewUninterestingChunkWithCurrentGranularity; do { FoundAtLeastOneNewUninterestingChunkWithCurrentGranularity = false; std::set UninterestingChunks; - for (Chunk &ChunkToCheckForUninterestingness : - reverse(ChunksStillConsideredInteresting)) { - std::unique_ptr Result = CheckChunk( - ChunkToCheckForUninterestingness, Test, ExtractChunksFromModule, - UninterestingChunks, ChunksStillConsideredInteresting); + + // When running with more than one thread, serialize the original bitcode + // to OriginalBC. + SmallString<0> OriginalBC; + if (NumJobs > 1) { + raw_svector_ostream BCOS(OriginalBC); + WriteBitcodeToFile(*Test.getProgram().M, BCOS); + } + + std::deque>> TaskQueue; + for (auto I = ChunksStillConsideredInteresting.rbegin(), + E = ChunksStillConsideredInteresting.rend(); + I != E; ++I) { + std::unique_ptr Result = nullptr; + unsigned WorkLeft = std::distance(I, E); + + // Run in parallel mode, if the user requested more than one thread and + // there are at least a few chunks to process. + if (NumJobs > 1 && WorkLeft > 1) { + unsigned NumInitialTasks = std::min(WorkLeft, unsigned(NumJobs)); + unsigned NumChunksProcessed = 0; + + ThreadPool &ChunkThreadPool_ = *ChunkThreadPool; + TaskQueue.clear(); + + AnyReduced = false; + // Queue jobs to process NumInitialTasks chunks in parallel using + // ChunkThreadPool. When the tasks are added to the pool, parse the + // original module from OriginalBC with a fresh LLVMContext object. This + // ensures that the cloned module of each task uses an independent + // LLVMContext object. If a task reduces the input, serialize the result + // back in the corresponding Result element. + for (unsigned J = 0; J < NumInitialTasks; ++J) { + TaskQueue.emplace_back(ChunkThreadPool_.async( + [J, I, &Test, &ExtractChunksFromModule, &UninterestingChunks, + &ChunksStillConsideredInteresting, &OriginalBC, &AnyReduced]() { + return ProcessChunkFromSerializedBitcode( + *(I + J), Test, ExtractChunksFromModule, + UninterestingChunks, ChunksStillConsideredInteresting, + OriginalBC, AnyReduced); + })); + } + + // Start processing results of the queued tasks. We wait for the first + // task in the queue to finish. If it reduced a chunk, we parse the + // result and exit the loop. + // Otherwise we will try to schedule a new task, if + // * no other pending job reduced a chunk and + // * we have not reached the end of the chunk. + while (!TaskQueue.empty()) { + auto &Future = TaskQueue.front(); + Future.wait(); + + NumChunksProcessed++; + SmallString<0> Res = Future.get(); + TaskQueue.pop_front(); + if (Res.empty()) { + unsigned NumScheduledTasks = NumChunksProcessed + TaskQueue.size(); + if (!AnyReduced && I + NumScheduledTasks != E) { + Chunk &ChunkToCheck = *(I + NumScheduledTasks); + TaskQueue.emplace_back(ChunkThreadPool_.async( + [&Test, &ExtractChunksFromModule, &UninterestingChunks, + &ChunksStillConsideredInteresting, &OriginalBC, + &ChunkToCheck, &AnyReduced]() { + return ProcessChunkFromSerializedBitcode( + ChunkToCheck, Test, ExtractChunksFromModule, + UninterestingChunks, ChunksStillConsideredInteresting, + OriginalBC, AnyReduced); + })); + } + continue; + } + + Expected> MOrErr = parseBitcodeFile( + MemoryBufferRef(StringRef(Res.data(), Res.size()), + ""), + Test.getProgram().M->getContext()); + if (!MOrErr) + report_fatal_error("Failed to read bitcode"); + Result = std::make_unique(); + Result->M = std::move(MOrErr.get()); + break; + } + // Forward I to the last chunk processed in parallel. + I += NumChunksProcessed - 1; + } else { + Result = CheckChunk(*I, cloneReducerWorkItem(Test.getProgram()), Test, + ExtractChunksFromModule, UninterestingChunks, + ChunksStillConsideredInteresting); + } + if (!Result) continue; + Chunk &ChunkToCheckForUninterestingness = *I; FoundAtLeastOneNewUninterestingChunkWithCurrentGranularity = true; UninterestingChunks.insert(ChunkToCheckForUninterestingness); ReducedProgram = std::move(Result);