diff --git a/llvm/tools/LLVMBuild.txt b/llvm/tools/LLVMBuild.txt --- a/llvm/tools/LLVMBuild.txt +++ b/llvm/tools/LLVMBuild.txt @@ -17,6 +17,7 @@ [common] subdirectories = bugpoint + bp2 dsymutil llc lli diff --git a/llvm/tools/bp2/CMakeLists.txt b/llvm/tools/bp2/CMakeLists.txt new file mode 100644 --- /dev/null +++ b/llvm/tools/bp2/CMakeLists.txt @@ -0,0 +1,36 @@ +set(LLVM_LINK_COMPONENTS + AllTargetsAsmParsers + AllTargetsCodeGens + AllTargetsDescs + AllTargetsInfos + Analysis + BitWriter + CodeGen + Core + IPO + IRReader + AggressiveInstCombine + InstCombine + Instrumentation + Linker + ObjCARCOpts + ScalarOpts + Support + Target + TransformUtils + Vectorize + ) + +# Support plugins. +set(LLVM_NO_DEAD_STRIP 1) + +add_llvm_tool(bp2 + bp2.cpp + TestRunner.cpp + DeltaManager.cpp + deltas/RemoveFunctions.cpp + + DEPENDS + intrinsics_gen + ) +export_executable_symbols(bp2) diff --git a/llvm/tools/bp2/DeltaManager.h b/llvm/tools/bp2/DeltaManager.h new file mode 100644 --- /dev/null +++ b/llvm/tools/bp2/DeltaManager.h @@ -0,0 +1,22 @@ +#include "deltas/RemoveFunctions.h" +#include "deltas/Delta.h" +#include "TestRunner.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" +#include "llvm/Bitcode/BitcodeWriter.h" + + +namespace llvm { + +class DeltaManager { + public: + DeltaManager(Module* P, TestRunner T); + void runDeltaPasses(); + + private: + Module* Program; + TestRunner Tester; +}; + +} \ No newline at end of file diff --git a/llvm/tools/bp2/DeltaManager.cpp b/llvm/tools/bp2/DeltaManager.cpp new file mode 100644 --- /dev/null +++ b/llvm/tools/bp2/DeltaManager.cpp @@ -0,0 +1,12 @@ +#include "DeltaManager.h" + +using namespace llvm; + +DeltaManager::DeltaManager(Module* P, TestRunner T) + : Program(P), Tester(T) {} + +void DeltaManager::runDeltaPasses() { + outs() << "Reducing functions...\n"; + Delta::run(Program, Tester); + // TODO: Implement the remaining deltas.. +} diff --git a/llvm/tools/LLVMBuild.txt b/llvm/tools/bp2/LLVMBuild.txt copy from llvm/tools/LLVMBuild.txt copy to llvm/tools/bp2/LLVMBuild.txt --- a/llvm/tools/LLVMBuild.txt +++ b/llvm/tools/bp2/LLVMBuild.txt @@ -1,4 +1,4 @@ -;===- ./tools/LLVMBuild.txt ------------------------------------*- Conf -*--===; +;===- ./tools/bugpoint/LLVMBuild.txt ---------------------------*- Conf -*--===; ; ; Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ; See https://llvm.org/LICENSE.txt for license information. @@ -14,48 +14,19 @@ ; ;===------------------------------------------------------------------------===; -[common] -subdirectories = - bugpoint - dsymutil - llc - lli - llvm-ar - llvm-as - llvm-bcanalyzer - llvm-cat - llvm-cfi-verify - llvm-cov - llvm-cvtres - llvm-diff - llvm-dis - llvm-dwarfdump - llvm-dwp - llvm-elfabi - llvm-exegesis - llvm-extract - llvm-jitlistener - llvm-jitlink - llvm-link - llvm-lto - llvm-mc - llvm-mca - llvm-modextract - llvm-mt - llvm-nm - llvm-objcopy - llvm-objdump - llvm-pdbutil - llvm-profdata - llvm-rc - llvm-rtdyld - llvm-size - llvm-split - llvm-undname - opt - verify-uselistorder - [component_0] -type = Group -name = Tools -parent = $ROOT +type = Tool +name = bp2 +parent = Tools +required_libraries = + AsmParser + BitReader + BitWriter + CodeGen + IRReader + IPO + Instrumentation + Linker + ObjCARC + Scalar + all-targets diff --git a/llvm/tools/bp2/TestRunner.h b/llvm/tools/bp2/TestRunner.h new file mode 100644 --- /dev/null +++ b/llvm/tools/bp2/TestRunner.h @@ -0,0 +1,28 @@ +#ifndef LLVM_TOOLS_BP2_TESTRUNNER_H +#define LLVM_TOOLS_BP2_TESTRUNNER_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/Support/Error.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Program.h" + +namespace llvm { + +class TestRunner { + public: + TestRunner(std::string OriginalF, std::string TestN, + std::vector TArgs); + // Runs the interestingness test for the given filename + // @returns 1 if the Filename is interesting, 0 if otherwise. + int run(std::string Filename); + std::string getOriginalFilename() { return OriginalFilename; } + + private: + std::string OriginalFilename; + std::string TestName; + std::vector TestArgs; +}; + +} + +#endif \ No newline at end of file diff --git a/llvm/tools/bp2/TestRunner.cpp b/llvm/tools/bp2/TestRunner.cpp new file mode 100644 --- /dev/null +++ b/llvm/tools/bp2/TestRunner.cpp @@ -0,0 +1,40 @@ +#include "TestRunner.h" + +using namespace llvm; + +/// RunProgramWithTimeout - This function provides an alternate interface +/// to the sys::Program::ExecuteAndWait interface. +/// @see sys::Program::ExecuteAndWait +int runProgramWithTimeout(StringRef ProgramPath, ArrayRef Args, + unsigned NumSeconds = 0, unsigned MemoryLimit = 0, + std::string *ErrMsg = nullptr) { + StringRef SR = ""; + Optional Redirects[3] = {SR, SR, SR}; + return sys::ExecuteAndWait(ProgramPath, Args, None, Redirects, NumSeconds, + MemoryLimit, ErrMsg); +} + +TestRunner::TestRunner(std::string OriginalF, std::string TestN, + std::vector TArgs) + : OriginalFilename(OriginalF), TestName(TestN), TestArgs(TArgs) {} + + +int TestRunner::run(std::string Filename) { + std::vector ProgramArgs; + ProgramArgs.push_back(TestName.c_str()); + ProgramArgs.push_back(Filename.c_str()); + + for (auto Arg : TestArgs) + ProgramArgs.push_back(Arg.c_str()); + + int Result = runProgramWithTimeout(TestName, ProgramArgs); + + if (Result < 0) { + Error E = make_error("Error running interesting-ness test\n", + inconvertibleErrorCode()); + outs() << toString(std::move(E)); + exit(1); + } + + return !Result; +} \ No newline at end of file diff --git a/llvm/tools/bp2/bp2.cpp b/llvm/tools/bp2/bp2.cpp new file mode 100644 --- /dev/null +++ b/llvm/tools/bp2/bp2.cpp @@ -0,0 +1,72 @@ +//===- bugpoint.cpp - The LLVM Bugpoint utility ---------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This program is an automated compiler debugger tool. It is used to narrow +// down miscompilations and crash problems to a specific pass in the compiler, +// and the specific Module or Function input that is causing the problem. +// +//===----------------------------------------------------------------------===// + +#include "DeltaManager.h" +#include "llvm/IR/Verifier.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/InitLLVM.h" +#include "llvm/IRReader/IRReader.h" +#include "llvm/Support/Program.h" +#include "llvm/Support/SourceMgr.h" +#include + +using namespace llvm; + +static cl::opt InputFilename(cl::Positional, cl::Required, + cl::desc("")); + +static cl::opt + TestName("test", cl::Required, + cl::desc("Name of the interesting-ness test to be run")); + +static cl::list + TestArguments("test-arg", cl::ZeroOrMore, + cl::desc("Arguments passed onto the interesting-ness test")); + +// Parses IR into a Module and verifies it +std::unique_ptr parseInputFile(StringRef Filename, LLVMContext &Ctxt) { + SMDiagnostic Err; + std::unique_ptr Result = parseIRFile(Filename, Err, Ctxt); + if (!Result) { + Err.print("bp2", errs()); + return Result; + } + + if (verifyModule(*Result, &errs())) { + errs() << "bp2: " << Filename << ": error: input module is broken!\n"; + return std::unique_ptr(); + } + + return Result; +} + +int main(int argc, char **argv) { + InitLLVM X(argc, argv); + + cl::ParseCommandLineOptions(argc, argv, + "LLVM automatic testcase reducer. See\nhttp://" + "llvm.org/cmds/bugpoint.html" + " for more information.\n"); + + LLVMContext Context; + std::unique_ptr P = parseInputFile(InputFilename, Context); + + std::vector TestArgs(std::move(TestArguments)); + TestRunner Tester(InputFilename, TestName, TestArgs); + + DeltaManager DeltaMgr(P.get(), Tester); + DeltaMgr.runDeltaPasses(); + + return 0; +} diff --git a/llvm/tools/bp2/bugpoint.md b/llvm/tools/bp2/bugpoint.md new file mode 100644 --- /dev/null +++ b/llvm/tools/bp2/bugpoint.md @@ -0,0 +1,72 @@ +# Bugpoint Redesign +Author: Diego Treviño (diegotf@google.com) + +Date: 2016-06-05 + +Status: Draft + + +## Introduction +As use of bugpoint has grown several areas of improvement have been identified through years of use: confusing to use, slow, it doesn’t always produce high quality test cases, etc. This document proposes a new approach with a narrower focus: minimization of IR test cases. + + +## Proposed New Design + + +### Narrow focus: test-case reduction +The main focus will be a code reduction strategy to obtain much smaller test cases that still have the same property as the original one. This will be done via classic delta debugging and by adding some IR-specific reductions (e.g. replacing globals, removing unused instructions, etc), similar to what already exists, but with more in-depth minimization. + + +Granted, if the community differs on this proposal, the legacy code could still be present in the tool, but with the caveat of still being documented and designed towards delta reduction. + + +### Command-Line Options +We are proposing to reduce the plethora of bugpoint’s options to just two: an interesting-ness test and the arguments for said test, similar to other delta reduction tools such as CReduce, Delta, and Lithium; the tool should feel less cluttered, and there should also be no uncertainty about how to operate it. + + +The interesting-ness test that’s going to be run to reduce the code is given by name: + `--test=` +If a `--test` option is not given, the program exits; this option is similar to bugpoint’s current `-compile-custom` option, which lets the user run a custom script. + + +The interesting-ness test would be defined as a script that returns 0 when the IR achieves a user-defined behaviour (e.g. failure to compile on clang) and a nonzero value when otherwise. Leaving the user the freedom to determine what is and isn’t interesting to the tool, and thus, streamlining the process of reducing a test-case. + + +If the test accepts any arguments (excluding the input ll/bc file), they are given via the following flag: + `--test_args=` +If unspecified, the test is run as given. It’s worth noting that the input file would be passed as a parameter to the test, similar how `-compile-custom` currently operates. + + +### Implementation +The tool would behave similar to CReduce’s functionality in that it would have a list of passes that try to minimize the given test-case. We should be able to modularize the tool’s behavior, as well as making it easier to maintain and expand. + + +The first version of this redesign would try to: + + +* Split the code into chunks and discard those that fail the given test +* Discard functions, instructions and metadata that don’t influence the interesting-ness test +* Remove unused parameters from functions +* Eliminate unvisited conditional paths +* Rename variables to more regular ones (such as “a”, “b”, “c”, etc.) + + +Once these passes are implemented, more meaningful reductions (such as type reduction) would be added to the tool, to even further reduce IR. + + +## Background on historical bugpoint issues + + +### Root Cause Analysis +Presently, bugpoint takes a long time to find the source problem in a given IR file, mainly due to the fact that it tries to debug the input by running various strategies to classify the bug, which in turn run multiple optimizer and compilation passes over the input, taking up a lot of time. Furthermore, when the IR crashes, it tries to reduce it by performing some sub-optimal passes (e.g. a lot of unreachable blocks), and sometimes even fails to minimize at all. + + +### "Quirky" Interface +Bugpoint’s current interface overwhelms and confuses the user, the help screen alone ends up confusing rather providing guidance, as seen below: + +![Bugpoint's help option showcase](https://lh6.googleusercontent.com/sbpaSVHzpVVZKKAgHL9gvfzTWdgh3ju0KiDYql6WmWZfDYrdauOJMcuo9PP_V1dq8JQfMHOSKTv3lJcSpVytUyU8r5tJ2KTlGB0b2ve7jsZ3nVX8K8ItAbsA0JWkFKw67VJnq99m) + +And, not only are there numerous features and options, but some of them also work in unexpected ways and most of the time the user ends up using a custom script. Pruning and simplifying the interface will be worth considering in order to make the tool more useful in the general case and easier to maintain. + + + diff --git a/llvm/tools/bp2/deltas/Chunk.h b/llvm/tools/bp2/deltas/Chunk.h new file mode 100644 --- /dev/null +++ b/llvm/tools/bp2/deltas/Chunk.h @@ -0,0 +1,16 @@ +#ifndef LLVM_TOOLS_BP2_DELTAS_CHUNK_H +#define LLVM_TOOLS_BP2_DELTAS_CHUNK_H + +struct Chunk { + int begin; + int end; + + friend bool operator!=(const Chunk &C1, const Chunk &C2) { + return C1.begin != C2.begin && C1.end != C2.end; + } + friend bool operator<(const Chunk& C1, const Chunk &C2) { + return C1.end < C2.begin; + } +}; + +#endif \ No newline at end of file diff --git a/llvm/tools/bp2/deltas/Delta.h b/llvm/tools/bp2/deltas/Delta.h new file mode 100644 --- /dev/null +++ b/llvm/tools/bp2/deltas/Delta.h @@ -0,0 +1,116 @@ +#include "Chunk.h" +#include "TestRunner.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/Value.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/Program.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/IR/Module.h" +#include +#include +#include +#include + +namespace llvm { + +inline void printChunks(std::vector Chunks, bool Oneline = false) { + for(auto C : Chunks) { + if (!Oneline) outs() << '\t'; + outs() << "[" << C.begin; + if (C.end - C.begin != 0) + outs() << "," << C.end; + outs() << "]"; + if (!Oneline) outs() << '\n'; + } +} + +// Counts the amount of lines for a given file +inline unsigned getLines(std::string Filename) { + unsigned Lines = 0; + std::string CurrLine; + std::ifstream FileStream(Filename); + + while (std::getline(FileStream, CurrLine)) + ++Lines; + + return Lines; +} + +inline bool increaseGranularity(std::vector &Chunks) { + outs() << "Increasing granularity...\n"; + std::vector NewChunks; + bool SplitOne = false; + + for(auto C : Chunks) { + int Half = (C.begin + C.end) / 2; + if (C.end - C.begin == 0) + NewChunks.push_back(C); + else { + NewChunks.push_back({ C.begin, Half }); + NewChunks.push_back({ Half + 1, C.end }); + SplitOne = true; + } + } + if (SplitOne) { + Chunks = NewChunks; + outs() << "Success! New Chunks:\n"; + printChunks(Chunks); + } + return SplitOne; +} + +template class Delta { + public: + // Default Delta Debugging Algorithm + static void run(Module* Program, TestRunner Test) { + int TargetCount = D::getTargetCount(Program); + std::vector Chunks = {{ 1, TargetCount }}; + std::vector CurrentChunks; + std::set UninterestingChunks; + + std::string SmallestFilename = Test.getOriginalFilename(); + // Maybe also save the smallest Module? + + // TODO: Run initial pass to verify input is interesting + increaseGranularity(Chunks); + + do { + UninterestingChunks = {}; + for (int I = Chunks.size() - 1; I >= 0; --I) { + CurrentChunks = {}; + + for(auto C : Chunks) + if (!UninterestingChunks.count(C) && C != Chunks[I]) + CurrentChunks.push_back(C); + + outs() << "Testing with: "; + printChunks(CurrentChunks, /*Oneline*/ true); + + std::string CurrentFilename = D::prepareIR(Program, CurrentChunks); + outs() << " | " << CurrentFilename; + + if (Test.run(CurrentFilename) && + getLines(CurrentFilename) < getLines(SmallestFilename)) { + SmallestFilename = CurrentFilename; + outs() << " **** SUCCESS | lines: " << getLines(SmallestFilename); + } + outs() << "\n"; + } + // Delete proven Uninteresting chunks + auto It = Chunks.begin(), E = Chunks.end(); + while(It != E) { + if (UninterestingChunks.count(*It)) + It = Chunks.erase(It); + else + ++It; + } + } while(!UninterestingChunks.empty() || increaseGranularity(Chunks)); + outs() << "Couldn't increase granularity anymore.\n"; + } +}; + +} diff --git a/llvm/tools/bp2/deltas/RemoveFunctions.h b/llvm/tools/bp2/deltas/RemoveFunctions.h new file mode 100644 --- /dev/null +++ b/llvm/tools/bp2/deltas/RemoveFunctions.h @@ -0,0 +1,22 @@ +#include "Chunk.h" +#include "llvm/IR/Value.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Attributes.h" +#include "llvm/Bitcode/BitcodeWriter.h" +#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/FunctionComparator.h" +#include "llvm/Support/ToolOutputFile.h" +#include "llvm/Transforms/Utils/ValueMapper.h" +#include +#include +#include + +namespace llvm { + +class RemoveFunctions { + public: + static int getTargetCount(Module* Program); + static std::string prepareIR(Module* Program, std::vector Chunks); +}; + +} \ No newline at end of file diff --git a/llvm/tools/bp2/deltas/RemoveFunctions.cpp b/llvm/tools/bp2/deltas/RemoveFunctions.cpp new file mode 100644 --- /dev/null +++ b/llvm/tools/bp2/deltas/RemoveFunctions.cpp @@ -0,0 +1,110 @@ +#include "RemoveFunctions.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Use.h" +#include "llvm/Support/Casting.h" +#include +#include + +using namespace llvm; + +bool writeProgramToFile(const std::string &Filename, int FD, + const Module &M) { + ToolOutputFile Out(Filename, FD); + WriteBitcodeToFile(M, Out.os()); + Out.os().close(); + + if (!Out.os().has_error()) { + Out.keep(); + return false; + } + return true; +} + +// Clones original program, and deletes Functions that aren't in desired chunks +std::string RemoveFunctions::prepareIR(Module *Program, + std::vector ChunksToKeep) { + std::string NewBitcode; + std::unique_ptr Clone = CloneModule(*Program); + std::set ToKeep; + + // Get all functions inside desired chunks + int I = 0, FunctionCount = 1; + for(auto &F : *Clone) { + if (!F.isDeclaration()) { + if (FunctionCount >= ChunksToKeep[I].begin && + FunctionCount <= ChunksToKeep[I].end) { + ToKeep.insert(&F); + } + else if (FunctionCount > ChunksToKeep[I].end) + ++I; + ++FunctionCount; + } + } + + std::vector ToRemove; + + // Delete out-of-chunk functions, and replace their calls with null + for (Function &F : *Clone) { + if (!F.isDeclaration() && !ToKeep.count(&F)) { + F.replaceAllUsesWith(UndefValue::get(F.getType())); + ToRemove.push_back(&F); + } + } + + for (auto *F : ToRemove) { + F->eraseFromParent(); + } + + std::vector ToRemoveI; + + // Clean up null calls + for(auto &F : *Clone) + for(auto &BB : F) + for(auto &I : BB) + if (CallInst *callInst = dyn_cast(&I)) + if (!callInst->getCalledFunction()) { + I.replaceAllUsesWith(UndefValue::get(I.getType())); + ToRemoveI.push_back(&I); + } + + for (auto *I : ToRemoveI) { + I->eraseFromParent(); + } + + + // Write new module to file + SmallString<128> UniqueFilename; + int UniqueFD; + std::error_code EC = sys::fs::createUniqueFile("tmp-%%%.bc", UniqueFD, + UniqueFilename); + if (EC) { + errs() << "Error making unique filename: " << EC.message() << "!\n"; + exit(1); + } + NewBitcode = UniqueFilename.str(); + + if (writeProgramToFile(NewBitcode, UniqueFD, *Clone)) { + errs() << "Error emitting bitcode to file '" << NewBitcode + << "'!\n"; + exit(1); + } + + return NewBitcode; +} + +int RemoveFunctions::getTargetCount(Module* M) { + outs() << "----------------------------\n"; + outs() << "Chunk Index Reference:\n"; + int FunctionCount = 0; + for (auto &F : *M) + if (!F.isDeclaration()) { + ++FunctionCount; + outs() << "\t" << FunctionCount << ": " << F.getName() << "\n"; + } + outs() << "----------------------------\n"; + return FunctionCount; +} \ No newline at end of file