Index: docs/FuzzingLLVM.rst
===================================================================
--- docs/FuzzingLLVM.rst
+++ docs/FuzzingLLVM.rst
@@ -100,6 +100,28 @@
 
    % bin/llvm-isel-fuzzer--aarch64-O0-gisel
 
+llvm-opt-fuzzer
+---------------
+
+A |LLVM IR fuzzer| aimed at finding bugs in optimization passes.
+
+It receives an optimization pipeline and runs it for each fuzzer input.
+
+Interface of this fuzzer almost directly mirrors ``llvm-isel-fuzzer``. Both
+``mtriple`` and ``passes`` arguments are required. Passes are specified in a
+format suitable for the new pass manager.
+
+.. code-block:: shell
+
+   % bin/llvm-opt-fuzzer -ignore_remaining_args=1 -mtriple x86_64 -passes instcombine
+
+Similarly to the ``llvm-isel-fuzzer``, arguments in some predefined configurations
+might be embedded directly into the binary file name:
+
+.. code-block:: shell
+
+   % bin/llvm-opt-fuzzer--x86_64-instcombine
+
 llvm-mc-assemble-fuzzer
 -----------------------
 
Index: include/llvm/FuzzMutate/FuzzerCLI.h
===================================================================
--- include/llvm/FuzzMutate/FuzzerCLI.h
+++ include/llvm/FuzzMutate/FuzzerCLI.h
@@ -36,6 +36,10 @@
 /// of passing in command line arguments in the normal way.
 void handleExecNameEncodedBEOpts(StringRef ExecName);
 
+/// Handle optimizer options which are encoded in the executable name.
+/// Same semantics as in 'handleExecNameEncodedBEOpts'.
+void handleExecNameEncodedOptimizerOpts(StringRef ExecName);
+
 using FuzzerTestFun = int (*)(const uint8_t *Data, size_t Size);
 using FuzzerInitFun = int (*)(int *argc, char ***argv);
 
Index: lib/FuzzMutate/FuzzerCLI.cpp
===================================================================
--- lib/FuzzMutate/FuzzerCLI.cpp
+++ lib/FuzzMutate/FuzzerCLI.cpp
@@ -67,6 +67,44 @@
   cl::ParseCommandLineOptions(CLArgs.size(), CLArgs.data());
 }
 
+void llvm::handleExecNameEncodedOptimizerOpts(StringRef ExecName) {
+  // TODO: Refactor parts common with the 'handleExecNameEncodedBEOpts'
+  std::vector<std::string> Args{ExecName};
+
+  // Everything after the first "--" in the executable name encodes options;
+  // without that suffix there is nothing to inject.
+  auto NameAndArgs = ExecName.split("--");
+  if (NameAndArgs.second.empty())
+    return;
+
+  SmallVector<StringRef, 4> Opts;
+  NameAndArgs.second.split(Opts, '-');
+  for (StringRef Opt : Opts) {
+    if (Opt.startswith("instcombine")) {
+      Args.push_back("-passes=instcombine");
+    } else if (Triple(Opt).getArch()) {
+      // Any component with an architecture known to Triple selects the target.
+      Args.push_back("-mtriple=" + Opt.str());
+    } else {
+      errs() << ExecName << ": Unknown option: " << Opt << ".\n";
+      exit(1);
+    }
+  }
+
+  errs() << NameAndArgs.first << ": Injected args:";
+  for (int I = 1, E = Args.size(); I < E; ++I)
+    errs() << " " << Args[I];
+  errs() << "\n";
+
+  // cl::ParseCommandLineOptions wants C strings; Args keeps them alive.
+  std::vector<const char *> CLArgs;
+  CLArgs.reserve(Args.size());
+  for (std::string &S : Args)
+    CLArgs.push_back(S.c_str());
+
+  cl::ParseCommandLineOptions(CLArgs.size(), CLArgs.data());
+}
+
 int llvm::runFuzzerOnInputs(int ArgC, char *ArgV[], FuzzerTestFun TestOne,
                             FuzzerInitFun Init) {
   errs() << "*** This tool was not linked to libFuzzer.\n"
Index: tools/llvm-opt-fuzzer/CMakeLists.txt
===================================================================
--- /dev/null
+++ tools/llvm-opt-fuzzer/CMakeLists.txt
@@ -0,0 +1,24 @@
+set(LLVM_LINK_COMPONENTS
+  ${LLVM_TARGETS_TO_BUILD}
+  Analysis
+  BitWriter
+  CodeGen
+  Core
+  Coroutines
+  IPO
+  IRReader
+  InstCombine
+  Instrumentation
+  FuzzMutate
+  MC
+  ObjCARCOpts
+  ScalarOpts
+  Support
+  Target
+  TransformUtils
+  Vectorize
+  Passes
+)
+
+add_llvm_fuzzer(llvm-opt-fuzzer llvm-opt-fuzzer.cpp
+  DUMMY_MAIN DummyOptFuzzer.cpp)
Index: tools/llvm-opt-fuzzer/DummyOptFuzzer.cpp
===================================================================
--- /dev/null
+++ tools/llvm-opt-fuzzer/DummyOptFuzzer.cpp
@@ -0,0 +1,21 @@
+//===--- DummyOptFuzzer.cpp - Entry point to sanity check the fuzzer ------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implementation of main so we can build and test without linking libFuzzer.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/FuzzMutate/FuzzerCLI.h"
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size);
+extern "C" int LLVMFuzzerInitialize(int *argc, char ***argv);
+int main(int argc, char *argv[]) {
+  return llvm::runFuzzerOnInputs(argc, argv, LLVMFuzzerTestOneInput,
+                                 LLVMFuzzerInitialize);
+}
Index: tools/llvm-opt-fuzzer/llvm-opt-fuzzer.cpp
===================================================================
--- /dev/null
+++ tools/llvm-opt-fuzzer/llvm-opt-fuzzer.cpp
@@ -0,0 +1,274 @@
+//===--- llvm-opt-fuzzer.cpp - Fuzzer for optimization passes -------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Tool to fuzz optimization passes using libFuzzer.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Bitcode/BitcodeReader.h"
+#include "llvm/Bitcode/BitcodeWriter.h"
+#include "llvm/CodeGen/CommandFlags.h"
+#include "llvm/FuzzMutate/FuzzerCLI.h"
+#include "llvm/FuzzMutate/IRMutator.h"
+#include "llvm/FuzzMutate/Operations.h"
+#include "llvm/FuzzMutate/Random.h"
+#include "llvm/IR/Verifier.h"
+#include "llvm/Passes/PassBuilder.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/TargetSelect.h"
+
+using namespace llvm;
+
+static cl::opt<std::string>
+    TargetTripleStr("mtriple", cl::desc("Override target triple for module"));
+
+// Passes to run for this fuzzer instance. Expects new pass manager syntax.
+static cl::opt<std::string> PassPipeline(
+    "passes",
+    cl::desc("A textual description of the pass pipeline for testing"));
+
+static std::unique_ptr<IRMutator> Mutator;
+static std::unique_ptr<TargetMachine> TM;
+
+/// Parse \p Size bytes at \p Data as a bitcode module in \p Context.
+/// Inputs of at most one byte yield a fresh empty module; a parse failure is
+/// printed to errs() and yields nullptr.
+// This function is mostly copied from the llvm-isel-fuzzer.
+// TODO: Move this into FuzzMutate library and reuse.
+static std::unique_ptr<Module> parseModule(const uint8_t *Data, size_t Size,
+                                           LLVMContext &Context) {
+
+  if (Size <= 1)
+    // We get bogus data given an empty corpus - just create a new module.
+    return llvm::make_unique<Module>("M", Context);
+
+  auto Buffer = MemoryBuffer::getMemBuffer(
+      StringRef(reinterpret_cast<const char *>(Data), Size), "Fuzzer input",
+      /*RequiresNullTerminator=*/false);
+
+  auto M = parseBitcodeFile(Buffer->getMemBufferRef(), Context);
+  if (Error E = M.takeError()) {
+    errs() << toString(std::move(E)) << "\n";
+    return nullptr;
+  }
+  return std::move(M.get());
+}
+
+/// Serialize \p M as bitcode into \p Dest. Returns the number of bytes
+/// written, or 0 if the bitcode does not fit into \p MaxSize bytes.
+// This function is copied from the llvm-isel-fuzzer.
+// TODO: Move this into FuzzMutate library and reuse.
+static size_t writeModule(const Module &M, uint8_t *Dest, size_t MaxSize) {
+  std::string Buf;
+  {
+    raw_string_ostream OS(Buf);
+    WriteBitcodeToFile(&M, OS);
+  }
+  if (Buf.size() > MaxSize)
+    return 0;
+  memcpy(Dest, Buf.data(), Buf.size());
+  return Buf.size();
+}
+
+/// Build the IR mutator: integer and floating point scalar types combined
+/// with instruction injection and instruction deletion strategies.
+std::unique_ptr<IRMutator> createOptMutator() {
+  std::vector<TypeGetter> Types{
+      Type::getInt1Ty,  Type::getInt8Ty,  Type::getInt16Ty, Type::getInt32Ty,
+      Type::getInt64Ty, Type::getFloatTy, Type::getDoubleTy};
+
+  std::vector<std::unique_ptr<IRMutationStrategy>> Strategies;
+  Strategies.push_back(
+      llvm::make_unique<InjectorIRStrategy>(
+          InjectorIRStrategy::getDefaultOps()));
+  Strategies.push_back(
+      llvm::make_unique<InstDeleterIRStrategy>());
+
+  return llvm::make_unique<IRMutator>(std::move(Types), std::move(Strategies));
+}
+
+/// libFuzzer custom mutator hook. Mutates the bitcode module held in \p Data
+/// in place and returns the new size, or 0 when the input is not a valid
+/// module or the mutated module does not fit into \p MaxSize bytes.
+extern "C" LLVM_ATTRIBUTE_USED size_t LLVMFuzzerCustomMutator(
+    uint8_t *Data, size_t Size, size_t MaxSize, unsigned int Seed) {
+
+  assert(Mutator &&
+         "IR mutator should have been created during fuzzer initialization");
+
+  LLVMContext Context;
+  auto M = parseModule(Data, Size, Context);
+  if (!M || verifyModule(*M, &errs())) {
+    errs() << "error: mutator input module is broken!\n";
+    return 0;
+  }
+
+  Mutator->mutateModule(*M, Seed, Size, MaxSize);
+
+#ifndef NDEBUG
+  if (verifyModule(*M, &errs())) {
+    errs() << "mutation result doesn't pass verification\n";
+    M->dump();
+    abort();
+  }
+#endif
+
+  return writeModule(*M, Data, MaxSize);
+}
+
+/// libFuzzer entry point. Runs the configured pass pipeline on the module
+/// encoded in \p Data and aborts if the result fails verification.
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
+  assert(TM && "Should have been created during fuzzer initialization");
+
+  if (Size <= 1)
+    // We get bogus data given an empty corpus - ignore it.
+    return 0;
+
+  // Parse module
+  //
+
+  LLVMContext Context;
+  auto M = parseModule(Data, Size, Context);
+  if (!M || verifyModule(*M, &errs())) {
+    errs() << "error: input module is broken!\n";
+    return 0;
+  }
+
+  // Set up target dependent options
+  //
+
+  M->setTargetTriple(TM->getTargetTriple().normalize());
+  M->setDataLayout(TM->createDataLayout());
+  setFunctionAttributes(TM->getTargetCPU(), TM->getTargetFeatureString(), *M);
+
+  // Create pass pipeline
+  //
+
+  PassBuilder PB(TM.get());
+
+  LoopAnalysisManager LAM;
+  FunctionAnalysisManager FAM;
+  CGSCCAnalysisManager CGAM;
+  ModulePassManager MPM;
+  ModuleAnalysisManager MAM;
+
+  FAM.registerPass([&] { return PB.buildDefaultAAPipeline(); });
+  PB.registerModuleAnalyses(MAM);
+  PB.registerCGSCCAnalyses(CGAM);
+  PB.registerFunctionAnalyses(FAM);
+  PB.registerLoopAnalyses(LAM);
+  PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);
+
+  bool Ok = PB.parsePassPipeline(MPM, PassPipeline, false, false);
+  assert(Ok && "Should have been checked during fuzzer initialization");
+  (void)Ok; // silence unused variable warning on release builds
+
+  // Run passes which we need to test
+  //
+
+  MPM.run(*M, MAM);
+
+  // Check that passes resulted in a correct code
+  if (verifyModule(*M, &errs())) {
+    errs() << "Transformation resulted in an invalid module\n";
+    abort();
+  }
+
+  return 0;
+}
+
+/// Fatal error handler: print the message and abort.
+static void handleLLVMFatalError(void *, const std::string &Message, bool) {
+  // TODO: Would it be better to call into the fuzzer internals directly?
+  dbgs() << "LLVM ERROR: " << Message << "\n"
+         << "Aborting to trigger fuzzer exit handling.\n";
+  abort();
+}
+
+/// libFuzzer initialization hook. Parses options, creates the target machine,
+/// validates the pass pipeline and creates the mutator; exits on bad options.
+extern "C" LLVM_ATTRIBUTE_USED int LLVMFuzzerInitialize(
+    int *argc, char ***argv) {
+  EnableDebugBuffering = true;
+
+  // Make sure we print the summary and the current unit when LLVM errors out.
+  install_fatal_error_handler(handleLLVMFatalError, nullptr);
+
+  // Initialize llvm
+  //
+
+  InitializeAllTargets();
+  InitializeAllTargetMCs();
+
+  PassRegistry &Registry = *PassRegistry::getPassRegistry();
+  initializeCore(Registry);
+  initializeCoroutines(Registry);
+  initializeScalarOpts(Registry);
+  initializeObjCARCOpts(Registry);
+  initializeVectorization(Registry);
+  initializeIPO(Registry);
+  initializeAnalysis(Registry);
+  initializeTransformUtils(Registry);
+  initializeInstCombine(Registry);
+  initializeInstrumentation(Registry);
+  initializeTarget(Registry);
+
+  // Parse input options
+  //
+
+  handleExecNameEncodedOptimizerOpts(*argv[0]);
+  parseFuzzerCLOpts(*argc, *argv);
+
+  // Create TargetMachine
+  //
+
+  if (TargetTripleStr.empty()) {
+    errs() << *argv[0] << ": -mtriple must be specified\n";
+    exit(1);
+  }
+  Triple TargetTriple = Triple(Triple::normalize(TargetTripleStr));
+
+  std::string Error;
+  const Target *TheTarget =
+      TargetRegistry::lookupTarget(MArch, TargetTriple, Error);
+  if (!TheTarget) {
+    errs() << *argv[0] << ": " << Error;
+    exit(1);
+  }
+
+  TargetOptions Options = InitTargetOptionsFromCodeGenFlags();
+  TM.reset(TheTarget->createTargetMachine(
+      TargetTriple.getTriple(), getCPUStr(), getFeaturesStr(),
+      Options, getRelocModel(), getCodeModel(), CodeGenOpt::Default));
+  assert(TM && "Could not allocate target machine!");
+
+  // Check that pass pipeline is specified and correct
+  //
+
+  if (PassPipeline.empty()) {
+    errs() << *argv[0] << ": at least one pass should be specified\n";
+    exit(1);
+  }
+
+  PassBuilder PB(TM.get());
+  ModulePassManager MPM;
+  if (!PB.parsePassPipeline(MPM, PassPipeline, false, false)) {
+    errs() << *argv[0] << ": can't parse pass pipeline\n";
+    exit(1);
+  }
+
+  // Create mutator
+  //
+
+  Mutator = createOptMutator();
+
+  return 0;
+}