Index: CMakeLists.txt =================================================================== --- CMakeLists.txt +++ CMakeLists.txt @@ -408,6 +408,8 @@ option(CLANG_ENABLE_PROTO_FUZZER "Build Clang protobuf fuzzer." OFF) +option(CLANG_ENABLE_ASSEMBLE_PROTO_FUZZER "Build Clang assemble protobuf fuzzer." OFF) + if(NOT CLANG_ENABLE_STATIC_ANALYZER AND (CLANG_ENABLE_ARCMT OR CLANG_ANALYZER_BUILD_Z3)) message(FATAL_ERROR "Cannot disable static analyzer while enabling ARCMT or Z3") endif() Index: cmake/modules/ProtobufMutator.cmake =================================================================== --- cmake/modules/ProtobufMutator.cmake +++ cmake/modules/ProtobufMutator.cmake @@ -1,13 +1,18 @@ set(PBM_PREFIX protobuf_mutator) set(PBM_PATH ${CMAKE_CURRENT_BINARY_DIR}/${PBM_PREFIX}/src/${PBM_PREFIX}) +# TODO: Fix path to be a CMake environment variable +set(PB_PATH /local/mnt/workspace/jocewei/protobuf/install) set(PBM_LIB_PATH ${PBM_PATH}-build/src/libprotobuf-mutator.a) set(PBM_FUZZ_LIB_PATH ${PBM_PATH}-build/src/libfuzzer/libprotobuf-mutator-libfuzzer.a) +set(CMAKE_MODULE_PATH "${PB_PATH};${CMAKE_MODULE_PATH}") ExternalProject_Add(${PBM_PREFIX} PREFIX ${PBM_PREFIX} - GIT_REPOSITORY https://github.com/google/libprotobuf-mutator.git + # TODO: Fix path to be a CMake environment variable + GIT_REPOSITORY file:///local/mnt/workspace/jocewei/libprotobuf-mutator GIT_TAG master CMAKE_ARGS -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} + -DCMAKE_PREFIX_PATH=${PB_PATH} CMAKE_CACHE_ARGS -DCMAKE_C_COMPILER:FILEPATH=${CMAKE_C_COMPILER} -DCMAKE_CXX_COMPILER:FILEPATH=${CMAKE_CXX_COMPILER} BUILD_BYPRODUCTS ${PBM_LIB_PATH} ${PBM_FUZZ_LIB_PATH} Index: cmake/modules/ProtobufMutatorMC.cmake =================================================================== --- /dev/null +++ cmake/modules/ProtobufMutatorMC.cmake @@ -0,0 +1,25 @@ +# TODO: Fix double build of protobuf_mutator +set(PBM_PREFIX protobuf_mutator_mc) +set(PBM_PATH ${CMAKE_CURRENT_BINARY_DIR}/${PBM_PREFIX}/src/${PBM_PREFIX}) +# TODO: Fix 
path to be a CMake environment variable +set(PB_PATH /local/mnt/workspace/jocewei/protobuf/install) +set(PBM_LIB_PATH ${PBM_PATH}-build/src/libprotobuf-mutator.a) +set(PBM_FUZZ_LIB_PATH ${PBM_PATH}-build/src/libfuzzer/libprotobuf-mutator-libfuzzer.a) +set(CMAKE_MODULE_PATH "${PB_PATH};${CMAKE_MODULE_PATH}") + +ExternalProject_Add(${PBM_PREFIX} + PREFIX ${PBM_PREFIX} + # TODO: Fix path to be a CMake environment variable + GIT_REPOSITORY file:///local/mnt/workspace/jocewei/libprotobuf-mutator + GIT_TAG master + CMAKE_ARGS -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} + -DCMAKE_PREFIX_PATH=${PB_PATH} + CMAKE_CACHE_ARGS -DCMAKE_C_COMPILER:FILEPATH=${CMAKE_C_COMPILER} + -DCMAKE_CXX_COMPILER:FILEPATH=${CMAKE_CXX_COMPILER} + BUILD_BYPRODUCTS ${PBM_LIB_PATH} ${PBM_FUZZ_LIB_PATH} + UPDATE_COMMAND "" + INSTALL_COMMAND "" + ) + +set(ProtobufMutator_INCLUDE_DIRS ${PBM_PATH}) +set(ProtobufMutator_LIBRARIES ${PBM_FUZZ_LIB_PATH} ${PBM_LIB_PATH}) Index: tools/CMakeLists.txt =================================================================== --- tools/CMakeLists.txt +++ tools/CMakeLists.txt @@ -8,6 +8,7 @@ add_clang_subdirectory(clang-fuzzer) add_clang_subdirectory(clang-import-test) add_clang_subdirectory(clang-offload-bundler) +add_clang_subdirectory(llvm-mc-assemble-proto-fuzzer) add_clang_subdirectory(c-index-test) Index: tools/llvm-mc-assemble-proto-fuzzer/CMakeLists.txt =================================================================== --- /dev/null +++ tools/llvm-mc-assemble-proto-fuzzer/CMakeLists.txt @@ -0,0 +1,67 @@ +set(LLVM_LINK_COMPONENTS ${LLVM_TARGETS_TO_BUILD} FuzzMutate) +set(CXX_FLAGS_NOFUZZ ${CMAKE_CXX_FLAGS}) +set(DUMMY_MAIN DummyMCFuzzer.cpp) +if(LLVM_LIB_FUZZING_ENGINE) + unset(DUMMY_MAIN) +elseif(LLVM_USE_SANITIZE_COVERAGE) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=fuzzer") + set(CXX_FLAGS_NOFUZZ "${CXX_FLAGS_NOFUZZ} -fsanitize=fuzzer-no-link") + unset(DUMMY_MAIN) +endif() + +# Needed by LLVM's CMake checks because this file defines multiple targets. 
+set(LLVM_OPTIONAL_SOURCES + DummyMCFuzzer.cpp + MCFuzzer.cpp + ExampleMCProtoFuzzer.cpp + ) + +if(CLANG_ENABLE_ASSEMBLE_PROTO_FUZZER) + # Create protobuf .h and .cc files, and put them in a library for use by + # llvm-mc-proto-fuzzer components. + find_package(Protobuf REQUIRED) + add_definitions(-DGOOGLE_PROTOBUF_NO_RTTI) + include_directories(${PROTOBUF_INCLUDE_DIRS}) + include_directories(${CMAKE_CURRENT_BINARY_DIR}) + protobuf_generate_cpp(PROTO_SRCS PROTO_HDRS asm_proto.proto) + set(LLVM_OPTIONAL_SOURCES ${LLVM_OPTIONAL_SOURCES} ${PROTO_SRCS}) + add_clang_library(mcASMProto + ${PROTO_SRCS} + ${PROTO_HDRS} + + LINK_LIBS + ${PROTOBUF_LIBRARIES} + ) + + # Build and include libprotobuf-mutator + include(ProtobufMutatorMC) + include_directories(${ProtobufMutator_INCLUDE_DIRS}) + + # Build the protobuf->C++ translation library and driver. + add_clang_subdirectory(proto-to-asm) + + # Build the fuzzer initialization library. + add_clang_subdirectory(fuzzer-initialize) + + add_clang_executable(llvm-mc-assemble-proto-fuzzer + ${DUMMY_MAIN} + ExampleMCProtoFuzzer.cpp + ) + + set(COMMON_PROTO_FUZZ_LIBRARIES + ${ProtobufMutator_LIBRARIES} + ${PROTOBUF_LIBRARIES} + ${LLVM_LIB_FUZZING_ENGINE} + mcHandleASM + ) + + target_link_libraries(llvm-mc-assemble-proto-fuzzer + PRIVATE + ${COMMON_PROTO_FUZZ_LIBRARIES} + mcASMProto + mcProtoToASM + ) + +endif() + +add_clang_subdirectory(handle-asm) Index: tools/llvm-mc-assemble-proto-fuzzer/DummyMCFuzzer.cpp =================================================================== --- /dev/null +++ tools/llvm-mc-assemble-proto-fuzzer/DummyMCFuzzer.cpp @@ -0,0 +1,21 @@ +//===-- DummyClangFuzzer.cpp - Entry point to sanity check fuzzers --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// Provides a main() to build without linking libFuzzer. +// +//===----------------------------------------------------------------------===// +#include "llvm/FuzzMutate/FuzzerCLI.h" + +extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size); +extern "C" int LLVMFuzzerInitialize(int *argc, char ***argv); + +int main(int argc, char *argv[]) { + return llvm::runFuzzerOnInputs(argc, argv, LLVMFuzzerTestOneInput, + LLVMFuzzerInitialize); +} Index: tools/llvm-mc-assemble-proto-fuzzer/ExampleMCProtoFuzzer.cpp =================================================================== --- /dev/null +++ tools/llvm-mc-assemble-proto-fuzzer/ExampleMCProtoFuzzer.cpp @@ -0,0 +1,27 @@ +//===-- ExampleMCProtoFuzzer.cpp - Fuzz Assembler --------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file implements a function that runs llvm mc assemble on a single +/// input and uses libprotobuf-mutator to find new inputs. This function is +/// then linked into the Fuzzer library. 
+/// +//===----------------------------------------------------------------------===// + +#include "asm_proto.pb.h" +#include "handle-asm/handle_asm.h" +#include "proto-to-asm/proto_to_asm.h" +#include "src/libfuzzer/libfuzzer_macro.h" + +using namespace mc_proto_fuzzer; + +DEFINE_BINARY_PROTO_FUZZER(const Assembly& input) { + auto S = FunctionToString(input); + HandleASM(S); +} Index: tools/llvm-mc-assemble-proto-fuzzer/MCFuzzer.cpp =================================================================== --- /dev/null +++ tools/llvm-mc-assemble-proto-fuzzer/MCFuzzer.cpp @@ -0,0 +1,29 @@ +//===-- MCFuzzer.cpp - Fuzz MC Layer --------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file implements a function that runs assembler on a single +/// input. This function is then linked into the Fuzzer library. +/// +//===----------------------------------------------------------------------===// + +#include "asm_proto.pb.h" +#include "handle-asm/handle_asm.h" +#include "proto-to-asm/proto_to_asm.h" +#include "src/libfuzzer/libfuzzer_macro.h" + +using namespace mc_proto_fuzzer; + +extern "C" int LLVMFuzzerInitialize(int *argc, char ***argv) { return 0; } + +extern "C" int LLVMFuzzerTestOneInput(uint8_t *data, size_t size) { + std::string s((const char *)data, size); + HandleASM(s); + return 0; +} Index: tools/llvm-mc-assemble-proto-fuzzer/README.txt =================================================================== --- /dev/null +++ tools/llvm-mc-assemble-proto-fuzzer/README.txt @@ -0,0 +1,44 @@ +First, create a corpus directory and a directory for output files (for example, +./corpus and ./outputdir). + +Make sure that llvm-mc-assemble-proto-fuzzer and llvm-mc-assemble-proto-to-asm +are in your path. 
+ +Then, run the Python script. Specify any args you would like to run the +fuzzer with, including the corpus directory and the full path to objdump. +For example: + +$ python mcfuzz.py --corpus corpus --max-len 32 --runs 10 --triple riscv32 + --out outputdir --objdump /full/path/to/riscv32-unknown-linux-gnu-objdump + --mattr +c + +Run the script with the --verbose flag if you would like to see the output +of the fuzzer as it runs. + +Use the --riscv-no-aliases flag if you want the generated .s files (from +proto-to-asm) to be printed in their original form (i.e., to print + `sub x3,x0,x4` to the file instead of + `neg x3,x4`). Note that using this flag (--riscv-no-aliases) may cause + some test cases to fail, as the default behavior of the compiler is to replace + `sub rd,x0,rs` with a `neg` instruction. Aliases are on by default + (NoAliases=False). + +NOTE: If you run the script without specifying an output directory [--out], +the script will terminate after fuzzing the corpus a specified +number [--runs] of times. + +After the first part of the script completes, the corpus directory should +contain the generated corpus files. + +Next, we populate the output directory by iterating through all the files +currently in the corpus. We use the llvm-mc-assemble-proto-to-asm tool to +generate the .s files. We use the llvm-mc-assemble-proto-fuzzer tool, invoked +on each file in the corpus, with -filetype=obj, and -runs=1, to generate a +corresponding .o file. Finally, we call objdump on the .o file to produce a +.objdump file, which can be compared (after some processing) to the .s file. + +Thus, the output directory should contain .o, .s, and .objdump files when the +script concludes. + +The last part of the script runs a diff of all the .s and .objdump files, and +prints out a summary of results (how many diffs passed and how many failed). 
Index: tools/llvm-mc-assemble-proto-fuzzer/asm_proto.proto =================================================================== --- /dev/null +++ tools/llvm-mc-assemble-proto-fuzzer/asm_proto.proto @@ -0,0 +1,61 @@ +//===-- asm_proto.proto - Protobuf description of ASM ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file describes a subset of ASM as a protobuf. It is used to +/// more easily find interesting inputs for fuzzing llvm mc layer. +/// +//===----------------------------------------------------------------------===// + +syntax = "proto2"; + +message Register { + enum RegName { + X0 = 1; X1 = 2; X2 = 3; X3 = 4; X4 = 5; X5 = 6; X6 = 7; X7 = 8; X8 = 9; + X9 = 10; X10 = 11; X11 = 12; X12 = 13; X13 = 14; X14 = 15; X15 = 16; + X16 = 17; X17 = 18; X18 = 19; X19 = 20; X20 = 21; X21 = 22; X22 = 23; + X23 = 24; X24 = 25; X25 = 26; X26 = 27; X27 = 28; X28 = 29; X29 = 30; + X30 = 31; X31 = 32; + }; + required RegName name = 1; +} + +message ThreeRegOpcode { + enum Op { + ADD = 1; SUB = 2; + }; + required Op op = 1; +} + +message ThreeRegSeq { + required Register operand1 = 1; + required Register operand2 = 2; + required Register operand3 = 3; +} + +message ThreeRegStatement { + required ThreeRegOpcode opcode = 1; + required ThreeRegSeq operands = 2; +} + +message AsmStatement { + oneof asmstatement_oneof { + ThreeRegStatement threeregstatement= 2; + } +} + +message AsmStatementSeq { + repeated AsmStatement statements = 1; +} + +message Assembly { + required AsmStatementSeq asmStatements = 1; +} + +package mc_proto_fuzzer; Index: tools/llvm-mc-assemble-proto-fuzzer/handle-asm/CMakeLists.txt =================================================================== --- /dev/null +++ 
tools/llvm-mc-assemble-proto-fuzzer/handle-asm/CMakeLists.txt @@ -0,0 +1,5 @@ +set(LLVM_LINK_COMPONENTS ${LLVM_TARGETS_TO_BUILD} Support) + +add_llvm_library(mcHandleASM + handle_asm.cpp + ) Index: tools/llvm-mc-assemble-proto-fuzzer/handle-asm/handle_asm.h =================================================================== --- /dev/null +++ tools/llvm-mc-assemble-proto-fuzzer/handle-asm/handle_asm.h @@ -0,0 +1,24 @@ +//==-- handle_asm.h - Helper function for mc fuzzers --------------------==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Defines HandleASM for use by the MC fuzzers. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_MC_TOOLS_MC_FUZZER_HANDLE_ASM_HANDLEASM_H +#define LLVM_MC_TOOLS_MC_FUZZER_HANDLE_ASM_HANDLEASM_H + +#include +#include + +namespace mc_proto_fuzzer { +void HandleASM(const std::string &S); +} // namespace mc_proto_fuzzer + +#endif Index: tools/llvm-mc-assemble-proto-fuzzer/handle-asm/handle_asm.cpp =================================================================== --- /dev/null +++ tools/llvm-mc-assemble-proto-fuzzer/handle-asm/handle_asm.cpp @@ -0,0 +1,318 @@ +//==-- handle_asm.cpp - Helper function for mc fuzzers ------------------==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Implements HandleASM for use by the mc fuzzers. 
+// +//===----------------------------------------------------------------------===// + +#include "handle_asm.h" + +#include "llvm-c/Target.h" +#include "llvm/MC/MCAsmBackend.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCCodeEmitter.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCObjectFileInfo.h" +#include "llvm/MC/MCParser/MCTargetAsmParser.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCObjectWriter.h" +#include "llvm/MC/MCTargetOptionsCommandFlags.inc" +#include "llvm/Support/SourceMgr.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/ToolOutputFile.h" + +#include + +using namespace llvm; +using namespace mc_proto_fuzzer; + +static cl::opt OutputDirname("out", + cl::desc("Directory name for " + "llvm-mc-assemble-proto-fuzzer " + "output when filetype=obj"), + cl::value_desc("dirname"), + cl::init("./outputdir")); + +static cl::opt + TripleName("triple", cl::desc("Target triple to assemble for, " + "see -version for available targets")); + +static cl::list + MAttrs("mattr", cl::CommaSeparated, + cl::desc("Target specific attributes (-mattr=help for details)"), + cl::value_desc("a1,+a2,-a3,...")); +// The feature string derived from -mattr's values. 
+std::string FeaturesStr; + +static cl::list + FuzzerArgs("fuzzer-args", cl::Positional, + cl::desc("Options to pass to the fuzzer"), cl::ZeroOrMore, + cl::PositionalEatsArgs); + +static std::vector ModifiedArgv; + +enum OutputFileType { + OFT_Null, + OFT_AssemblyFile, + OFT_ObjectFile +}; +static cl::opt +FileType("filetype", cl::init(OFT_AssemblyFile), + cl::desc("Choose an output file type:"), + cl::values( + clEnumValN(OFT_AssemblyFile, "asm", + "Emit an assembly ('.s') file"), + clEnumValN(OFT_Null, "null", + "Don't emit anything (for timing purposes)"), + clEnumValN(OFT_ObjectFile, "obj", + "Emit a native object ('.o') file"))); + +static std::unique_ptr GetOutputStream(StringRef Path) { + std::error_code EC; + auto Out = llvm::make_unique(Path, EC, sys::fs::F_None); + if (EC) { + errs() << EC.message() << '\n'; + return nullptr; + } + + return Out; +} + +std::string OutputFilename = ""; + +static int assembleInput(const char *ProgName, const Target *TheTarget, + SourceMgr &SrcMgr, MCContext &Ctx, MCStreamer &Str, + MCAsmInfo &MAI, MCSubtargetInfo &STI, + MCInstrInfo &MCII, MCTargetOptions &MCOptions) { + static const bool NoInitialTextSection = false; + + std::unique_ptr Parser( + createMCAsmParser(SrcMgr, Ctx, Str, MAI)); + + std::unique_ptr TAP( + TheTarget->createMCAsmParser(STI, *Parser, MCII, MCOptions)); + + if (!TAP) { + errs() << ProgName + << ": error: this target '" << TripleName + << "', does not support assembly parsing.\n"; + abort(); + } + + Parser->setShowParsedOperands(true); + Parser->setTargetParser(*TAP); + + return Parser->Run(NoInitialTextSection); +} + +void mc_proto_fuzzer::HandleASM(const std::string &S) { + + const bool ShowInst = true; + const bool AsmVerbose = true; + const bool UseDwarfDirectory = true; + + Triple TheTriple(Triple::normalize(TripleName)); + + SourceMgr SrcMgr; + + std::unique_ptr BufferPtr = + llvm::MemoryBuffer::getMemBuffer(S); + + // Tell SrcMgr about this buffer, which is what the parser will pick up. 
+ SrcMgr.AddNewSourceBuffer(std::move(BufferPtr), SMLoc()); + + static const std::vector NoIncludeDirs; + SrcMgr.setIncludeDirs(NoIncludeDirs); + + static std::string ArchName; + std::string Error; + const Target *TheTarget = TargetRegistry::lookupTarget(ArchName, TheTriple, + Error); + if (!TheTarget) { + errs() << "error: this target '" << TheTriple.normalize() + << "/" << ArchName << "', was not found: '" << Error << "'\n"; + + abort(); + } + + std::unique_ptr MRI(TheTarget->createMCRegInfo(TripleName)); + if (!MRI) { + errs() << "Unable to create target register info!"; + abort(); + } + + std::unique_ptr MAI(TheTarget->createMCAsmInfo(*MRI, TripleName)); + if (!MAI) { + errs() << "Unable to create target asm info!"; + abort(); + } + + MCObjectFileInfo MOFI; + MCContext Ctx(MAI.get(), MRI.get(), &MOFI, &SrcMgr); + + static const bool UsePIC = false; + MOFI.InitMCObjectFileInfo(TheTriple, UsePIC, Ctx); + + const unsigned OutputAsmVariant = 0; + std::unique_ptr MCII(TheTarget->createMCInstrInfo()); + + MCInstPrinter *IP = nullptr; + const char *ProgName = "llvm-mc-assemble-proto-fuzzer"; + std::unique_ptr STI( + TheTarget->createMCSubtargetInfo(TripleName, "", FeaturesStr)); + std::unique_ptr MAB = nullptr; + + std::unique_ptr CE; + CE.reset(TheTarget->createMCCodeEmitter(*MCII, *MRI, Ctx)); + + MCTargetOptions MCOptions = InitMCTargetOptionsFromFlags(); + + std::string OutputString; + raw_string_ostream Out(OutputString); + auto FOut = llvm::make_unique(Out); + + std::unique_ptr Str; + + if (FileType == OFT_AssemblyFile) { + IP = TheTarget->createMCInstPrinter(Triple(TripleName), OutputAsmVariant, + *MAI, *MCII, *MRI); + if (!IP) { + errs() + << "error: unable to create instruction printer for target triple '" + << TheTriple.normalize() << "' with assembly variant " + << OutputAsmVariant << ".\n"; + + abort(); + } + + std::unique_ptr MAB( + TheTarget->createMCAsmBackend(*STI, *MRI, MCOptions)); + Str.reset(TheTarget->createAsmStreamer(Ctx, std::move(FOut), 
AsmVerbose, + UseDwarfDirectory, IP, std::move(CE), + std::move(MAB), ShowInst)); + Str->setUseAssemblerInfoForParsing(true); + const int Res = assembleInput(ProgName, TheTarget, SrcMgr, Ctx, *Str, + *MAI, *STI, *MCII, MCOptions); + + (void) Res; + } else { + assert(FileType == OFT_ObjectFile && "Invalid file type!"); + MCAsmBackend *MAB = TheTarget->createMCAsmBackend(*STI, *MRI, MCOptions); + + std::error_code EC; + if (OutputFilename.empty()) { + errs() << "output dir name is empty\n"; + OutputFilename = "-"; + } + errs() << "output file name is " << OutputFilename << "\n"; + std::unique_ptr Out = GetOutputStream(OutputFilename); + assert(Out && "Output Stream is null!"); + + // Don't waste memory on names of temp labels. + Ctx.setUseNamesOnTempLabels(false); + + std::unique_ptr BOS; + raw_pwrite_stream *OS = &Out->os(); + if (!Out->os().supportsSeeking()) { + BOS = make_unique(Out->os()); + OS = BOS.get(); + } + + Str.reset(TheTarget->createMCObjectStreamer( + TheTriple, Ctx, std::unique_ptr(MAB), + MAB->createObjectWriter(*OS), std::move(CE), *STI, + MCOptions.MCRelaxAll, MCOptions.MCIncrementalLinkerCompatible, + /*DWARFMustBeAtTheEnd*/ false)); + Str->setUseAssemblerInfoForParsing(true); + const int Res = assembleInput(ProgName, TheTarget, SrcMgr, Ctx, *Str, + *MAI, *STI, *MCII, MCOptions); + + Out->keep(); + + (void) Res; + } + + return; +} + +extern "C" LLVM_ATTRIBUTE_USED int LLVMFuzzerInitialize(int *argc, + char ***argv) { + // The command line is unusual compared to other fuzzers due to the need to + // specify the target. Options like -triple, -mcpu, and -mattr work like + // their counterparts in llvm-mc, while -fuzzer-args collects options for the + // fuzzer itself. 
+ // + // Examples: + // + // Fuzz the RISCV32 assembler using 100,000 inputs of up to 32-bytes each + // and use the contents of ./corpus as the test corpus: + // llvm-mc-assemble-proto-fuzzer -triple riscv32 \ + // -fuzzer-args -max_len=32 -runs=100000 ./corpus + // + // If your aim is to find instructions that are not tested, then it is + // advisable to constrain the maximum input size to a single instruction + // using -max_len as in the first example. This results in a test corpus of + // individual instructions that test unique paths. Without this constraint, + // there will be considerable redundancy in the corpus. + + char **OriginalArgv = *argv; + + LLVMInitializeAllTargetInfos(); + LLVMInitializeAllTargetMCs(); + LLVMInitializeAllAsmParsers(); + + cl::ParseCommandLineOptions(*argc, OriginalArgv); + + // Rebuild the argv without the arguments llvm-mc-assemble-proto-fuzzer + // consumed so that the driver can parse its arguments. + // + // FuzzerArgs cannot provide the non-const pointer that OriginalArgv needs. + // Re-use the strings from OriginalArgv instead of copying FuzzerArg to a + // non-const buffer to avoid the need to clean up when the fuzzer terminates. + + ModifiedArgv.push_back(OriginalArgv[0]); + for (const auto &FuzzerArg : FuzzerArgs) { + for (int i = 1; i < *argc; ++i) { + if (FuzzerArg == OriginalArgv[i]) + ModifiedArgv.push_back(OriginalArgv[i]); + } + } + + *argc = ModifiedArgv.size(); + *argv = ModifiedArgv.data(); + // Check for specified corpus directory or file. 
+ for (int I = 1; I < *argc; I++) { + struct stat S; + if (stat(ModifiedArgv[I], &S) == 0) { + if (S.st_mode & S_IFREG) { + std::string NewFilename(ModifiedArgv[I]); + std::size_t Pos = NewFilename.find_last_of("/"); + if (Pos != std::string::npos) + NewFilename = NewFilename.erase(0, Pos+1); + OutputFilename = OutputDirname + "/" + NewFilename + ".o"; + break; + } + } + } + + // Package up features to be passed to target/subtarget + // We have to pass it via a global since the callback doesn't + // permit any user data. + if (MAttrs.size()) { + SubtargetFeatures Features; + for (unsigned i = 0; i != MAttrs.size(); ++i) + Features.AddFeature(MAttrs[i]); + FeaturesStr = Features.getString(); + } + + if (TripleName.empty()) + TripleName = sys::getDefaultTargetTriple(); + + return 0; +} Index: tools/llvm-mc-assemble-proto-fuzzer/mcfuzz.py =================================================================== --- /dev/null +++ tools/llvm-mc-assemble-proto-fuzzer/mcfuzz.py @@ -0,0 +1,178 @@ +#!/usr/bin/env python + +import argparse +import difflib +import os +import shlex +import subprocess +import sys +import tempfile + +parser = argparse.ArgumentParser() + +# Flags for the directory names, corpus and outputdir. +parser.add_argument("--corpus", type=str, help="corpus directory name", \ + required=True) +parser.add_argument("--out", type=str, \ + help="output directory name for obj files", default=None) + +# Flags for llvm-mc-assemble-proto-fuzzer: triple, mattr, filetype. +parser.add_argument("--triple", type=str, help="specify the triple", \ + default="riscv32") +parser.add_argument("--mattr", type=str, help="specify mattr", \ + default="") +parser.add_argument("--filetype", type=str, help="asm or obj", \ + default="obj") + +# These args are passed in after the -fuzzer-args flag. 
+parser.add_argument("--runs", type=int, help="number of runs", default=100) +parser.add_argument("--max-len", type=int, \ + help="limit instruction size for fuzzing", default=40) + +# Flag specifies the name of objdump executable. +parser.add_argument("--objdump", type=str, help="specify the path to" \ + " objdump", default="/prj/llvm-arm/home/common/build_tools/" \ + "riscv32-gnu-7.2.0/bin/riscv32-unknown-linux-gnu-objdump") + +# Flag specifies that we should print out everything +parser.add_argument("--verbose", dest="verbose", action="store_true") +parser.set_defaults(verbose=False) + +parser.add_argument("--riscv-no-aliases", dest="NoAliases", + action="store_true") +parser.set_defaults(NoAliases=False) + +args = parser.parse_args() + +# Step 1: Invoke fuzzer to generate a corpus. +cmd = 'llvm-mc-assemble-proto-fuzzer {corpus} -triple={triple} -mattr={mattr}' \ + + ' -filetype={filetype} -fuzzer-args -runs={runs} -max_len={max_len}' +cmd = cmd.format(corpus=args.corpus, triple=args.triple, mattr=args.mattr, \ + filetype=args.filetype, runs=args.runs, max_len=args.max_len) +fuzz_command = shlex.split(cmd) + +fuzz_proc = subprocess.Popen(fuzz_command, stdout=subprocess.PIPE, \ + stderr=subprocess.STDOUT) +fuzz_out, fuzz_err = fuzz_proc.communicate() +if fuzz_proc.returncode != 0: + raise ValueError('failed to run fuzz {}: {}'.format(fuzz_command, fuzz_err)) +if args.verbose: + print(fuzz_out) + +# If user specified an output directory, proceed to step 2; otherwise, exit. +if args.out is None: + print("No output directory specified; exiting after populating " + "corpus directory.") + sys.exit(0) + +# Keep track of which diffs pass / fail. +passes = 0 +fails = 0 +list_of_failures = [] +# Iterate through the corpus body. +# For each file, generate a .s and a .o file. Then, use objdump to +# generate a .objdump file, which we will compare to the .s file. +# Diff the .s with the corresponding .objdump file. 
+for filename in os.listdir(args.corpus): + + filename_prefix = args.out + "/" + filename + + # Step 2: Run proto-to-asm on corpus file to generate .s file. + proto_to_asm_command = ["llvm-mc-assemble-proto-to-asm", \ + args.corpus + "/" + filename, "-riscv-no-aliases=" + + str(args.NoAliases)] + asm_file = open(filename_prefix + ".s", "w+r") + asm_proc = subprocess.Popen(proto_to_asm_command, stdout=asm_file, + stderr=subprocess.STDOUT) + asm_out, asm_err = asm_proc.communicate() + if asm_proc.returncode != 0: + raise ValueError('failed to run {}: {}'.format(proto_to_asm_command, + asm_err)) + + # Step 3: Generate .o files in the outputdir. + cmd = 'llvm-mc-assemble-proto-fuzzer {corpus}/{file} -triple={triple}' \ + + ' -mattr={mattr} -out={out} -filetype=obj -fuzzer-args -runs=1' + cmd = cmd.format(corpus=args.corpus, file=filename, triple=args.triple, \ + mattr=args.mattr, out=args.out) + obj_files_command = shlex.split(cmd) + obj_proc = subprocess.Popen(obj_files_command, stdout=subprocess.PIPE, + stderr=subprocess.STDOUT) + obj_out, obj_err = obj_proc.communicate() + if obj_proc.returncode != 0: + raise ValueError('failed to run fuzzer {}: {}'.format( \ + obj_files_command, obj_err)) + if args.verbose: + print(obj_out) + + # Step 4: Call objdump on each .o file in the output directory, + # to generate corresponding .objdump files. + objdump_file = open(filename_prefix + ".objdump", "w+r") + cmd = '{objdump} -dr -M numeric {filename}.o' + cmd = cmd.format(objdump=args.objdump, filename=filename_prefix) + objdump_command = shlex.split(cmd) + objdump_proc = subprocess.Popen(objdump_command, + stdout=objdump_file, stderr=subprocess.STDOUT) + objdump_out, objdump_err = objdump_proc.communicate() + if objdump_proc.returncode != 0: + raise ValueError('failed to run objdump {}: ' \ + + '{}'.format(objdump_command, objdump_err)) + + # Step 5: Process asm files so that files only contain asm instructions. 
+ asm_file.seek(0) + lines = asm_file.readlines() + asm_file.close() + write_asm_file = open(filename_prefix + ".parsed_s", "w") + # Remove the first line of .s file, which contains the filename. For example: + # // corpus/dff0318decde43ce5065a4209412aa2c68d01318 + for line in lines: + if "\t" in line: + write_asm_file.write(line) + write_asm_file.close() + + # Step 6: Process files generated by objdump so that the files only contain + # instructions. + objdump_file.seek(0) + temp_objdump_file = tempfile.TemporaryFile("w+r") + # Only the lines containing asm instructions have tabs in them; remove all + # other lines (for example, header lines and whitespace). + for line in objdump_file: + if "\t" in line: + temp_objdump_file.write(line) + objdump_file.close() + with open(filename_prefix + ".parsed_objdump", "w") as write_objdump_file: + temp_objdump_file.seek(0) + # Each line of the objdump output looks something like this: + # 0: 00318033 add x0,x3,x3 + # We remove the first two columns, leaving only the asm instruction. + for line in temp_objdump_file: + parts = line.split()[2:] + write_objdump_file.write("\t"); + write_objdump_file.write('\t'.join(parts)) + write_objdump_file.write("\n") + temp_objdump_file.close() + + # Step 7: Diff the .s file generated by proto-to-asm and the .objdump file + # generated by objdump. + prefix = "Checking " + filename + "..." 
+ file1 = open(filename_prefix + ".parsed_s", "r") + file2 = open(filename_prefix + ".parsed_objdump", "r") + diff = difflib.ndiff(file1.readlines(), file2.readlines()) + delta = ''.join(x[0:] for x in diff if x.startswith(('- ', '+ '))) + if delta: + print(prefix + "FAILURE!") + fails = fails + 1 + list_of_failures.append(filename) + list_of_failures.append(delta) + else: + print(prefix + "SUCCESS!") + passes = passes + 1 + +print("Succeeded: " + str(passes) + "\t\tFailed: " + str(fails) \ + + "\t\t[Total: " + str(passes + fails) + "]") + +if (fails != 0): + print("The following files failed...") + for item in list_of_failures: + print(item) + +sys.exit(0 if fails == 0 else 1) Index: tools/llvm-mc-assemble-proto-fuzzer/proto-to-asm/CMakeLists.txt =================================================================== --- /dev/null +++ tools/llvm-mc-assemble-proto-fuzzer/proto-to-asm/CMakeLists.txt @@ -0,0 +1,14 @@ +set(LLVM_LINK_COMPONENTS ${LLVM_TARGETS_TO_BUILD}) +set(CMAKE_CXX_FLAGS ${CXX_FLAGS_NOFUZZ}) + +# Needed by LLVM's CMake checks because this file defines multiple targets. +set(LLVM_OPTIONAL_SOURCES proto_to_asm.cpp proto_to_asm_main.cpp) + +add_clang_library(mcProtoToASM proto_to_asm.cpp + DEPENDS mcASMProto + LINK_LIBS mcASMProto ${PROTOBUF_LIBRARIES} + ) + +add_clang_executable(llvm-mc-assemble-proto-to-asm proto_to_asm_main.cpp) + +target_link_libraries(llvm-mc-assemble-proto-to-asm PRIVATE mcProtoToASM) Index: tools/llvm-mc-assemble-proto-fuzzer/proto-to-asm/proto_to_asm.h =================================================================== --- /dev/null +++ tools/llvm-mc-assemble-proto-fuzzer/proto-to-asm/proto_to_asm.h @@ -0,0 +1,23 @@ +//==-- proto_to_asm.h - Protobuf-ASM conversion ----------------------------==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+//
+//===----------------------------------------------------------------------===//
+//
+// Declares the functions that convert an Assembly protobuf into ASM text.
+//
+//===----------------------------------------------------------------------===//
+
+#include <cstddef>
+#include <cstdint>
+#include <string>
+
+namespace mc_proto_fuzzer {
+class Assembly;
+// ProtoToASM parses |data| as an Assembly; flag_enabled turns off aliases.
+std::string FunctionToString(const Assembly &input);
+std::string ProtoToASM(const uint8_t *data, size_t size, bool flag_enabled);
+} // namespace mc_proto_fuzzer
Index: tools/llvm-mc-assemble-proto-fuzzer/proto-to-asm/proto_to_asm.cpp
===================================================================
--- /dev/null
+++ tools/llvm-mc-assemble-proto-fuzzer/proto-to-asm/proto_to_asm.cpp
@@ -0,0 +1,140 @@
+//==-- proto_to_asm.cpp - Protobuf-ASM conversion --------------------------==//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implements functions for converting between protobufs for an example
+// assembly language grammar and the assembly language instructions.
+//
+//===----------------------------------------------------------------------===//
+#include "proto_to_asm.h"
+#include "asm_proto.pb.h"
+
+#include <ostream>
+#include <sstream>
+
+namespace mc_proto_fuzzer {
+
+// When true, every statement is printed in its plain three-register form.
+// When false, "sub rd,x0,rs" is printed as the alias "neg rd,rs" instead.
+// Overwritten by ProtoToASM() on each call.
+static bool NoAliases = false;
+
+// Prints the register's name ("x0" ... "x31"); any other enum value prints
+// nothing.
+std::ostream &operator<<(std::ostream &OS, const Register &X) {
+  switch (X.name()) {
+  case Register::X0: OS << "x0"; break;
+  case Register::X1: OS << "x1"; break;
+  case Register::X2: OS << "x2"; break;
+  case Register::X3: OS << "x3"; break;
+  case Register::X4: OS << "x4"; break;
+  case Register::X5: OS << "x5"; break;
+  case Register::X6: OS << "x6"; break;
+  case Register::X7: OS << "x7"; break;
+  case Register::X8: OS << "x8"; break;
+  case Register::X9: OS << "x9"; break;
+  case Register::X10: OS << "x10"; break;
+  case Register::X11: OS << "x11"; break;
+  case Register::X12: OS << "x12"; break;
+  case Register::X13: OS << "x13"; break;
+  case Register::X14: OS << "x14"; break;
+  case Register::X15: OS << "x15"; break;
+  case Register::X16: OS << "x16"; break;
+  case Register::X17: OS << "x17"; break;
+  case Register::X18: OS << "x18"; break;
+  case Register::X19: OS << "x19"; break;
+  case Register::X20: OS << "x20"; break;
+  case Register::X21: OS << "x21"; break;
+  case Register::X22: OS << "x22"; break;
+  case Register::X23: OS << "x23"; break;
+  case Register::X24: OS << "x24"; break;
+  case Register::X25: OS << "x25"; break;
+  case Register::X26: OS << "x26"; break;
+  case Register::X27: OS << "x27"; break;
+  case Register::X28: OS << "x28"; break;
+  case Register::X29: OS << "x29"; break;
+  case Register::X30: OS << "x30"; break;
+  case Register::X31: OS << "x31"; break;
+  }
+  return OS;
+}
+
+// Prints the opcode mnemonic; opcodes other than ADD and SUB print nothing.
+std::ostream &operator<<(std::ostream &OS, const ThreeRegOpcode &X) {
+  switch (X.op()) {
+  case ThreeRegOpcode_Op_ADD: OS << "add"; break;
+  case ThreeRegOpcode_Op_SUB: OS << "sub"; break;
+  }
+  return OS;
+}
+
+// Prints the three operands separated by commas, e.g. "x0,x3,x3".
+std::ostream &operator<<(std::ostream &OS, const ThreeRegSeq &X) {
+  return OS << X.operand1() << "," << X.operand2() << "," << X.operand3();
+}
+
+// Prints one instruction line: tab, mnemonic, tab, operands, newline.
+std::ostream &operator<<(std::ostream &OS, const ThreeRegStatement &X) {
+  return OS << "\t" << X.opcode() << "\t" << X.operands() << "\n";
+}
+
+// Prints a statement, substituting the "neg" alias for "sub rd,x0,rs" unless
+// NoAliases is set. Statements without a three-register payload, and (when
+// aliases are enabled) opcodes other than ADD/SUB, print nothing.
+std::ostream &operator<<(std::ostream &OS, const AsmStatement &X) {
+  if (!X.has_threeregstatement())
+    return OS;
+
+  // Bind by reference: copying the protobuf messages here is pure overhead.
+  const ThreeRegStatement &Stmt = X.threeregstatement();
+  const auto Op = Stmt.opcode().op();
+  if (NoAliases || Op == ThreeRegOpcode_Op_ADD)
+    return OS << Stmt;
+  if (Op != ThreeRegOpcode_Op_SUB)
+    return OS;
+
+  const ThreeRegSeq &Operands = Stmt.operands();
+  if (Operands.operand2().name() == Register_RegName_X0)
+    return OS << "\tneg\t" << Operands.operand1() << ","
+              << Operands.operand3() << "\n";
+  return OS << Stmt;
+}
+
+// Prints every statement of the sequence in order.
+std::ostream &operator<<(std::ostream &OS, const AsmStatementSeq &X) {
+  for (const auto &ST : X.statements())
+    OS << ST;
+  return OS;
+}
+
+// Prints the whole program, i.e. its statement sequence.
+std::ostream &operator<<(std::ostream &OS, const Assembly &X) {
+  return OS << X.asmstatements();
+}
+
+// ---------------------------------
+
+// Renders an already-parsed Assembly message as assembly text.
+std::string FunctionToString(const Assembly &Input) {
+  std::ostringstream OS;
+  OS << Input;
+  return OS.str();
+}
+
+// Parses Size bytes at Data with ParsePartialFromArray and returns the
+// resulting assembly text, or a "#error" line if parsing fails. Flag becomes
+// the new value of NoAliases.
+std::string ProtoToASM(const uint8_t *Data, size_t Size, bool Flag) {
+  Assembly Message;
+  NoAliases = Flag;
+  if (!Message.ParsePartialFromArray(Data, Size))
+    return "#error invalid proto\n";
+  return FunctionToString(Message);
+}
+
+} // namespace mc_proto_fuzzer
Index: tools/llvm-mc-assemble-proto-fuzzer/proto-to-asm/proto_to_asm_main.cpp
===================================================================
--- /dev/null
+++ tools/llvm-mc-assemble-proto-fuzzer/proto-to-asm/proto_to_asm_main.cpp
@@ -0,0 +1,67 @@
+//==-- proto_to_asm_main.cpp - Driver for protobuf-ASM conversion ----------==//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implements a simple driver to print an ASM program from a protobuf.
+//
+//===----------------------------------------------------------------------===//
+#include "proto_to_asm.h"
+
+#include "llvm/Support/CommandLine.h"
+
+#include <cstdint>
+#include <fstream>
+#include <iostream>
+#include <iterator>
+#include <streambuf>
+#include <string>
+#include <vector>
+
+using namespace llvm;
+static cl::opt<bool> NoAliases("riscv-no-aliases",
+                               cl::desc("Set to false to match printed asm"
+                                        " of objdump"),
+                               cl::value_desc("boolean"),
+                               cl::init(false));
+
+// Usage: llvm-mc-assemble-proto-to-asm [-riscv-no-aliases] <proto-file>...
+//
+// Any argument containing "riscv-no-aliases" is handed to the LLVM
+// command-line parser; every other argument is treated as the path of a
+// serialized protobuf whose assembly is printed to stdout after a
+// "// <path>" header line. Returns 1 if any input could not be opened.
+int main(int argc, char **argv) {
+  std::vector<const char *> OptionArgs(1, argv[0]);
+  std::vector<const char *> InputFiles;
+  for (int I = 1; I < argc; ++I) {
+    if (std::string(argv[I]).find("riscv-no-aliases") != std::string::npos)
+      OptionArgs.push_back(argv[I]);
+    else
+      InputFiles.push_back(argv[I]);
+  }
+  cl::ParseCommandLineOptions(static_cast<int>(OptionArgs.size()),
+                              OptionArgs.data());
+
+  int ExitCode = 0;
+  for (const char *Path : InputFiles) {
+    // Serialized protobufs are binary data, and the driver only reads them.
+    std::ifstream In(Path, std::ios::in | std::ios::binary);
+    if (!In) {
+      std::cerr << "error: cannot open '" << Path << "'\n";
+      ExitCode = 1;
+      continue;
+    }
+    const std::string Bytes((std::istreambuf_iterator<char>(In)),
+                            std::istreambuf_iterator<char>());
+    std::cout << "// " << Path << std::endl;
+    std::cout << mc_proto_fuzzer::ProtoToASM(
+        reinterpret_cast<const uint8_t *>(Bytes.data()), Bytes.size(),
+        NoAliases);
+  }
+  return ExitCode;
+}