Index: CMakeLists.txt
===================================================================
--- CMakeLists.txt
+++ CMakeLists.txt
@@ -336,7 +336,8 @@
 option(BUILD_SHARED_LIBS
   "Build all libraries as shared libraries instead of static" OFF)
 
-option(ENABLE_ASSEMBLE_PROTO_FUZZER "Build LLVM MC -assemble protobuf fuzzer." OFF)
+option(ENABLE_ASSEMBLE_PROTO_FUZZER "Build LLVM MC Assembler protobuf fuzzer." OFF)
+option(ENABLE_DISASSEMBLE_PROTO_FUZZER "Build LLVM MC Disassembler protobuf fuzzer." OFF)
 
 option(LLVM_ENABLE_BACKTRACES "Enable embedding backtraces on crash." ON)
 if(LLVM_ENABLE_BACKTRACES)
Index: cmake/modules/ProtobufMutator2.cmake
===================================================================
--- /dev/null
+++ cmake/modules/ProtobufMutator2.cmake
@@ -0,0 +1,29 @@
+# Builds libprotobuf-mutator as an external project and sets
+# ProtobufMutator_INCLUDE_DIRS / ProtobufMutator_LIBRARIES for the fuzzer.
+#
+# NOTE(review): PB_PATH, PBM_REPO and PBM_FUZZ_PATH are not set in this module;
+# presumably the including project defines them -- confirm.
+# FIXME: Fix double build of protobuf_mutator
+include(ExternalProject)
+
+set(PBM_PREFIX protobuf_mutator_2)
+set(PBM_PATH ${CMAKE_CURRENT_BINARY_DIR}/${PBM_PREFIX}/src/${PBM_PREFIX})
+set(PBM_LIB_PATH ${PBM_PATH}-build/src/libprotobuf-mutator.a)
+set(PBM_FUZZ_LIB_PATH ${PBM_PATH}-build/src/libfuzzer/libprotobuf-mutator-libfuzzer.a)
+set(CMAKE_MODULE_PATH "${PB_PATH};${CMAKE_MODULE_PATH}")
+
+ExternalProject_Add(${PBM_PREFIX}
+  PREFIX ${PBM_PREFIX}
+  GIT_REPOSITORY ${PBM_REPO}
+  GIT_TAG master
+  CMAKE_ARGS -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
+             -DCMAKE_PREFIX_PATH=${PBM_FUZZ_PATH}
+  CMAKE_CACHE_ARGS -DCMAKE_C_COMPILER:FILEPATH=${CMAKE_C_COMPILER}
+                   -DCMAKE_CXX_COMPILER:FILEPATH=${CMAKE_CXX_COMPILER}
+  BUILD_BYPRODUCTS ${PBM_LIB_PATH} ${PBM_FUZZ_LIB_PATH}
+  UPDATE_COMMAND ""
+  INSTALL_COMMAND ""
+  )
+
+set(ProtobufMutator_INCLUDE_DIRS ${PBM_PATH})
+set(ProtobufMutator_LIBRARIES ${PBM_FUZZ_LIB_PATH} ${PBM_LIB_PATH})
Index: tools/CMakeLists.txt
===================================================================
--- tools/CMakeLists.txt
+++ tools/CMakeLists.txt
@@ -40,6 +40,7 @@
 add_llvm_tool_subdirectory(llvm-lto)
 add_llvm_tool_subdirectory(llvm-profdata)
 add_llvm_tool_subdirectory(llvm-mc-assemble-proto-fuzzer)
+add_llvm_tool_subdirectory(llvm-mc-disassemble-proto-fuzzer)
 
 # Projects supported via LLVM_EXTERNAL_*_SOURCE_DIR need to be explicitly
 # specified.
Index: tools/llvm-mc-disassemble-proto-fuzzer/CMakeLists.txt
===================================================================
--- /dev/null
+++ tools/llvm-mc-disassemble-proto-fuzzer/CMakeLists.txt
@@ -0,0 +1,69 @@
+set(LLVM_LINK_COMPONENTS ${LLVM_TARGETS_TO_BUILD} FuzzMutate)
+set(CXX_FLAGS_NOFUZZ ${CMAKE_CXX_FLAGS})
+set(DUMMY_MAIN ProtoFuzzer.cpp)
+if(LLVM_LIB_FUZZING_ENGINE)
+  unset(DUMMY_MAIN)
+elseif(LLVM_USE_SANITIZE_COVERAGE)
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=fuzzer")
+  set(CXX_FLAGS_NOFUZZ "${CXX_FLAGS_NOFUZZ} -fsanitize=fuzzer-no-link")
+  unset(DUMMY_MAIN)
+endif()
+
+# Needed by LLVM's CMake checks because this file defines multiple targets.
+set(LLVM_OPTIONAL_SOURCES
+  ProtoFuzzer.cpp
+  )
+
+if(ENABLE_DISASSEMBLE_PROTO_FUZZER)
+  # Create protobuf .h and .cc files, and put them in a library for use by
+  # llvm-mc-disassemble-proto-fuzzer components.
+  find_package(Protobuf REQUIRED)
+  add_definitions(-DGOOGLE_PROTOBUF_NO_RTTI)
+  include_directories(${PROTOBUF_INCLUDE_DIRS})
+  include_directories(${CMAKE_CURRENT_BINARY_DIR})
+  protobuf_generate_cpp(EXAMPLE_PROTO_SRCS EXAMPLE_PROTO_HDRS
+    proto-files/example_encoding_proto.proto)
+
+  list(APPEND LLVM_OPTIONAL_SOURCES ${EXAMPLE_PROTO_SRCS})
+
+  llvm_add_library(mcEncodingProto
+    ${EXAMPLE_PROTO_SRCS}
+    ${EXAMPLE_PROTO_HDRS}
+
+    LINK_LIBS
+    ${PROTOBUF_LIBRARIES}
+    )
+
+  # Build and include libprotobuf-mutator
+  include(ProtobufMutator2)
+  include_directories(${ProtobufMutator_INCLUDE_DIRS})
+
+  # Build the .proto files.
+  add_llvm_subdirectory(LLVM TOOL proto-files)
+
+  # Build the protobuf->C++ translation library and driver.
+  add_subdirectory(proto-to-encoding)
+
+  # Build the fuzzer executable.
+  add_llvm_executable(llvm-mc-disassemble-proto-fuzzer
+    ${DUMMY_MAIN}
+    ProtoFuzzer.cpp
+    )
+
+  set(COMMON_PROTO_FUZZ_LIBRARIES
+    ${ProtobufMutator_LIBRARIES}
+    ${PROTOBUF_LIBRARIES}
+    ${LLVM_LIB_FUZZING_ENGINE}
+    mcHandleEncoding
+    )
+
+  target_link_libraries(llvm-mc-disassemble-proto-fuzzer
+    PRIVATE
+    ${COMMON_PROTO_FUZZ_LIBRARIES}
+    mcEncodingProto
+    mcProtoToEncoding
+    )
+
+endif()
+
+add_subdirectory(handle-encoding)
Index: tools/llvm-mc-disassemble-proto-fuzzer/ProtoFuzzer.cpp
===================================================================
--- /dev/null
+++ tools/llvm-mc-disassemble-proto-fuzzer/ProtoFuzzer.cpp
@@ -0,0 +1,27 @@
+//===-- ProtoFuzzer.cpp - Fuzz Disassembler -------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file implements a function that runs llvm mc disassemble on a single
+/// input and uses libprotobuf-mutator to find new inputs. This function is
+/// then linked into the Fuzzer library.
+///
+//===----------------------------------------------------------------------===//
+
+#include "handle-encoding/handle_encoding.h"
+#include "proto-to-encoding/proto_to_encoding.h"
+#include "example_encoding_proto.pb.h"
+#include "src/libfuzzer/libfuzzer_macro.h"
+
+using namespace mc_proto_fuzzer;
+
+DEFINE_BINARY_PROTO_FUZZER(const Encoding& input) {
+  // Render the protobuf message as a string and hand it to the disassembler.
+  HandleEncoding(FunctionToString(input));
+}
Index: tools/llvm-mc-disassemble-proto-fuzzer/handle-encoding/CMakeLists.txt
===================================================================
--- /dev/null
+++ tools/llvm-mc-disassemble-proto-fuzzer/handle-encoding/CMakeLists.txt
@@ -0,0 +1,5 @@
+set(LLVM_LINK_COMPONENTS ${LLVM_TARGETS_TO_BUILD} Support)
+
+add_llvm_library(mcHandleEncoding
+  handle_encoding.cpp
+  )
Index: tools/llvm-mc-disassemble-proto-fuzzer/handle-encoding/handle_encoding.h
===================================================================
--- /dev/null
+++ tools/llvm-mc-disassemble-proto-fuzzer/handle-encoding/handle_encoding.h
@@ -0,0 +1,24 @@
+//==-- handle_encoding.h - Helper function for mc fuzzers ------------------==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Declares HandleEncoding for use by the MC fuzzers.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MC_TOOLS_MC_FUZZER_HANDLE_ENCODING_HANDLEENCODING_H
+#define LLVM_MC_TOOLS_MC_FUZZER_HANDLE_ENCODING_HANDLEENCODING_H
+
+#include <string>
+#include <vector>
+
+namespace mc_proto_fuzzer {
+void HandleEncoding(const std::string &S);
+} // namespace mc_proto_fuzzer
+
+#endif
Index: tools/llvm-mc-disassemble-proto-fuzzer/handle-encoding/handle_encoding.cpp
===================================================================
--- /dev/null
+++ tools/llvm-mc-disassemble-proto-fuzzer/handle-encoding/handle_encoding.cpp
@@ -0,0 +1,141 @@
+//==-- handle_encoding.cpp - Helper function for mc fuzzers ----------------==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implements HandleEncoding for use by the mc fuzzers.
+//
+//===----------------------------------------------------------------------===//
+
+#include "handle_encoding.h"
+
+#include "llvm-c/Disassembler.h"
+#include "llvm-c/Target.h"
+#include "llvm/MC/SubtargetFeature.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+using namespace mc_proto_fuzzer;
+
+const unsigned AssemblyTextBufSize = 80;
+
+static cl::opt<std::string>
+    TripleName("triple", cl::desc("Target triple to disassemble for, "
+                                  "see -version for available targets"));
+
+// This is useful for variable-length instruction sets.
+static cl::opt<unsigned> InsnLimit(
+    "insn-limit",
+    cl::desc("Limit the number of instructions to process (0 for no limit)"),
+    cl::value_desc("count"), cl::init(0));
+
+static cl::list<std::string>
+    MAttrs("mattr", cl::CommaSeparated,
+           cl::desc("Target specific attributes (-mattr=help for details)"),
+           cl::value_desc("a1,+a2,-a3,..."));
+// The feature string derived from -mattr's values.
+std::string FeaturesStr;
+
+static cl::list<std::string>
+    FuzzerArgs("fuzzer-args", cl::Positional,
+               cl::desc("Options to pass to the fuzzer"), cl::ZeroOrMore,
+               cl::PositionalEatsArgs);
+static std::vector<char *> ModifiedArgv;
+
+int DisassembleInput(const std::string &S) {
+  // Disassembles the bytes of S one instruction at a time; always returns 0.
+  char AssemblyText[AssemblyTextBufSize];
+  std::vector<uint8_t> DataCopy(S.begin(), S.end());
+  size_t Size = S.size();
+  uint8_t *p = DataCopy.data();
+  LLVMDisasmContextRef Ctx = LLVMCreateDisasmCPUFeatures(
+      TripleName.c_str(), "", FeaturesStr.c_str(), nullptr, 0,
+      nullptr, nullptr);
+  assert(Ctx);
+  unsigned Consumed;
+  unsigned InstructionsProcessed = 0;
+  do {
+    Consumed = LLVMDisasmInstruction(Ctx, p, Size, 0, AssemblyText,
+                                     AssemblyTextBufSize);
+    Size -= Consumed;
+    p += Consumed;
+    // Honor -insn-limit: stop once that many instructions were processed.
+    ++InstructionsProcessed;
+    if (InsnLimit != 0 && InstructionsProcessed >= InsnLimit)
+      break;
+  } while (Consumed != 0);
+  LLVMDisasmDispose(Ctx);
+  return 0;
+}
+
+void mc_proto_fuzzer::HandleEncoding(const std::string &S) {
+  // DisassembleInput always returns 0, so the status is discarded.
+  (void)DisassembleInput(S);
+}
+
+extern "C" LLVM_ATTRIBUTE_USED int LLVMFuzzerInitialize(int *argc,
+                                                        char ***argv) {
+  // The command line is unusual compared to other fuzzers due to the need to
+  // specify the target. Options like -triple and -mattr work like
+  // their counterparts in llvm-mc, while -fuzzer-args collects options for the
+  // fuzzer itself.
+  //
+  // Examples:
+  //
+  // Fuzz the RISCV32 disassembler using 100,000 inputs of up to 32-bytes each
+  // and use the contents of ./corpus as the test corpus:
+  //    llvm-mc-disassemble-proto-fuzzer -triple riscv32 \
+  //        -fuzzer-args -max_len=32 -runs=100000 ./corpus
+  //
+  // If your aim is to find instructions that are not tested, then it is
+  // advisable to constrain the maximum input size to a single instruction
+  // using -max_len as in the first example. This results in a test corpus of
+  // individual instructions that test unique paths. Without this constraint,
+  // there will be considerable redundancy in the corpus.
+
+  char **OriginalArgv = *argv;
+
+  LLVMInitializeAllTargetInfos();
+  LLVMInitializeAllTargetMCs();
+  LLVMInitializeAllDisassemblers();
+
+  cl::ParseCommandLineOptions(*argc, OriginalArgv);
+
+  // Rebuild the argv without the arguments llvm-mc-disassemble-proto-fuzzer
+  // consumed so that the driver can parse its arguments.
+  //
+  // FuzzerArgs cannot provide the non-const pointer that OriginalArgv needs.
+  // Re-use the strings from OriginalArgv instead of copying FuzzerArg to a
+  // non-const buffer to avoid the need to clean up when the fuzzer terminates.
+
+  ModifiedArgv.push_back(OriginalArgv[0]);
+  for (const auto &FuzzerArg : FuzzerArgs) {
+    for (int i = 1; i < *argc; ++i) {
+      if (FuzzerArg == OriginalArgv[i])
+        ModifiedArgv.push_back(OriginalArgv[i]);
+    }
+  }
+
+  *argc = ModifiedArgv.size();
+  *argv = ModifiedArgv.data();
+
+  // Package up features to be passed to target/subtarget
+  // We have to pass it via a global since the callback doesn't
+  // permit any user data.
+  if (MAttrs.size()) {
+    SubtargetFeatures Features;
+    for (unsigned i = 0; i != MAttrs.size(); ++i)
+      Features.AddFeature(MAttrs[i]);
+    FeaturesStr = Features.getString();
+  }
+
+  if (TripleName.empty())
+    TripleName = sys::getDefaultTargetTriple();
+
+  return 0;
+}
Index: tools/llvm-mc-disassemble-proto-fuzzer/proto-files/example_encoding_proto.proto
===================================================================
--- /dev/null
+++ tools/llvm-mc-disassemble-proto-fuzzer/proto-files/example_encoding_proto.proto
@@ -0,0 +1,30 @@
+//===-- example_encoding_proto.proto - Protobuf description of Encoding ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file describes a subset of RISC-V machine instruction encodings as a
+/// protobuf. It is used by the example fuzzer to generate basic inputs
+/// to fuzz the llvm mc layer.
+///
+//===----------------------------------------------------------------------===//
+
+syntax = "proto2";
+
+package mc_proto_fuzzer;
+
+message Opcode {
+  enum ValueRange {
+    NOP = 0;
+  };
+  required ValueRange value = 1;
+}
+
+message Encoding {
+  required Opcode opcode = 1;
+}
Index: tools/llvm-mc-disassemble-proto-fuzzer/proto-to-encoding/CMakeLists.txt
===================================================================
--- /dev/null
+++ tools/llvm-mc-disassemble-proto-fuzzer/proto-to-encoding/CMakeLists.txt
@@ -0,0 +1,15 @@
+set(LLVM_LINK_COMPONENTS ${LLVM_TARGETS_TO_BUILD})
+set(CMAKE_CXX_FLAGS ${CXX_FLAGS_NOFUZZ})
+
+# Needed by LLVM's CMake checks because this file defines multiple targets.
+set(LLVM_OPTIONAL_SOURCES example_proto_to_encoding.cpp proto_to_encoding_main.cpp)
+
+llvm_add_library(mcProtoToEncoding example_proto_to_encoding.cpp
+  DEPENDS mcEncodingProto
+  LINK_LIBS mcEncodingProto ${PROTOBUF_LIBRARIES}
+  )
+
+add_llvm_executable(llvm-mc-disassemble-proto-to-encoding proto_to_encoding_main.cpp)
+
+target_link_libraries(llvm-mc-disassemble-proto-to-encoding PRIVATE
+  mcProtoToEncoding)
Index: tools/llvm-mc-disassemble-proto-fuzzer/proto-to-encoding/example_proto_to_encoding.cpp
===================================================================
--- /dev/null
+++ tools/llvm-mc-disassemble-proto-fuzzer/proto-to-encoding/example_proto_to_encoding.cpp
@@ -0,0 +1,55 @@
+//==-- example_proto_to_encoding.cpp - Protobuf-Encoding conversion --------==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implements functions for converting between protobufs and bit patterns
+// that represent a subset of the RISC-V machine instruction encodings. This
+// example version just hardcodes the bit encoding based on an opcode, in this
+// case just for the simple nop instruction.
+//
+//===----------------------------------------------------------------------===//
+#include "proto_to_encoding.h"
+#include "example_encoding_proto.pb.h"
+
+#include <cstdint>
+#include <ostream>
+#include <sstream>
+#include <string>
+
+using namespace google::protobuf;
+
+namespace mc_proto_fuzzer {
+
+std::ostream &operator<<(std::ostream &OS, const Opcode &X) {
+  switch (X.value()) {
+    case Opcode::NOP: OS << "0x13 0x00 0x00 0x00"; break;
+    default: OS << "0x00 0x00 0x00 0x13"; break;
+  }
+  return OS;
+}
+
+std::ostream &operator<<(std::ostream &OS, const Encoding &X) {
+  return OS << X.opcode() << "\n";
+}
+
+// ---------------------------------
+
+std::string FunctionToString(const Encoding &Input) {
+  std::ostringstream OS;
+  OS << Input;
+  return OS.str();
+}
+
+std::string ProtoToEncoding(const uint8_t *Data, size_t Size) {
+  Encoding Message;
+  if (!Message.ParsePartialFromArray(Data, Size))
+    return "#error invalid proto\n";
+  return FunctionToString(Message);
+}
+
+} // namespace mc_proto_fuzzer
Index: tools/llvm-mc-disassemble-proto-fuzzer/proto-to-encoding/proto_to_encoding.h
===================================================================
--- /dev/null
+++ tools/llvm-mc-disassemble-proto-fuzzer/proto-to-encoding/proto_to_encoding.h
@@ -0,0 +1,22 @@
+//==-- proto_to_encoding.h - Protobuf-Encoding conversion ------------------==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Declares functions for converting between protobufs and encoding of RISC-V
+// machine instructions.
+//
+//===----------------------------------------------------------------------===//
+
+#include <string>
+
+namespace mc_proto_fuzzer {
+class Encoding;
+
+std::string FunctionToString(const Encoding &input);
+std::string ProtoToEncoding(const uint8_t *data, size_t size);
+}
Index: tools/llvm-mc-disassemble-proto-fuzzer/proto-to-encoding/proto_to_encoding_main.cpp
===================================================================
--- /dev/null
+++ tools/llvm-mc-disassemble-proto-fuzzer/proto-to-encoding/proto_to_encoding_main.cpp
@@ -0,0 +1,30 @@
+//==-- proto_to_encoding_main.cpp - Driver for protobuf-encoding conversion ==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implements a simple driver to print hexadecimal encodings of machine
+// instructions from a protobuf.
+//
+//===----------------------------------------------------------------------===//
+#include "proto_to_encoding.h"
+
+#include <fstream>
+#include <iostream>
+#include <streambuf>
+#include <string>
+
+int main(int argc, char **argv) {
+  for (int i = 1; i < argc; i++) {
+    std::ifstream in(argv[i], std::ios::binary);
+    std::string str((std::istreambuf_iterator<char>(in)),
+                    std::istreambuf_iterator<char>());
+    std::cout << mc_proto_fuzzer::ProtoToEncoding(
+        reinterpret_cast<const uint8_t *>(str.data()), str.size());
+  }
+  return 0;
+}