Index: CMakeLists.txt =================================================================== --- CMakeLists.txt +++ CMakeLists.txt @@ -304,7 +304,8 @@ option(BUILD_SHARED_LIBS "Build all libraries as shared libraries instead of static" OFF) -option(ENABLE_ASSEMBLE_PROTO_FUZZER "Build LLVM MC -assemble protobuf fuzzer." OFF) +option(ENABLE_ASSEMBLE_PROTO_FUZZER "Build LLVM MC Assembler protobuf fuzzer." OFF) +option(ENABLE_DISASSEMBLE_PROTO_FUZZER "Build LLVM MC Disassembler protobuf fuzzer." OFF) option(LLVM_ENABLE_BACKTRACES "Enable embedding backtraces on crash." ON) if(LLVM_ENABLE_BACKTRACES) Index: cmake/modules/ProtobufMutator2.cmake =================================================================== --- /dev/null +++ cmake/modules/ProtobufMutator2.cmake @@ -0,0 +1,20 @@ +# TODO: Fix double build of protobuf_mutator +set(PBM_PREFIX protobuf_mutator2) +set(PBM_PATH ${CMAKE_CURRENT_BINARY_DIR}/${PBM_PREFIX}/src/${PBM_PREFIX}) +set(PBM_LIB_PATH ${PBM_PATH}-build/src/libprotobuf-mutator.a) +set(PBM_FUZZ_LIB_PATH ${PBM_PATH}-build/src/libfuzzer/libprotobuf-mutator-libfuzzer.a) + +ExternalProject_Add(${PBM_PREFIX} + PREFIX ${PBM_PREFIX} + GIT_REPOSITORY https://github.com/google/libprotobuf-mutator.git + GIT_TAG master + CMAKE_ARGS -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} + CMAKE_CACHE_ARGS -DCMAKE_C_COMPILER:FILEPATH=${CMAKE_C_COMPILER} + -DCMAKE_CXX_COMPILER:FILEPATH=${CMAKE_CXX_COMPILER} + BUILD_BYPRODUCTS ${PBM_LIB_PATH} ${PBM_FUZZ_LIB_PATH} + UPDATE_COMMAND "" + INSTALL_COMMAND "" + ) + +set(ProtobufMutator_INCLUDE_DIRS ${PBM_PATH}) +set(ProtobufMutator_LIBRARIES ${PBM_FUZZ_LIB_PATH} ${PBM_LIB_PATH}) Index: tools/CMakeLists.txt =================================================================== --- tools/CMakeLists.txt +++ tools/CMakeLists.txt @@ -40,6 +40,7 @@ add_llvm_tool_subdirectory(llvm-lto) add_llvm_tool_subdirectory(llvm-profdata) add_llvm_tool_subdirectory(llvm-mc-assemble-proto-fuzzer) +add_llvm_tool_subdirectory(llvm-mc-disassemble-proto-fuzzer) # 
Projects supported via LLVM_EXTERNAL_*_SOURCE_DIR need to be explicitly
 # specified.
Index: tools/llvm-mc-disassemble-proto-fuzzer/CMakeLists.txt
===================================================================
--- /dev/null
+++ tools/llvm-mc-disassemble-proto-fuzzer/CMakeLists.txt
@@ -0,0 +1,137 @@
+set(LLVM_LINK_COMPONENTS ${LLVM_TARGETS_TO_BUILD} FuzzMutate)
+
+# Compile flags without the libFuzzer link request. Not referenced again in
+# this file; the subdirectories added below inherit the variable.
+set(CXX_FLAGS_NOFUZZ "${CMAKE_CXX_FLAGS}")
+
+# The fuzzer sources in this directory define no main(); it has to come from
+# an external engine library or from libFuzzer via -fsanitize=fuzzer.
+set(MC_DISASSEMBLE_FUZZER_HAS_ENGINE OFF)
+if(LLVM_LIB_FUZZING_ENGINE)
+  set(MC_DISASSEMBLE_FUZZER_HAS_ENGINE ON)
+elseif(LLVM_USE_SANITIZE_COVERAGE)
+  string(APPEND CMAKE_CXX_FLAGS " -fsanitize=fuzzer")
+  string(APPEND CXX_FLAGS_NOFUZZ " -fsanitize=fuzzer-no-link")
+  set(MC_DISASSEMBLE_FUZZER_HAS_ENGINE ON)
+endif()
+
+# Needed by LLVM's CMake checks because this file defines multiple targets.
+set(LLVM_OPTIONAL_SOURCES
+  ExampleProtoFuzzer.cpp
+  ProtoFuzzer.cpp
+  UnconstrainedProtoFuzzer.cpp
+  )
+
+if(ENABLE_DISASSEMBLE_PROTO_FUZZER)
+  # Fail at configure time instead of with an undefined-main link error.
+  if(NOT MC_DISASSEMBLE_FUZZER_HAS_ENGINE)
+    message(FATAL_ERROR
+      "ENABLE_DISASSEMBLE_PROTO_FUZZER needs LLVM_LIB_FUZZING_ENGINE or "
+      "LLVM_USE_SANITIZE_COVERAGE to provide the fuzzers' main().")
+  endif()
+
+  # Create protobuf .h and .cc files, and put them in a library for use by
+  # LLVM MC Proto Fuzzer components.
+  find_package(Protobuf REQUIRED)
+  add_definitions(-DGOOGLE_PROTOBUF_NO_RTTI)
+  include_directories(${PROTOBUF_INCLUDE_DIRS})
+  include_directories(${CMAKE_CURRENT_BINARY_DIR})
+  protobuf_generate_cpp(PROTO_SRCS PROTO_HDRS
+    proto-files/riscv.proto
+    proto-files/riscv_a.proto
+    proto-files/riscv_c.proto
+    proto-files/riscv_d.proto
+    proto-files/riscv_f.proto
+    proto-files/riscv_i.proto
+    proto-files/riscv_m.proto)
+  protobuf_generate_cpp(EXAMPLE_PROTO_SRCS EXAMPLE_PROTO_HDRS
+    proto-files/example_encoding_proto.proto)
+  protobuf_generate_cpp(UNCONSTRAINED_PROTO_SRCS UNCONSTRAINED_PROTO_HDRS
+    proto-files/unconstrained_encoding.proto)
+
+  list(APPEND LLVM_OPTIONAL_SOURCES ${PROTO_SRCS})
+
+  llvm_add_library(mcEncodingProto
+    ${PROTO_SRCS}
+    ${PROTO_HDRS}
+
+    LINK_LIBS
+    ${PROTOBUF_LIBRARIES}
+    )
+  llvm_add_library(mcExampleEncodingProto
+    ${EXAMPLE_PROTO_SRCS}
+    ${EXAMPLE_PROTO_HDRS}
+
+    LINK_LIBS
+    ${PROTOBUF_LIBRARIES}
+    )
+  llvm_add_library(mcUnconstrainedEncodingProto
+    ${UNCONSTRAINED_PROTO_SRCS}
+    ${UNCONSTRAINED_PROTO_HDRS}
+
+    LINK_LIBS
+    ${PROTOBUF_LIBRARIES}
+    )
+
+  # Download and build libprotobuf-mutator. ExternalProject is included here
+  # because the ProtobufMutator2 module calls ExternalProject_Add.
+  include(ExternalProject)
+  include(ProtobufMutator2)
+  include_directories(${ProtobufMutator_INCLUDE_DIRS})
+
+  # Build the .proto files.
+  add_llvm_subdirectory(LLVM TOOL proto-files)
+
+  # Build the protobuf->C++ translation library and driver.
+  add_subdirectory(proto-to-encoding)
+
+  # Build the library that sets up and runs the fuzz target.
+  add_subdirectory(handle-encoding)
+
+  # Build the fuzzer executables.
+  add_llvm_executable(llvm-mc-disassemble-proto-fuzzer-riscv
+    ProtoFuzzer.cpp
+    )
+  add_llvm_executable(llvm-mc-disassemble-proto-fuzzer-example
+    ExampleProtoFuzzer.cpp
+    )
+  add_llvm_executable(llvm-mc-disassemble-proto-fuzzer-unconstrained
+    UnconstrainedProtoFuzzer.cpp
+    )
+
+  set(COMMON_PROTO_FUZZ_LIBRARIES
+    ${ProtobufMutator_LIBRARIES}
+    ${PROTOBUF_LIBRARIES}
+    ${LLVM_LIB_FUZZING_ENGINE}
+    mcHandleEncoding
+    )
+
+  target_link_libraries(llvm-mc-disassemble-proto-fuzzer-riscv
+    PRIVATE
+    ${COMMON_PROTO_FUZZ_LIBRARIES}
+    mcEncodingProto
+    mcProtoToEncoding
+    )
+  target_link_libraries(llvm-mc-disassemble-proto-fuzzer-example
+    PRIVATE
+    ${COMMON_PROTO_FUZZ_LIBRARIES}
+    mcExampleEncodingProto
+    mcExampleProtoToEncoding
+    )
+  target_link_libraries(llvm-mc-disassemble-proto-fuzzer-unconstrained
+    PRIVATE
+    ${COMMON_PROTO_FUZZ_LIBRARIES}
+    mcUnconstrainedEncodingProto
+    mcUnconstrainedProtoToEncoding
+    )
+
+  # libprotobuf-mutator's headers and archives only exist once the external
+  # project has been built, so order it before every fuzzer.
+  foreach(fuzzer
+      llvm-mc-disassemble-proto-fuzzer-riscv
+      llvm-mc-disassemble-proto-fuzzer-example
+      llvm-mc-disassemble-proto-fuzzer-unconstrained)
+    add_dependencies(${fuzzer} ${PBM_PREFIX})
+  endforeach()
+
+endif()
Index: tools/llvm-mc-disassemble-proto-fuzzer/ExampleProtoFuzzer.cpp
===================================================================
--- /dev/null
+++ tools/llvm-mc-disassemble-proto-fuzzer/ExampleProtoFuzzer.cpp
@@ -0,0 +1,27 @@
+//===-- ExampleProtoFuzzer.cpp - Fuzz Disassembler ------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file implements a function that runs LLVM MC disassemble on a single
+/// input and uses libprotobuf-mutator to find new inputs. This function is
+/// then linked into the Fuzzer library.
+/// +//===----------------------------------------------------------------------===// + +#include "example_encoding_proto.pb.h" +#include "handle-encoding/handle_encoding.h" +#include "proto-to-encoding/proto_to_encoding.h" +#include "src/libfuzzer/libfuzzer_macro.h" + +using namespace mc_proto_fuzzer; + +DEFINE_BINARY_PROTO_FUZZER(const Encoding &input) { + auto S = FunctionToString(input); + HandleEncoding(S); +} Index: tools/llvm-mc-disassemble-proto-fuzzer/ProtoFuzzer.cpp =================================================================== --- /dev/null +++ tools/llvm-mc-disassemble-proto-fuzzer/ProtoFuzzer.cpp @@ -0,0 +1,27 @@ +//===-- ProtoFuzzer.cpp - Fuzz Disassembler -------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file implements a function that runs LLVM MC disassemble on a single +/// input and uses libprotobuf-mutator to find new inputs. This function is +/// then linked into the Fuzzer library. 
+/// +//===----------------------------------------------------------------------===// + +#include "handle-encoding/handle_encoding.h" +#include "proto-to-encoding/proto_to_encoding.h" +#include "riscv.pb.h" +#include "src/libfuzzer/libfuzzer_macro.h" + +using namespace mc_proto_fuzzer; + +DEFINE_BINARY_PROTO_FUZZER(const Encoding& input) { + auto S = FunctionToString(input); + HandleEncoding(S); +} Index: tools/llvm-mc-disassemble-proto-fuzzer/README.txt =================================================================== --- /dev/null +++ tools/llvm-mc-disassemble-proto-fuzzer/README.txt @@ -0,0 +1,117 @@ +------------------------------------------------------------------------------- +Building: +------------------------------------------------------------------------------- +From your LLVM source directory: +$ mkdir -p build/llvm && cd build/llvm +$ cmake -GNinja \ + -DLLVM_EXPERIMENTAL_TARGETS_TO_BUILD="RISCV" \ + -DCMAKE_VERBOSE_MAKEFILE=ON \ + -DCMAKE_C_COMPILER=clang \ + -DCMAKE_CXX_COMPILER=clang++ \ + -DLLVM_USE_SANITIZE_COVERAGE=YES \ + -DLLVM_USE_SANITIZER=Address \ + -DENABLE_DISASSEMBLE_PROTO_FUZZER=ON \ + ../../llvm +$ ninja -v llvm-mc-disassemble-proto-fuzzer-example \ + llvm-mc-disassemble-proto-to-encoding-example \ + llvm-mc-disassemble-proto-fuzzer-riscv \ + llvm-mc-disassemble-proto-to-encoding-riscv \ + llvm-mc-disassemble-proto-fuzzer-unconstrained \ + llvm-mc-disassemble-proto-to-encoding-unconstrained + +------------------------------------------------------------------------------- +Setting Up: +------------------------------------------------------------------------------- +Create a corpus directory and a directory for output files (for example, +./corpus and ./outputdir). + +Make sure that llvm-mc-disassemble-proto-fuzzer-riscv and +llvm-mc-disassemble-proto-to-encoding-riscv (or whichever fuzzer version +you want to run) are in your path. 
+ +------------------------------------------------------------------------------- +Running: +------------------------------------------------------------------------------- +A. If you would like to run the fuzzer for a long period of time. +------------------------------------------------------------------------------- +Run the fuzzer (llvm-mc-disassemble-proto-fuzzer-riscv), specifying a corpus +directory and any other relevant arguments. + +For example, from your build/llvm directory: +$ ./bin/llvm-mc-disassemble-proto-fuzzer-riscv corpus \ + -triple=riscv32 \ + -mattr=+a,+c,+m,+f,+d + +When the fuzzer finishes running (the corpus should converge relatively quickly, +likely within an hour), then run the Python script. +------------------------------------------------------------------------------- +B. If you would only like to run the fuzzer for a specific number of runs. +------------------------------------------------------------------------------- +Simply run it via the Python script and pass --runs on the +command line. + +Specify any args you would like to run the fuzzer with, including the corpus +directory and the full path to the GNU objdump and GNU assembler executables. + +For example: + +$ python mcfuzz.py --corpus corpus \ + --max-len 32 \ + --runs 1000 \ + --triple riscv64 \ + --out outputdir \ + --objdump /full/path/to/riscv64-unknown-linux-gnu-objdump \ + --mattr +a,+c,+m,+d,+f \ + --march rv64imafdc \ + --assemble /full/path/to/riscv64-unknown-linux-gnu-as \ + --summary summary64.txt + +By default, the script will run the constrained fuzzer that combines a 32-bit +fuzzer-generated value with the fixed fields (like opcode and funct3) that +correspond to a fuzzed instruction type. The constrained fuzzer enumerates +all the instructions in the RISC-V 32 and 64 ISAs. + +Running the script with --unconstrained will run the unconstrained fuzzer, +which simply attempts to disassemble a fuzzer-generated 32-bit unsigned value. 
+ +Run the script with the --verbose flag if you would like to see the output +of the fuzzer as it runs, along with other relevant information. Otherwise, the +script will simply print out a summary of results at the end. + +Running the script with --summary <filename> will change the name of the +summary file (default summary.txt). + +NOTE: If you run the script without specifying an output directory [--out], +the script will terminate after fuzzing the corpus a specified +number [--runs] of times. + +After the first part of the script completes, the corpus directory should +contain the generated corpus files. + +Next, we populate the output directory by iterating through all the files +currently in the corpus. First, we run the golden disassembler (GNU) and check +its behavior (successfully disassembled, or failed to disassemble) against +the behavior of our fuzz target (LLVM-MC disassembler). If and only if both +disassemblers disassemble the input, we proceed. + +We use the llvm-mc-disassemble-proto-to-encoding-riscv tool to generate +the .enc files (text files containing hexadecimal instruction encodings). +We use the llvm-mc-disassemble-proto-fuzzer-riscv tool, invoked on each file +in the corpus, with -runs=1, to generate a corresponding .s file. +We then call GNU AS on the .s file, and GNU Objdump on the resulting .o, +to produce a .ref_enc file that can be compared to our initial .enc file. + +The last part of the script prints out a summary of results: on which files the +two disassemblers (fuzz target LLVM disassembler and golden disassembler GNU +disassembler) both failed, on which files only one disassembler failed, and +on which files both disassemblers successfully disassembled the input +instruction encodings. + +The printed output will show the encodings that caused the fuzz target +LLVM disassembler to fail (but not the golden disassembler). 
It will also print +out the encodings that both disassemblers disassembled if there were any +differences between the golden assembler's output and our original input +encoding. + +Finally, the summary text file generated will contain more detailed information +such as the files and ASM statements whose encodings differed. Index: tools/llvm-mc-disassemble-proto-fuzzer/UnconstrainedProtoFuzzer.cpp =================================================================== --- /dev/null +++ tools/llvm-mc-disassemble-proto-fuzzer/UnconstrainedProtoFuzzer.cpp @@ -0,0 +1,27 @@ +//===-- UnconstrainedProtoFuzzer.cpp - Fuzz Disassembler ------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file implements a function that runs LLVM MC disassemble on a single +/// input and uses libprotobuf-mutator to find new inputs. This function is +/// then linked into the Fuzzer library. 
+/// +//===----------------------------------------------------------------------===// + +#include "handle-encoding/handle_encoding.h" +#include "proto-to-encoding/proto_to_encoding.h" +#include "unconstrained_encoding.pb.h" +#include "src/libfuzzer/libfuzzer_macro.h" + +using namespace mc_proto_fuzzer; + +DEFINE_BINARY_PROTO_FUZZER(const Encoding& input) { + auto S = FunctionToString(input); + HandleEncoding(S); +} Index: tools/llvm-mc-disassemble-proto-fuzzer/handle-encoding/CMakeLists.txt =================================================================== --- /dev/null +++ tools/llvm-mc-disassemble-proto-fuzzer/handle-encoding/CMakeLists.txt @@ -0,0 +1,6 @@ +set(LLVM_LINK_COMPONENTS ${LLVM_TARGETS_TO_BUILD} Support) + +add_llvm_library(mcHandleEncoding + handle_encoding.cpp + Disassembler.cpp + ) Index: tools/llvm-mc-disassemble-proto-fuzzer/handle-encoding/Disassembler.h =================================================================== --- /dev/null +++ tools/llvm-mc-disassemble-proto-fuzzer/handle-encoding/Disassembler.h @@ -0,0 +1,43 @@ +//===- Disassembler.h - Text File Disassembler ----------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This class implements the disassembler of strings of bytes written in +// hexadecimal, from standard input or from a file. +// Based on llvm-mc source code. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_MC_DISASSEMBLER_H +#define LLVM_TOOLS_LLVM_MC_DISASSEMBLER_H + +#include + +namespace llvm { + +class MemoryBuffer; +class Target; +class raw_ostream; +class SourceMgr; +class MCSubtargetInfo; +class MCStreamer; + +class Disassembler { +public: + static int disassemble(const Target &T, + const std::string &Triple, + MCSubtargetInfo &STI, + MCStreamer &Streamer, + MemoryBuffer &Buffer, + SourceMgr &SM, + raw_ostream &Out); +}; + +} // namespace llvm + +#endif Index: tools/llvm-mc-disassemble-proto-fuzzer/handle-encoding/Disassembler.cpp =================================================================== --- /dev/null +++ tools/llvm-mc-disassemble-proto-fuzzer/handle-encoding/Disassembler.cpp @@ -0,0 +1,206 @@ +//===- Disassembler.cpp - Disassembler for hex strings --------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This class implements the disassembler of strings of bytes written in +// hexadecimal, from standard input or from a file. +// Based on llvm-mc source code. 
+// +//===----------------------------------------------------------------------===// + +#include "Disassembler.h" +#include "llvm/ADT/Triple.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCDisassembler/MCDisassembler.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +typedef std::pair, std::vector> + ByteArrayTy; + +static bool PrintInsts(const MCDisassembler &DisAsm, const ByteArrayTy &Bytes, + SourceMgr &SM, raw_ostream &Out, MCStreamer &Streamer, + bool InAtomicBlock, const MCSubtargetInfo &STI) { + ArrayRef Data(Bytes.first.data(), Bytes.first.size()); + + // Disassemble it to strings. + uint64_t Size; + uint64_t Index; + + for (Index = 0; Index < Bytes.first.size(); Index += Size) { + MCInst Inst; + + MCDisassembler::DecodeStatus S; + S = DisAsm.getInstruction(Inst, Size, Data.slice(Index), Index, + /*REMOVE*/ nulls(), nulls()); + switch (S) { + case MCDisassembler::Fail: + SM.PrintMessage(SMLoc::getFromPointer(Bytes.second[Index]), + SourceMgr::DK_Warning, "invalid instruction encoding"); + // Don't try to resynchronise the stream in a block + if (InAtomicBlock) + return true; + + // Skip illegible bytes. + if (Size == 0) + Size = 1; + + break; + + case MCDisassembler::SoftFail: + SM.PrintMessage(SMLoc::getFromPointer(Bytes.second[Index]), + SourceMgr::DK_Warning, + "potentially undefined instruction encoding"); + LLVM_FALLTHROUGH; + + case MCDisassembler::Success: + Streamer.EmitInstruction(Inst, STI); + break; + } + } + + return false; +} + +static bool SkipToToken(StringRef &Str) { + while (true) { + if (Str.empty()) + return false; + + // Strip horizontal whitespace and commas. 
+ if (size_t Pos = Str.find_first_not_of(" \t\r\n,")) { + Str = Str.substr(Pos); + continue; + } + + // If this is the start of a comment, remove the rest of the line. + if (Str[0] == '#') { + Str = Str.substr(Str.find_first_of('\n')); + continue; + } + return true; + } +} + +static bool ByteArrayFromString(ByteArrayTy &ByteArray, StringRef &Str, + SourceMgr &SM) { + while (SkipToToken(Str)) { + // Handled by higher level. + if (Str[0] == '[' || Str[0] == ']') + return false; + + // Get the current token. + size_t Next = Str.find_first_of(" \t\n\r,#[]"); + StringRef Value = Str.substr(0, Next); + + // Convert to a byte and add to the byte vector. + unsigned ByteVal; + if (Value.getAsInteger(0, ByteVal) || ByteVal > 255) { + // If we have an error, print it and skip to the end of line. + SM.PrintMessage(SMLoc::getFromPointer(Value.data()), SourceMgr::DK_Error, + "invalid input token"); + Str = Str.substr(Str.find('\n')); + ByteArray.first.clear(); + ByteArray.second.clear(); + continue; + } + + ByteArray.first.push_back(ByteVal); + ByteArray.second.push_back(Value.data()); + Str = Str.substr(Next); + } + + return false; +} + +int Disassembler::disassemble(const Target &T, const std::string &Triple, + MCSubtargetInfo &STI, MCStreamer &Streamer, + MemoryBuffer &Buffer, SourceMgr &SM, + raw_ostream &Out) { + + std::unique_ptr MRI(T.createMCRegInfo(Triple)); + if (!MRI) { + errs() << "error: no register info for target " << Triple << "\n"; + return -1; + } + + std::unique_ptr MAI(T.createMCAsmInfo(*MRI, Triple)); + if (!MAI) { + errs() << "error: no assembly info for target " << Triple << "\n"; + return -1; + } + + // Set up the MCContext for creating symbols and MCExpr's. 
+ MCContext Ctx(MAI.get(), MRI.get(), nullptr); + + std::unique_ptr DisAsm( + T.createMCDisassembler(STI, Ctx)); + if (!DisAsm) { + errs() << "error: no disassembler for target " << Triple << "\n"; + return -1; + } + + // Set up initial section manually here + Streamer.InitSections(false); + + bool ErrorOccurred = false; + + // Convert the input to a vector for disassembly. + ByteArrayTy ByteArray; + StringRef Str = Buffer.getBuffer(); + bool InAtomicBlock = false; + + while (SkipToToken(Str)) { + ByteArray.first.clear(); + ByteArray.second.clear(); + + if (Str[0] == '[') { + if (InAtomicBlock) { + SM.PrintMessage(SMLoc::getFromPointer(Str.data()), SourceMgr::DK_Error, + "nested atomic blocks make no sense"); + ErrorOccurred = true; + } + InAtomicBlock = true; + Str = Str.drop_front(); + continue; + } else if (Str[0] == ']') { + if (!InAtomicBlock) { + SM.PrintMessage(SMLoc::getFromPointer(Str.data()), SourceMgr::DK_Error, + "attempt to close atomic block without opening"); + ErrorOccurred = true; + } + InAtomicBlock = false; + Str = Str.drop_front(); + continue; + } + + // It's a real token, get the bytes and emit them + ErrorOccurred |= ByteArrayFromString(ByteArray, Str, SM); + + if (!ByteArray.first.empty()) + ErrorOccurred |= + PrintInsts(*DisAsm, ByteArray, SM, Out, Streamer, InAtomicBlock, STI); + } + + if (InAtomicBlock) { + SM.PrintMessage(SMLoc::getFromPointer(Str.data()), SourceMgr::DK_Error, + "unclosed atomic block"); + ErrorOccurred = true; + } + + return ErrorOccurred; +} Index: tools/llvm-mc-disassemble-proto-fuzzer/handle-encoding/handle_encoding.h =================================================================== --- /dev/null +++ tools/llvm-mc-disassemble-proto-fuzzer/handle-encoding/handle_encoding.h @@ -0,0 +1,23 @@ +//==-- handle_encoding.h - Helper function for mc fuzzers ------------------==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. 
See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Declares HandleEncoding for use by the MC fuzzers. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_MC_TOOLS_MC_FUZZER_HANDLE_ENCODING_HANDLEENCODING_H +#define LLVM_MC_TOOLS_MC_FUZZER_HANDLE_ENCODING_HANDLEENCODING_H + +#include + +namespace mc_proto_fuzzer { +void HandleEncoding(const std::string &S); +} // namespace mc_proto_fuzzer + +#endif Index: tools/llvm-mc-disassemble-proto-fuzzer/handle-encoding/handle_encoding.cpp =================================================================== --- /dev/null +++ tools/llvm-mc-disassemble-proto-fuzzer/handle-encoding/handle_encoding.cpp @@ -0,0 +1,271 @@ +//==-- handle_encoding.cpp - Sets up Fuzz Target ---------------------------==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Implements HandleEncoding for use by the mc fuzzers. 
+// +//===----------------------------------------------------------------------===// + +#include "handle_encoding.h" + +#include "Disassembler.h" +#include "llvm-c/Target.h" +#include "llvm/MC/MCAsmBackend.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCCodeEmitter.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCDisassembler/MCDisassembler.h" +#include "llvm/MC/MCInstPrinter.h" +#include "llvm/MC/MCObjectFileInfo.h" +#include "llvm/MC/MCParser/MCTargetAsmParser.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCTargetOptionsCommandFlags.inc" +#include "llvm/Support/SourceMgr.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/ToolOutputFile.h" +#include "llvm/Support/WithColor.h" +#include "llvm/Support/raw_ostream.h" + +#include + +using namespace llvm; +using namespace mc_proto_fuzzer; + +static cl::opt OutputDirname("out", + cl::desc("Directory name for " + "fuzzer output"), + cl::value_desc("dirname"), + cl::init("./outputdir")); + +static cl::opt + TripleName("triple", cl::desc("Target triple to assemble for, " + "see -version for available targets")); + +// This is useful for variable-length instruction sets. +static cl::opt + InsnLimit("insn-limit", + cl::desc("Limit the number of instructions to " + "process (0 for no limit)"), + cl::value_desc("count"), cl::init(0)); + +static cl::opt + ShowEncoding("show-encoding", + cl::desc("Show instruction encodings")); + +static cl::opt + ShowInst("show-inst", cl::desc("Show internal instruction representation")); + +static cl::list + MAttrs("mattr", cl::CommaSeparated, + cl::desc("Target specific attributes (-mattr=help for details)"), + cl::value_desc("a1,+a2,-a3,...")); + +// The feature string derived from -mattr's values. 
+std::string FeaturesStr; + +static cl::list + FuzzerArgs("fuzzer-args", cl::Positional, + cl::desc("Options to pass to the fuzzer"), cl::ZeroOrMore, + cl::PositionalEatsArgs); + +static std::vector ModifiedArgv; + +static std::unique_ptr GetOutputStream(StringRef Path) { + std::error_code EC; + auto Out = llvm::make_unique(Path, EC, sys::fs::F_None); + if (EC) { + WithColor::error() << EC.message() << '\n'; + return nullptr; + } + return Out; +} + +static std::string OutputFilename = ""; + +void mc_proto_fuzzer::HandleEncoding(const std::string &S) { + Triple TheTriple(Triple::normalize(TripleName)); + + SourceMgr SrcMgr; + + std::unique_ptr BufferPtr = llvm::MemoryBuffer::getMemBuffer(S); + MemoryBuffer *Buffer = std::move(BufferPtr).get(); + + // Tell SrcMgr about this buffer, which is what the parser will pick up. + SrcMgr.AddNewSourceBuffer(std::move(BufferPtr), SMLoc()); + + static const std::vector NoIncludeDirs; + SrcMgr.setIncludeDirs(NoIncludeDirs); + + static std::string ArchName; + std::string Error; + const Target *TheTarget = + TargetRegistry::lookupTarget(ArchName, TheTriple, Error); + if (!TheTarget) { + errs() << "error: this target '" << TheTriple.normalize() << "/" << ArchName + << "', was not found: '" << Error << "'\n"; + + abort(); + } + + std::unique_ptr MRI(TheTarget->createMCRegInfo(TripleName)); + if (!MRI) { + errs() << "Unable to create target register info!"; + abort(); + } + + std::unique_ptr MAI(TheTarget->createMCAsmInfo(*MRI, TripleName)); + if (!MAI) { + errs() << "Unable to create target asm info!"; + abort(); + } + + MCObjectFileInfo MOFI; + MCContext Ctx(MAI.get(), MRI.get(), &MOFI, &SrcMgr); + + static const bool UsePIC = false; + MOFI.InitMCObjectFileInfo(TheTriple, UsePIC, Ctx); + + if (OutputFilename.empty()) { + errs() << "output dir name is empty\n"; + OutputFilename = "-"; + } + std::unique_ptr Out = GetOutputStream(OutputFilename); + assert(Out && "Output Stream is null!"); + + std::unique_ptr DwoOut; + std::unique_ptr 
BOS; + raw_pwrite_stream *OS = &Out->os(); + std::unique_ptr Str; + + const unsigned OutputAsmVariant = 0; + std::unique_ptr MCII(TheTarget->createMCInstrInfo()); + + MCInstPrinter *IP = nullptr; + std::unique_ptr STI( + TheTarget->createMCSubtargetInfo(TripleName, "", FeaturesStr)); + + std::unique_ptr CE; + CE.reset(TheTarget->createMCCodeEmitter(*MCII, *MRI, Ctx)); + + MCTargetOptions MCOptions = InitMCTargetOptionsFromFlags(); + + IP = TheTarget->createMCInstPrinter(Triple(TripleName), OutputAsmVariant, + *MAI, *MCII, *MRI); + + if (!IP) { + WithColor::error() + << "unable to create instruction printer for target triple '" + << TheTriple.normalize() << "' with assembly variant " + << OutputAsmVariant << ".\n"; + return; + } + + // Set up the AsmStreamer. + if (ShowEncoding) + CE.reset(TheTarget->createMCCodeEmitter(*MCII, *MRI, Ctx)); + + std::unique_ptr MAB( + TheTarget->createMCAsmBackend(*STI, *MRI, MCOptions)); + auto FOut = llvm::make_unique(*OS); + Str.reset(TheTarget->createAsmStreamer( + Ctx, std::move(FOut), + /*asmverbose*/ true, + /*useDwarfDirectory*/ true, IP, std::move(CE), std::move(MAB), ShowInst)); + int Res = Disassembler::disassemble(*TheTarget, TripleName, *STI, *Str, + *Buffer, SrcMgr, Out->os()); + (void)Res; + + // Keep output if no errors. + if (Res == 0) { + Out->keep(); + if (DwoOut) + DwoOut->keep(); + } + return; +} + +extern "C" LLVM_ATTRIBUTE_USED int LLVMFuzzerInitialize(int *argc, + char ***argv) { + // The command line is unusual compared to other fuzzers due to the need to + // specify the target. Options like -triple and -mattr work like + // their counterparts in llvm-mc, while -fuzzer-args collects options for the + // fuzzer itself. 
+ // + // Examples: + // + // Fuzz the RISCV32 disassembler using 100,000 inputs of up to 32-bytes each + // and use the contents of ./corpus as the test corpus: + // llvm-mc-disassemble-proto-fuzzer-riscv -triple riscv32 \ + // -fuzzer-args -max_len=32 -runs=100000 ./corpus + // + // If your aim is to find instructions that are not tested, then it is + // advisable to constrain the maximum input size to a single instruction + // using -max_len as in the first example. This results in a test corpus of + // individual instructions that test unique paths. Without this constraint, + // there will be considerable redundancy in the corpus. + + char **OriginalArgv = *argv; + + LLVMInitializeAllTargetInfos(); + LLVMInitializeAllTargetMCs(); + LLVMInitializeAllDisassemblers(); + + cl::ParseCommandLineOptions(*argc, OriginalArgv); + + // Rebuild the argv without the arguments that the fuzzer consumed + // so that the driver can parse its arguments. + // + // FuzzerArgs cannot provide the non-const pointer that OriginalArgv needs. + // Re-use the strings from OriginalArgv instead of copying FuzzerArg to a + // non-const buffer to avoid the need to clean up when the fuzzer terminates. + + ModifiedArgv.push_back(OriginalArgv[0]); + for (const auto &FuzzerArg : FuzzerArgs) { + for (int i = 1; i < *argc; ++i) { + if (FuzzerArg == OriginalArgv[i]) + ModifiedArgv.push_back(OriginalArgv[i]); + } + } + + *argc = ModifiedArgv.size(); + *argv = ModifiedArgv.data(); + // Check for specified corpus directory or file. + // If a corpus file was specified, then set the OutputFilename to + // generate a corresponding output file (the disassembled .s file). + // This captures the output of the fuzz target (the disassembler), + // which can then be checked for correctness. 
+  for (int i = 1; i < *argc; ++i) {
+    struct stat S;
+    if (stat(ModifiedArgv[i], &S) == 0) {
+      if (S.st_mode & S_IFREG) {
+        std::string NewFilename(ModifiedArgv[i]);
+        std::size_t Pos = NewFilename.find_last_of("/");
+        if (Pos != std::string::npos)
+          NewFilename = NewFilename.erase(0, Pos + 1);
+        OutputFilename = OutputDirname + "/" + NewFilename + ".s";
+        break;
+      }
+    }
+  }
+
+  // Package up features to be passed to target/subtarget.
+  // We have to pass it via a global since the callback doesn't
+  // permit any user data.
+  if (MAttrs.size()) {
+    SubtargetFeatures Features;
+    for (unsigned i = 0; i != MAttrs.size(); ++i)
+      Features.AddFeature(MAttrs[i]);
+    FeaturesStr = Features.getString();
+  }
+
+  if (TripleName.empty())
+    TripleName = sys::getDefaultTargetTriple();
+
+  return 0;
+}
Index: tools/llvm-mc-disassemble-proto-fuzzer/mcfuzz.py
===================================================================
--- /dev/null
+++ tools/llvm-mc-disassemble-proto-fuzzer/mcfuzz.py
@@ -0,0 +1,442 @@
+#!/usr/bin/env python
+"""Differential tester for the LLVM MC disassembler protobuf fuzzer.
+
+The fuzzer is run first to populate a corpus directory.  When an output
+directory is given, every corpus file is then replayed: its encoding is
+disassembled by both the fuzz target and GNU objdump, the fuzz target's
+assembly is re-assembled with the GNU assembler, and the resulting encoding
+is compared with the encoding the fuzzer generated.
+"""
+
+import argparse
+import difflib
+import os
+import shlex
+import subprocess
+import sys
+import tempfile
+
+
+def main():
+    """Run the fuzz / disassemble / re-assemble / compare pipeline."""
+    parser = argparse.ArgumentParser()
+    parse_arguments(parser)
+    args = parser.parse_args()
+
+    # Step 1: Invoke fuzzer for disassembler to generate a corpus.
+    call_fuzzer(args)
+
+    # If the user did not specify output dir, skip steps 2-9; exit.
+    if args.out is None:
+        print("No output directory specified; exiting after populating "
+              "corpus directory.")
+        sys.exit(0)
+
+    # The remaining steps shell out to the golden tools; report a usage
+    # error now rather than trying to execute a program named "None".
+    if args.assemble is None or args.objdump is None:
+        parser.error("--assemble and --objdump are required with --out")
+    if not os.path.isdir(args.out):
+        os.makedirs(args.out)
+
+    # List of corpus files that LLVM Disassembler and GNU Objdump
+    # both fail to disassemble.
+    llvm_dis_and_gnu_dis_fails = []
+    # Corpus files that only LLVM Disassembler fails to disassemble; each
+    # file name is followed by the fuzz target's output.
+    llvm_dis_fails = []
+    # Corpus files that only GNU Objdump fails to disassemble; each file
+    # name is followed by objdump's output.
+    gnu_dis_fails = []
+    # List of corpus files that both disassemblers were able to disassemble.
+    llvm_dis_and_gnu_dis_passes = []
+    # Keep track of which diffs pass / fail.
+    passes = 0
+    fails = 0
+    diffs = []
+    list_of_files = []
+
+    # Iterate through the corpus body.
+
+    # For each file, re-run the fuzzer to call the Disassembler and
+    # disassemble the fuzzer-generated encoding into an ASM file.
+    # Assemble the ASM file generated by the fuzz target using GNU AS,
+    # and then read the resulting object file using GNU objdump,
+    # and compare the hexadecimal encoding with the original fuzzer-generated
+    # hexadecimal encoding.
+    for filename in sorted(os.listdir(args.corpus)):
+        # Sub-directories are not corpus entries.
+        if not os.path.isfile(os.path.join(args.corpus, filename)):
+            continue
+
+        # Step 2: Run fuzzer on specific file; check for error in fuzz target.
+        target_err, out = call_fuzzer_on_corpus_file(args, filename)
+
+        # Step 3: Run proto-to-encoding on corpus file to generate .enc file.
+        call_proto_to_enc(args, filename)
+
+        # Step 4: Run golden disassembler (GNU objdump) on encoding.
+        gnu_err, gnu_out = gnu_disassemble(args, filename)
+
+        # Step 5: Compare behavior of fuzz target and golden disassembler.
+        ######################################################################
+        #-- Initial, tentative interpretation of results, based on the     --#
+        #-- status (pass/fail) of tools (LLVM Disassembler, GNU Objdump,   --#
+        #-- and GNU Assembler)                                             --#
+        #--------------------------------------------------------------------#
+        # LLVM DIS | GNU DIS | GNU AS | Possible Conclusion                  #
+        #--------------------------------------------------------------------#
+        #    0     |    0    |   x    | Invalid/unimplemented instr encoding #
+        #    0     |    1    |   x    | LLVM MC bug/unimplemented instr      #
+        #    1     |    0    |   x    | LLVM MC bug                          #
+        #    1     |    1    |   0    | LLVM MC bug and GCC bug              #
+        #    1     |    1    |   1    | Success                              #
+        ######################################################################
+        if target_err and gnu_err:
+            if args.verbose:
+                print("Both LLVM and GNU failed to disassemble file: " +
+                      filename)
+            llvm_dis_and_gnu_dis_fails.append(filename)
+            continue
+        if target_err:
+            if args.verbose:
+                print("Only target disassembler failed to disassemble file: " +
+                      filename)
+            llvm_dis_fails.append(filename)
+            llvm_dis_fails.append(out)
+            continue
+        if gnu_err:
+            if args.verbose:
+                print("Only GNU Objdump failed to disassemble file: " +
+                      filename)
+            gnu_dis_fails.append(filename)
+            gnu_dis_fails.append(gnu_out)
+            continue
+        if args.verbose:
+            print("Both disassemblers disassembled file: " + filename)
+        llvm_dis_and_gnu_dis_passes.append(filename)
+
+        # Step 6: Call GNU AS on each .s file (generated by fuzz target in step
+        # 2) in the output directory, to generate corresponding .o files.
+        asm_err = call_golden_assembler(args, filename)
+        if asm_err and args.verbose:
+            print("GNU assembler failed to assemble file: " + filename + ".s")
+
+        # Step 7: Read .o files using GNU objdump.
+        err, objdump_out, objdump_file = disasm_file(args, filename)
+        process_reference_file(objdump_file, args, filename)
+
+        # Step 8: Diff the file generated by proto-to-encoding with file
+        # generated by GNU Assembler + Objdump.
+        passes, fails, diffs, list_of_files = print_file_status(args, filename,
+                                                                passes, fails,
+                                                                diffs,
+                                                                list_of_files)
+
+    # Step 9: Print results.
+    print_result(args, passes, fails, llvm_dis_and_gnu_dis_fails,
+                 llvm_dis_fails, gnu_dis_fails, llvm_dis_and_gnu_dis_passes,
+                 diffs, list_of_files)
+
+    sys.exit(0 if fails == 0 else 1)
+
+
+def parse_arguments(parser):
+    """Register every command line flag of this script on parser."""
+    # Flags for the directory names, corpus and outputdir.
+    parser.add_argument("--corpus", type=str, help="corpus directory name",
+                        required=True)
+    parser.add_argument("--out", type=str,
+                        help="output directory name for obj files",
+                        default=None)
+
+    # Flags for llvm-mc-disassemble-proto-fuzzer: triple, mattr.
+    parser.add_argument("--triple", type=str, help="specify the triple",
+                        default="riscv32")
+    parser.add_argument("--mattr", type=str, help="specify mattr",
+                        default="")
+
+    # Flag for GNU: march.
+    parser.add_argument("--march", type=str, help="specify march",
+                        default="rv32i")
+
+    # These args are passed in after the -fuzzer-args flag.
+    parser.add_argument("--runs", type=int, help="number of runs", default=100)
+    parser.add_argument("--max-len", type=int,
+                        help="limit instruction size for fuzzing", default=40)
+
+    # Flags specify the name of golden assembler and objdump executables.
+    parser.add_argument("--assemble", type=str, help="specify the path to"
+                        " golden assembler")
+    parser.add_argument("--objdump", type=str, help="specify the path to"
+                        " objdump")
+
+    # Flag specifies that we should print out everything.
+    parser.add_argument("--verbose", dest="verbose", action="store_true")
+    parser.set_defaults(verbose=False)
+
+    # Flag indicates that the script should run the fuzzer that is totally
+    # unconstrained.
+    parser.add_argument("--unconstrained", dest="unconstrained",
+                        action="store_true")
+    parser.set_defaults(unconstrained=False)
+
+    # Flag for name of results summary file.
+    parser.add_argument("--summary", type=str,
+                        help="specify name of summary file",
+                        default="summary.txt")
+
+
+def _run_tool(command):
+    """Run command; return (exit status, combined stdout and stderr text)."""
+    proc = subprocess.Popen(command, stdout=subprocess.PIPE,
+                            stderr=subprocess.STDOUT,
+                            universal_newlines=True)
+    output, _ = proc.communicate()
+    return proc.returncode, output
+
+
+def _fuzzer_name(args):
+    """Return the fuzzer executable selected by --unconstrained."""
+    if args.unconstrained:
+        return 'llvm-mc-disassemble-proto-fuzzer-unconstrained'
+    return 'llvm-mc-disassemble-proto-fuzzer-riscv'
+
+
+# This function calls the llvm-mc-disassemble-proto-fuzzer.
+def call_fuzzer(args):
+    """Run the fuzzer over the corpus directory to populate it.
+
+    Raises ValueError when the fuzzer exits with a non-zero status.
+    """
+    command = [_fuzzer_name(args), args.corpus,
+               '-triple=' + args.triple, '-mattr=' + args.mattr,
+               '-fuzzer-args', '-runs={}'.format(args.runs),
+               '-max_len={}'.format(args.max_len)]
+    status, fuzz_out = _run_tool(command)
+    if status != 0:
+        # stderr is merged into stdout, so fuzz_out carries the diagnostics.
+        raise ValueError('failed to run fuzz {}: {}'.format(command,
+                                                            fuzz_out))
+    if args.verbose:
+        print(fuzz_out)
+
+
+def call_fuzzer_on_corpus_file(args, filename):
+    """Replay one corpus file through the fuzz target.
+
+    Returns (error_occurred, output); error_occurred is True when the
+    fuzz target's output contains the word "error".  Raises ValueError when
+    the fuzz target exits with a non-zero status.
+    """
+    command = [_fuzzer_name(args), os.path.join(args.corpus, filename),
+               '-triple=' + args.triple, '-mattr=' + args.mattr,
+               '-out=' + args.out, '-fuzzer-args', '-runs=1']
+    status, target_dis_out = _run_tool(command)
+    if status != 0:
+        raise ValueError('failed to run fuzzer {}: {}'.format(
+            command, target_dis_out))
+    if args.verbose and target_dis_out:
+        print(target_dis_out)
+    return "error" in target_dis_out, target_dis_out
+
+
+def call_proto_to_enc(args, filename):
+    """Convert one corpus file into encoding artifacts in the output dir.
+
+    Writes <file>.enc (raw converter output), <file>.txt (one encoding per
+    line, bytes in reverse order of the converter output), <file>.temp_s
+    (the same encodings as .word directives) and assembles the latter into
+    <file>.temp_o with llvm-mc.  Raises ValueError when the converter fails.
+    """
+    if args.unconstrained:
+        tool = 'llvm-mc-disassemble-proto-to-encoding-unconstrained'
+    else:
+        tool = 'llvm-mc-disassemble-proto-to-encoding-riscv'
+    command = [tool, os.path.join(args.corpus, filename)]
+    status, enc_out = _run_tool(command)
+    base = os.path.join(args.out, filename)
+    with open(base + ".enc", "w") as enc_file:
+        enc_file.write(enc_out)
+    if status != 0:
+        raise ValueError('failed to run {}: {}'.format(command, enc_out))
+    if args.verbose and enc_out:
+        print("Enc_out: " + enc_out)
+
+    with open(base + ".txt", "w") as txt_file, \
+            open(base + ".temp_s", "w") as bytes_file:
+        for line in enc_out.splitlines(True):
+            # Byte tokens are separated by " 0x"; the text before the first
+            # separator is kept only at the end of the .word value.
+            fields = line.split(" 0x")
+            hex_bytes = [field.rstrip() for field in reversed(fields[1:])]
+            txt_file.write("".join(hex_bytes) + "\n")
+            bytes_file.write(".word 0x" + "".join(hex_bytes) + fields[0]
+                             + "\n")
+
+    # Generate .o file to be disassembled; the file should only contain
+    # the hexadecimal encoding of the fuzzer-generated instruction(s).
+    # The assembler does no work in assembling `.word 0x_____`.
+    command = ['llvm-mc', '-assemble', base + '.temp_s',
+               '-triple=' + args.triple, '-mattr=' + args.mattr,
+               '-filetype=obj', '-o', base + '.temp_o']
+    _, golden_as_out = _run_tool(command)
+    if args.verbose:
+        print(golden_as_out)
+
+
+def call_golden_assembler(args, filename):
+    """Assemble <file>.s with the golden assembler; True means failure."""
+    base = os.path.join(args.out, filename)
+    # --assemble may carry extra arguments, hence shlex.split.
+    command = shlex.split(args.assemble) + [base + '.s',
+                                            '-march=' + args.march,
+                                            '-o', base + '.o']
+    status, golden_as_out = _run_tool(command)
+    if args.verbose:
+        print(golden_as_out)
+    return status != 0
+
+
+def gnu_disassemble(args, filename):
+    """Disassemble <file>.temp_o with objdump into <file>.reference.
+
+    Returns (error_occurred, output) where output is objdump's text.
+    """
+    base = os.path.join(args.out, filename)
+    command = shlex.split(args.objdump) + ['-d', base + '.temp_o']
+    status, golden_objdump_out = _run_tool(command)
+    with open(base + ".reference", "w") as reference_file:
+        reference_file.write(golden_objdump_out)
+    if args.verbose:
+        print(golden_objdump_out)
+    return status != 0, golden_objdump_out
+
+
+def disasm_file(args, filename):
+    """Disassemble <file>.o with objdump into <file>.objdump.
+
+    Returns (error_occurred, output, objdump_file).  objdump_file is left
+    open for process_reference_file, which closes it.
+    """
+    base = os.path.join(args.out, filename)
+    command = shlex.split(args.objdump) + ['-d', base + '.o']
+    status, golden_objdump_out = _run_tool(command)
+    golden_error_occurred = status != 0 and "Error:" in golden_objdump_out
+    if args.verbose:
+        if golden_error_occurred:
+            print("golden_objdump_out: " + golden_objdump_out)
+        else:
+            print(golden_objdump_out)
+    objdump_file = open(base + ".objdump", "w+")
+    objdump_file.write(golden_objdump_out)
+    return golden_error_occurred, golden_objdump_out, objdump_file
+
+
+def process_reference_file(objdump_file, args, filename):
+    """Extract the encoding column of objdump output into <file>.ref_enc.
+
+    objdump_file must be open for reading; it is closed before returning.
+    """
+    # Each line of the objdump output looks something like this:
+    #    0:   00318033   add x0,x3,x3
+    # We remove the first and third columns, leaving only the encoding.
+    encodings = []
+    with objdump_file:
+        objdump_file.seek(0)
+        for line in objdump_file:
+            columns = line.split()
+            # Lines without a tab are not instruction lines; a line with
+            # fewer than two columns cannot hold an encoding.
+            if "\t" in line and len(columns) >= 2:
+                encodings.append(columns[1])
+
+    ref_path = os.path.join(args.out, filename + ".ref_enc")
+    with open(ref_path, "w") as ref_file:
+        for encoding in encodings:
+            ref_file.write(encoding + "\n")
+
+
+def print_file_status(args, filename, passes, fails, diffs, list_of_files):
+    """Diff <file>.txt against <file>.ref_enc and update the tallies.
+
+    Returns the updated (passes, fails, diffs, list_of_files).  On a
+    mismatch the file name and the diff text are appended to diffs and the
+    file name to list_of_files.
+    """
+    prefix = "Checking " + filename + "..."
+    base = os.path.join(args.out, filename)
+    with open(base + ".txt", "r") as file1:
+        generated = file1.readlines()
+    with open(base + ".ref_enc", "r") as file2:
+        reference = file2.readlines()
+    delta = ''.join(x for x in difflib.ndiff(generated, reference)
+                    if x.startswith(('- ', '+ ')))
+    if delta:
+        if args.verbose:
+            print(prefix + "FAILURE!")
+        fails = fails + 1
+        diffs.append(filename)
+        diffs.append(delta)
+        list_of_files.append(filename)
+    else:
+        if args.verbose:
+            print(prefix + "SUCCESS!")
+        passes = passes + 1
+    return passes, fails, diffs, list_of_files
+
+
+def print_result(args, passes, fails,
+                 list1, list2, list3, list4, list5, list6):
+    """Print the per-category results and write the summary file.
+
+    list1: files both disassemblers rejected; list2 / list3: file name and
+    tool output pairs for files only LLVM / only objdump rejected; list4:
+    files both accepted; list5: file name and diff pairs; list6: names of
+    the files whose encodings differed.
+    """
+    # Only target (LLVM MC Disassembler) fails
+    if len(list2) != 0:
+        print("Target LLVM Disassembler failed to disassemble these inputs...")
+        for item in list2:
+            print(item)
+
+    # Golden disassembler (GNU Objdump) fails
+    if len(list3) != 0:
+        print("GNU Objdump failed to disassemble these inputs...")
+        for item in list3:
+            print(item)
+
+    # Both disassemblers returned without error, but disassembled differently.
+    if fails != 0:
+        print("Target LLVM Disassembler and GNU Objdump both disassembled "
+              "these inputs, but input Hex encoding differed from "
+              "GNU objdump-generated encoding.")
+        for item in list5:
+            print(item)
+
+    # Write all the differences to file.  The summary is rewritten on every
+    # run so that it never holds stale results from an earlier run.
+    with open(args.summary, "w") as summary_file:
+        for filename in list6:
+            base = os.path.join(args.out, filename)
+            summary_file.write(filename)
+            summary_file.write("\nDisassembled by LLVM MC Disassembler:\n")
+            with open(base + ".s", "r") as file1:
+                summary_file.writelines(file1.readlines())
+            summary_file.write("Disassembled by GNU Objdump:\n")
+            with open(base + ".objdump", "r") as file2:
+                summary_file.writelines(file2.readlines())
+            summary_file.write("\n")
+
+    # list2 and list3 hold two entries (name, output) per failing file.
+    print("Both LLVM Disassembler and GNU Objdump fail: {}".format(len(list1)))
+    print("Only LLVM Disassembler fails: {}".format(len(list2) // 2))
+    print("Only GNU Objdump fails: {}".format(len(list3) // 2))
+    print("Both LLVM Disassembler and GNU Objdump pass: {}".format(len(list4)))
+    print("[Succeeded: {}/{}\tFailed: {}/{}]".format(passes,
+          len(list4), fails, len(list4)))
+
+
+if __name__ == "__main__":
+    main()
Index: tools/llvm-mc-disassemble-proto-fuzzer/proto-files/example_encoding_proto.proto
===================================================================
--- /dev/null
+++ tools/llvm-mc-disassemble-proto-fuzzer/proto-files/example_encoding_proto.proto
@@ -0,0 +1,38 @@
+//===-- example_encoding_proto.proto - Protobuf description of Encoding ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file describes a subset of RISC-V machine instruction encodings as a
+/// protobuf.
It is used by the example fuzzer to generate basic inputs
+/// to fuzz the LLVM MC layer, with just two instructions (ADDI and XORI).
+///
+//===----------------------------------------------------------------------===//
+
+syntax = "proto2";
+
+message Opcode {  // Selects one of the two example instructions.
+  enum ValueRange {
+    ADDI = 0;
+    XORI = 1;
+  };
+  required ValueRange value = 1;
+}
+
+// The fuzzer fuzzes an opcode and a 32-bit unsigned value, which (together)
+// are used to generate a machine instruction.
+message Inst {
+  required Opcode opcode = 1;
+  required uint32 encoding = 2;
+}
+
+message Encoding {  // Wraps exactly one instruction.
+  required Inst instruction = 1;
+}
+
+package mc_proto_fuzzer;
Index: tools/llvm-mc-disassemble-proto-fuzzer/proto-files/riscv.proto
===================================================================
--- /dev/null
+++ tools/llvm-mc-disassemble-proto-fuzzer/proto-files/riscv.proto
@@ -0,0 +1,51 @@
+//===-- riscv.proto - Protobuf description of Encoding --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file describes the RISC-V (RV32 and RV64) machine instruction
+/// encodings as a protobuf. It is used by the fuzzer to generate basic
+/// inputs to fuzz the LLVM MC layer.
+///
+//===----------------------------------------------------------------------===//
+
+syntax = "proto2";
+
+import "riscv_a.proto";
+import "riscv_c.proto";
+import "riscv_d.proto";
+import "riscv_f.proto";
+import "riscv_i.proto";
+import "riscv_m.proto";
+
+// The fuzzer fuzzes an opcode and a 32-bit unsigned value, which (together)
+// are used to generate a machine instruction.
+message Inst {
+  required uint32 encoding = 1;
+  oneof opcode_oneof {  // At most one opcode family is set per instruction.
+    A_RVOpcode opcode1 = 2;
+    A_RV64Opcode opcode2 = 3;
+    C_RVOpcode opcode3 = 4;
+    C_RV32Opcode opcode4 = 5;
+    C_RV64Opcode opcode5 = 6;
+    D_RVOpcode opcode6 = 7;
+    D_RV64Opcode opcode7 = 8;
+    F_RVOpcode opcode8 = 9;
+    F_RV64Opcode opcode9 = 10;
+    I_RVOpcode opcode10 = 11;
+    I_RV64Opcode opcode11 = 12;
+    M_RVOpcode opcode12 = 13;
+    M_RV64Opcode opcode13 = 14;
+  }
+}
+
+message Encoding {  // Wraps exactly one instruction.
+  required Inst instruction = 1;
+}
+
+package mc_proto_fuzzer;
Index: tools/llvm-mc-disassemble-proto-fuzzer/proto-files/riscv_a.proto
===================================================================
--- /dev/null
+++ tools/llvm-mc-disassemble-proto-fuzzer/proto-files/riscv_a.proto
@@ -0,0 +1,52 @@
+//===-- riscv_a.proto - Protobuf description of Encoding ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file contains the Protobuf representation of RISC-V's A-extension.
+///
+//===----------------------------------------------------------------------===//
+
+syntax = "proto2";
+
+message A_RVOpcode {  // A-extension opcodes with the _W suffix.
+  enum ValueRange {
+    AMOADD_W = 0;
+    AMOAND_W = 1;
+    AMOMAX_W = 2;
+    AMOMAXU_W = 3;
+    AMOMIN_W = 4;
+    AMOMINU_W = 5;
+    AMOOR_W = 6;
+    AMOSWAP_W = 7;
+    AMOXOR_W = 8;
+    LR_W = 9;
+    SC_W = 10;
+  };
+  required ValueRange value = 1;
+}
+
+message A_RV64Opcode {  // _D-suffixed opcodes; presumably RV64-only -- TODO confirm.
+  enum ValueRange {
+    AMOADD_D = 0;
+    AMOAND_D = 1;
+    AMOMAX_D = 2;
+    AMOMAXU_D = 3;
+    AMOMIN_D = 4;
+    AMOMINU_D = 5;
+    AMOOR_D = 6;
+    AMOSWAP_D = 7;
+    AMOXOR_D = 8;
+    LR_D = 9;
+    SC_D = 10;
+  };
+  required ValueRange value = 1;
+}
+
+package mc_proto_fuzzer;
+
Index: tools/llvm-mc-disassemble-proto-fuzzer/proto-files/riscv_c.proto
===================================================================
--- /dev/null
+++ tools/llvm-mc-disassemble-proto-fuzzer/proto-files/riscv_c.proto
@@ -0,0 +1,79 @@
+//===-- riscv_c.proto - Protobuf description of Encoding ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file contains the Protobuf representation of RISC-V's C-extension.
+///
+//===----------------------------------------------------------------------===//
+
+syntax = "proto2";
+
+message C_RVOpcode {  // C-extension opcodes not listed in the two messages below.
+  enum ValueRange {
+    C_ADD = 0;
+    C_ADDI = 1;
+    C_ADDI16SP = 2;
+    C_ADDI4SPN = 3;
+    C_AND = 4;
+    C_ANDI = 5;
+    C_BEQZ = 6;
+    C_BNEZ = 7;
+    C_EBREAK = 8;
+    C_FLD = 9;
+    C_FLDSP = 10;
+    C_FLWSP = 11;
+    C_FSD = 12;
+    C_FSDSP = 13;
+    C_ILLEGAL = 14;
+    C_J = 15;
+    C_JAL = 16;
+    C_JALR = 17;
+    C_JR = 18;
+    C_LI = 19;
+    C_LUI = 20;
+    C_LW = 21;
+    C_LWSP = 22;
+    C_MV = 23;
+    C_NOP = 24;
+    C_OR = 25;
+    C_SLLI = 26;
+    C_SRAI = 27;
+    C_SRLI = 28;
+    C_SUB = 29;
+    C_SW = 30;
+    C_SWSP = 31;
+    C_XOR = 32;
+  };
+  required ValueRange value = 1;
+}
+
+message C_RV32Opcode {  // Name suggests RV32-only opcodes -- TODO confirm.
+  enum ValueRange {
+    C_FLW = 0;
+    C_FSW = 1;
+    C_FSWSP = 2;
+  };
+  required ValueRange value = 1;
+}
+
+message C_RV64Opcode {  // Name suggests RV64-only opcodes -- TODO confirm.
+  enum ValueRange {
+    C_ADDIW = 0;
+    C_ADDW = 1;
+    C_LD = 2;
+    C_LDSP = 3;
+    C_SD = 4;
+    C_SDSP = 5;
+    C_SUBW = 6;
+  };
+  required ValueRange value = 1;
+}
+
+package mc_proto_fuzzer;
+
Index: tools/llvm-mc-disassemble-proto-fuzzer/proto-files/riscv_d.proto
===================================================================
--- /dev/null
+++ tools/llvm-mc-disassemble-proto-fuzzer/proto-files/riscv_d.proto
@@ -0,0 +1,62 @@
+//===-- riscv_d.proto - Protobuf description of Encoding ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file contains the Protobuf representation of RISC-V's D-extension.
+///
+//===----------------------------------------------------------------------===//
+
+syntax = "proto2";
+
+message D_RVOpcode {  // D-extension opcodes not listed in D_RV64Opcode.
+  enum ValueRange {
+    FADD_D = 0;
+    FCLASS_D = 1;
+    FCVT_D_S = 2;
+    FCVT_D_W = 3;
+    FCVT_D_WU = 4;
+    FCVT_S_D = 5;
+    FCVT_W_D = 6;
+    FCVT_WU_D = 7;
+    FDIV_D = 8;
+    FEQ_D = 9;
+    FLD = 10;
+    FLE_D = 11;
+    FLT_D = 12;
+    FMADD_D = 13;
+    FMAX_D = 14;
+    FMIN_D = 15;
+    FMSUB_D = 16;
+    FMUL_D = 17;
+    FNMADD_D = 18;
+    FNMSUB_D = 19;
+    FSD = 20;
+    FSGNJ_D = 21;
+    FSGNJN_D = 22;
+    FSGNJX_D = 23;
+    FSQRT_D = 24;
+    FSUB_D = 25;
+  };
+  required ValueRange value = 1;
+}
+
+message D_RV64Opcode {  // Name suggests RV64-only opcodes -- TODO confirm.
+  enum ValueRange {
+    FCVT_D_L = 0;
+    FCVT_D_LU = 1;
+    FCVT_L_D = 2;
+    FCVT_LU_D = 3;
+    FMV_D_X = 4;
+    FMV_X_D = 5;
+  };
+  required ValueRange value = 1;
+}
+
+package mc_proto_fuzzer;
+
Index: tools/llvm-mc-disassemble-proto-fuzzer/proto-files/riscv_f.proto
===================================================================
--- /dev/null
+++ tools/llvm-mc-disassemble-proto-fuzzer/proto-files/riscv_f.proto
@@ -0,0 +1,59 @@
+//===-- riscv_f.proto - Protobuf description of Encoding ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file contains the Protobuf representation of RISC-V's F-extension.
+///
+//===----------------------------------------------------------------------===//
+
+syntax = "proto2";
+
+message F_RVOpcode {  // F-extension opcodes not listed in F_RV64Opcode.
+  enum ValueRange {
+    FADD_S = 0;
+    FCLASS_S = 1;
+    FCVT_S_W = 2;
+    FCVT_S_WU = 3;
+    FCVT_W_S = 4;
+    FCVT_WU_S = 5;
+    FDIV_S = 6;
+    FEQ_S = 7;
+    FLE_S = 8;
+    FLT_S = 9;
+    FLW = 10;
+    FMAX_S = 11;
+    FMADD_S = 12;
+    FMIN_S = 13;
+    FMSUB_S = 14;
+    FMUL_S = 15;
+    FMV_X_W = 16;
+    FMV_W_X = 17;
+    FNMADD_S = 18;
+    FNMSUB_S = 19;
+    FSGNJ_S = 20;
+    FSGNJN_S = 21;
+    FSGNJX_S = 22;
+    FSQRT_S = 23;
+    FSUB_S = 24;
+    FSW = 25;
+  };
+  required ValueRange value = 1;
+}
+
+message F_RV64Opcode {  // Name suggests RV64-only opcodes -- TODO confirm.
+  enum ValueRange {
+    FCVT_L_S = 0;
+    FCVT_LU_S = 1;
+    FCVT_S_L = 2;
+    FCVT_S_LU = 3;
+  };
+  required ValueRange value = 1;
+}
+
+package mc_proto_fuzzer;
Index: tools/llvm-mc-disassemble-proto-fuzzer/proto-files/riscv_i.proto
===================================================================
--- /dev/null
+++ tools/llvm-mc-disassemble-proto-fuzzer/proto-files/riscv_i.proto
@@ -0,0 +1,88 @@
+//===-- riscv_i.proto - Protobuf description of Encoding ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file contains the Protobuf representation of RISC-V's I-extension.
+///
+//===----------------------------------------------------------------------===//
+
+syntax = "proto2";
+
+message I_RVOpcode {  // I opcodes not listed in I_RV64Opcode.
+  enum ValueRange {
+    ADD = 0;
+    ADDI = 1;
+    AND = 2;
+    ANDI = 3;
+    AUIPC = 4;
+    BEQ = 5;
+    BGE = 6;
+    BGEU = 7;
+    BLT = 8;
+    BLTU = 9;
+    BNE = 10;
+    CSRRC = 11;
+    CSRRCI = 12;
+    CSRRS = 13;
+    CSRRSI = 14;
+    CSRRW = 15;
+    CSRRWI = 16;
+    EBREAK = 17;
+    ECALL = 18;
+    FENCE = 19;
+    FENCE_I = 20;
+    JAL = 21;
+    JALR = 22;
+    LB = 23;
+    LBU = 24;
+    LH = 25;
+    LHU = 26;
+    LW = 27;
+    LUI = 28;
+    OR = 29;
+    ORI = 30;
+    SB = 31;
+    SH = 32;
+    SLL = 33;
+    SLLI = 34;
+    SLT = 35;
+    SLTI = 36;
+    SLTU = 37;
+    SLTIU = 38;
+    SRA = 39;
+    SRAI = 40;
+    SRL = 41;
+    SRLI = 42;
+    SUB = 43;
+    SW = 44;
+    XOR = 45;
+    XORI = 46;
+  };
+  required ValueRange value = 1;
+}
+
+message I_RV64Opcode {  // Name suggests RV64-only opcodes -- TODO confirm.
+  enum ValueRange {
+    ADDIW = 0;
+    ADDW = 1;
+    LD = 2;
+    LWU = 3;
+    SD = 4;
+    SLLIW = 5;
+    SLLW = 6;
+    SRAIW = 7;
+    SRAW = 8;
+    SRLIW = 9;
+    SRLW = 10;
+    SUBW = 11;
+  };
+  required ValueRange value = 1;
+}
+
+package mc_proto_fuzzer;
Index: tools/llvm-mc-disassemble-proto-fuzzer/proto-files/riscv_m.proto
===================================================================
--- /dev/null
+++ tools/llvm-mc-disassemble-proto-fuzzer/proto-files/riscv_m.proto
@@ -0,0 +1,42 @@
+//===-- riscv_m.proto - Protobuf description of Encoding ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file contains the Protobuf representation of RISC-V's M-extension.
+///
+//===----------------------------------------------------------------------===//
+
+syntax = "proto2";
+
+message M_RVOpcode {
+  enum ValueRange {
+    DIV = 0;
+    DIVU = 1;
+    MUL = 2;
+    MULH = 3;
+    MULHSU = 4;
+    MULHU = 5;
+    REM = 6;
+    REMU = 7;
+  };
+  required ValueRange value = 1;
+}
+
+message M_RV64Opcode {
+  enum ValueRange {
+    DIVUW = 0;
+    DIVW = 1;
+    MULW = 2;
+    REMUW = 3;
+    REMW = 4;
+  };
+  required ValueRange value = 1;
+}
+
+package mc_proto_fuzzer;
Index: tools/llvm-mc-disassemble-proto-fuzzer/proto-files/unconstrained_encoding.proto
===================================================================
--- /dev/null
+++ tools/llvm-mc-disassemble-proto-fuzzer/proto-files/unconstrained_encoding.proto
@@ -0,0 +1,24 @@
+//===-- unconstrained_encoding.proto - Protobuf description of Encoding ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file describes a machine instruction as an unconstrained 32-bit
+/// value in a protobuf. It is used by the unconstrained fuzzer to generate
+/// arbitrary inputs to fuzz the LLVM MC layer.
+///
+//===----------------------------------------------------------------------===//
+
+syntax = "proto2";
+
+// Generate an unsigned 32-bit value to represent a machine instruction.
+message Encoding {
+  required uint32 encoding = 1;
+}
+
+package mc_proto_fuzzer;
Index: tools/llvm-mc-disassemble-proto-fuzzer/proto-to-encoding/CMakeLists.txt
===================================================================
--- /dev/null
+++ tools/llvm-mc-disassemble-proto-fuzzer/proto-to-encoding/CMakeLists.txt
@@ -0,0 +1,50 @@
+# Build the proto-to-encoding libraries and the stand-alone converters.
+set(LLVM_LINK_COMPONENTS ${LLVM_TARGETS_TO_BUILD})
+# The converters are plain executables, so compile them with the flag set
+# that does not link the fuzzer runtime.  Quote the expansion: an unquoted
+# empty value would turn this into set(CMAKE_CXX_FLAGS), which unsets the
+# variable instead of assigning it.
+set(CMAKE_CXX_FLAGS "${CXX_FLAGS_NOFUZZ}")
+
+# Needed by LLVM's CMake checks because this file defines multiple targets.
+set(LLVM_OPTIONAL_SOURCES
+  example_proto_to_encoding.cpp
+  proto_to_encoding.cpp
+  proto_to_encoding_main.cpp
+  unconstrained_proto_to_encoding.cpp
+  riscv_inst_encoding.cpp)
+
+# One conversion library per protobuf description.
+llvm_add_library(mcProtoToEncoding
+  proto_to_encoding.cpp
+  riscv_inst_encoding.cpp
+  DEPENDS mcEncodingProto
+  LINK_LIBS mcEncodingProto ${PROTOBUF_LIBRARIES}
+  )
+llvm_add_library(mcExampleProtoToEncoding
+  example_proto_to_encoding.cpp
+  riscv_inst_encoding.cpp
+  DEPENDS mcExampleEncodingProto
+  LINK_LIBS mcExampleEncodingProto ${PROTOBUF_LIBRARIES}
+  )
+llvm_add_library(mcUnconstrainedProtoToEncoding
+  unconstrained_proto_to_encoding.cpp
+  riscv_inst_encoding.cpp
+  DEPENDS mcUnconstrainedEncodingProto
+  LINK_LIBS mcUnconstrainedEncodingProto ${PROTOBUF_LIBRARIES}
+  )
+
+# One converter executable per library; all share the same main().
+add_llvm_executable(llvm-mc-disassemble-proto-to-encoding-riscv
+  proto_to_encoding_main.cpp)
+add_llvm_executable(llvm-mc-disassemble-proto-to-encoding-example
+  proto_to_encoding_main.cpp)
+add_llvm_executable(llvm-mc-disassemble-proto-to-encoding-unconstrained
+  proto_to_encoding_main.cpp)
+
+target_link_libraries(llvm-mc-disassemble-proto-to-encoding-riscv PRIVATE
+  mcProtoToEncoding)
+target_link_libraries(llvm-mc-disassemble-proto-to-encoding-example PRIVATE
+  mcExampleProtoToEncoding)
+target_link_libraries(llvm-mc-disassemble-proto-to-encoding-unconstrained
+  PRIVATE mcUnconstrainedProtoToEncoding)
Index: tools/llvm-mc-disassemble-proto-fuzzer/proto-to-encoding/example_proto_to_encoding.cpp
===================================================================
--- /dev/null
+++ tools/llvm-mc-disassemble-proto-fuzzer/proto-to-encoding/example_proto_to_encoding.cpp
@@ -0,0 +1,61 @@
+//==-- example_proto_to_encoding.cpp - Protobuf-Encoding conversion --------==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implements functions for converting between protobufs and bit patterns
+// that represent a subset of the RISC-V machine instruction encodings. This
+// example version of the fuzzer just encodes the ADDI and XORI instructions.
+//
+//===----------------------------------------------------------------------===//
+#include "example_encoding_proto.pb.h"
+#include "proto_to_encoding.h"
+#include "riscv_inst_encoding.h"
+
+#include <cassert>
+#include <cstdint>
+#include <sstream>
+#include <string>
+
+using namespace google::protobuf;
+
+namespace mc_proto_fuzzer {
+
+// Looks up the name of X's opcode, passes it with X's raw 32-bit value and
+// the ExampleInsts table to GenerateInstEncoding, and prints the result.
+std::ostream &operator<<(std::ostream &OS, const Inst &X) {
+  uint32_t InstEnc = X.encoding();
+  const EnumDescriptor *Enum = X.opcode().ValueRange_descriptor();
+  const EnumValueDescriptor *D = Enum->FindValueByNumber(X.opcode().value());
+  assert(D && "invalid instruction opcode");
+  InstEnc = GenerateInstEncoding(D->name(), InstEnc, ExampleInsts);
+  PrintEncoding(OS, InstEnc, false /*Compressed*/);
+  return OS;
+}
+
+// Prints the encoding of the single instruction in X, followed by a newline.
+std::ostream &operator<<(std::ostream &OS, const Encoding &X) {
+  return OS << X.instruction() << "\n";
+}
+
+// Renders an already-parsed Encoding message as encoding text.
+std::string FunctionToString(const Encoding &Input) {
+  std::ostringstream OS;
+  OS << Input;
+  return OS.str();
+}
+
+// Parses Size bytes at Data as an Encoding message and renders it; returns
+// an "#error" line when the bytes cannot be parsed as a protobuf.
+std::string ProtoToEncoding(const uint8_t *Data, size_t Size) {
+  Encoding Message;
+  if (!Message.ParsePartialFromArray(Data, Size))
+    return "#error invalid proto\n";
+  return FunctionToString(Message);
+}
+
+} // namespace mc_proto_fuzzer
Index: tools/llvm-mc-disassemble-proto-fuzzer/proto-to-encoding/proto_to_encoding.h
===================================================================
--- /dev/null
+++ tools/llvm-mc-disassemble-proto-fuzzer/proto-to-encoding/proto_to_encoding.h
@@ -0,0 +1,31 @@
+//==-- proto_to_encoding.h - Protobuf-Encoding conversion ------------------==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Declares functions for converting between protobufs and encoding of RISC-V
+// machine instructions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PROTO_TO_ENCODING_H
+#define PROTO_TO_ENCODING_H
+
+#include <cstddef>
+#include <cstdint>
+#include <string>
+
+namespace mc_proto_fuzzer {
+class Encoding;
+
+// Renders an already-parsed Encoding message as encoding text.
+std::string FunctionToString(const Encoding &input);
+// Parses size bytes at data as an Encoding protobuf and renders it.
+std::string ProtoToEncoding(const uint8_t *data, size_t size);
+} // namespace mc_proto_fuzzer
+
+#endif // PROTO_TO_ENCODING_H
Index: tools/llvm-mc-disassemble-proto-fuzzer/proto-to-encoding/proto_to_encoding.cpp
===================================================================
--- /dev/null
+++ tools/llvm-mc-disassemble-proto-fuzzer/proto-to-encoding/proto_to_encoding.cpp
@@ -0,0 +1,125 @@
+//==-- proto_to_encoding.cpp - Protobuf-Encoding conversion ----------------==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implements functions for converting between protobufs and bit patterns
+// that represent a subset of the RISC-V machine instruction encodings.
+// Calls GenerateInstEncoding to generate a 32-bit instruction encoding, based
+// on the fuzzer-generated opcode and random 32-bit value, combined with the
+// specific fixed fields for the opcode, as defined in riscv_inst_encoding.h.
+//
+//===----------------------------------------------------------------------===//
+#include "proto_to_encoding.h"
+#include "riscv.pb.h"
+#include "riscv_i.pb.h"
+#include "riscv_inst_encoding.h"
+#include "riscv_m.pb.h"
+#include <cassert>
+#include <ostream>
+#include <sstream>
+#include <string>
+
+using namespace google::protobuf;
+
+namespace mc_proto_fuzzer {
+
+namespace {
+
+/// Fixed-field table for one extension, keyed by instruction mnemonic.
+using InstTable = std::map<std::string, Instruction>;
+
+/// Forces the fixed fields of the instruction selected by \p Op onto \p Enc.
+///
+/// \p Op is one of the per-extension opcode messages. Its enum value is mapped
+/// to the enumerator name, which is the key used to look the instruction up in
+/// \p Table. Every opcode kind goes through this helper so that the descriptor
+/// check is applied uniformly.
+template <typename OpcodeT>
+uint32_t EncodeOpcode(const OpcodeT &Op, uint32_t Enc, InstTable &Table) {
+  const EnumValueDescriptor *D =
+      Op.ValueRange_descriptor()->FindValueByNumber(Op.value());
+  assert(D && "invalid instruction opcode");
+  // With assertions compiled out, keep the random bits as they are instead of
+  // dereferencing a null descriptor.
+  if (!D)
+    return Enc;
+  return GenerateInstEncoding(D->name(), Enc, Table);
+}
+
+} // end anonymous namespace
+
+/// Prints the bytes of one instruction. The opcode field that is set selects
+/// the extension table; the fuzzer-provided 32-bit value supplies all bits
+/// that the table does not fix. Compressed (RVC) opcodes print two bytes, all
+/// others print four.
+std::ostream &operator<<(std::ostream &OS, const Inst &X) {
+  uint32_t InstEnc = X.encoding();
+  // Set when the opcode comes from one of the 16-bit compressed tables.
+  bool Compressed = false;
+  // The opcode fields are tested in the order 1..13; the first one that is set
+  // is used.
+  if (X.has_opcode1()) {
+    InstEnc = EncodeOpcode(X.opcode1(), InstEnc, RVAInsts);
+  } else if (X.has_opcode2()) {
+    InstEnc = EncodeOpcode(X.opcode2(), InstEnc, RV64AInsts);
+  } else if (X.has_opcode3()) {
+    InstEnc = EncodeOpcode(X.opcode3(), InstEnc, RVCInsts);
+    Compressed = true;
+  } else if (X.has_opcode4()) {
+    InstEnc = EncodeOpcode(X.opcode4(), InstEnc, RV32CInsts);
+    Compressed = true;
+  } else if (X.has_opcode5()) {
+    InstEnc = EncodeOpcode(X.opcode5(), InstEnc, RV64CInsts);
+    Compressed = true;
+  } else if (X.has_opcode6()) {
+    InstEnc = EncodeOpcode(X.opcode6(), InstEnc, RVDInsts);
+  } else if (X.has_opcode7()) {
+    InstEnc = EncodeOpcode(X.opcode7(), InstEnc, RV64DInsts);
+  } else if (X.has_opcode8()) {
+    InstEnc = EncodeOpcode(X.opcode8(), InstEnc, RVFInsts);
+  } else if (X.has_opcode9()) {
+    InstEnc = EncodeOpcode(X.opcode9(), InstEnc, RV64FInsts);
+  } else if (X.has_opcode10()) {
+    InstEnc = EncodeOpcode(X.opcode10(), InstEnc, RVIInsts);
+  } else if (X.has_opcode11()) {
+    InstEnc = EncodeOpcode(X.opcode11(), InstEnc, RV64IInsts);
+  } else if (X.has_opcode12()) {
+    InstEnc = EncodeOpcode(X.opcode12(), InstEnc, RVMInsts);
+  } else if (X.has_opcode13()) {
+    InstEnc = EncodeOpcode(X.opcode13(), InstEnc, RV64MInsts);
+  } else {
+    // No opcode was set: fall back to ADD from the base integer table.
+    InstEnc = GenerateInstEncoding("ADD", InstEnc, RVIInsts);
+  }
+  PrintEncoding(OS, InstEnc, Compressed);
+  return OS;
+}
+
+/// Prints the single instruction held by \p X, followed by a newline.
+std::ostream &operator<<(std::ostream &OS, const Encoding &X) {
+  return OS << X.instruction() << "\n";
+}
+
+/// Returns the text that operator<< produces for \p Input.
+std::string FunctionToString(const Encoding &Input) {
+  std::ostringstream OS;
+  OS << Input;
+  return OS.str();
+}
+
+/// Parses \p Size bytes at \p Data as a serialized Encoding. Messages with
+/// missing required fields are accepted. Returns the textual encoding, or an
+/// "#error" line if the bytes cannot be parsed.
+std::string ProtoToEncoding(const uint8_t *Data, size_t Size) {
+  Encoding Message;
+  if (!Message.ParsePartialFromArray(Data, Size))
+    return "#error invalid proto\n";
+  return FunctionToString(Message);
+}
+
+} // namespace mc_proto_fuzzer
Index: tools/llvm-mc-disassemble-proto-fuzzer/proto-to-encoding/proto_to_encoding_main.cpp
===================================================================
--- /dev/null
+++ tools/llvm-mc-disassemble-proto-fuzzer/proto-to-encoding/proto_to_encoding_main.cpp
@@ -0,0 +1,40 @@
+//==-- proto_to_encoding_main.cpp - Driver for protobuf-encoding conversion ==//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implements a simple driver to print hexadecimal encodings of the machine
+// instructions described by serialized protobuf files.
+//
+//===----------------------------------------------------------------------===//
+#include "proto_to_encoding.h"
+
+#include <cstdint>
+#include <fstream>
+#include <iostream>
+#include <iterator>
+#include <string>
+
+/// Prints the encoding of every serialized protobuf file named on the command
+/// line. Returns 1 if any file could not be opened, 0 otherwise.
+int main(int argc, char **argv) {
+  int Status = 0;
+  for (int i = 1; i < argc; ++i) {
+    // Serialized protobufs are binary data; avoid text-mode translation.
+    std::ifstream In(argv[i], std::ios::in | std::ios::binary);
+    if (!In) {
+      std::cerr << "error: cannot open '" << argv[i] << "'\n";
+      Status = 1;
+      continue;
+    }
+    std::string Str((std::istreambuf_iterator<char>(In)),
+                    std::istreambuf_iterator<char>());
+    std::cout << mc_proto_fuzzer::ProtoToEncoding(
+        reinterpret_cast<const uint8_t *>(Str.data()), Str.size());
+  }
+  return Status;
+}
Index: tools/llvm-mc-disassemble-proto-fuzzer/proto-to-encoding/riscv_inst_encoding.h
===================================================================
--- /dev/null
+++ tools/llvm-mc-disassemble-proto-fuzzer/proto-to-encoding/riscv_inst_encoding.h
@@ -0,0 +1,329 @@
+//==-- riscv_inst_encoding.h - Protobuf-Encoding conversion ---------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Defines structs and tables for converting between protobufs and encoding of
+// RISC-V machine instructions, according to the RISC-V Instruction Set Manual
+// (https://content.riscv.org/wp-content/uploads/2017/05/riscv-spec-v2.2.pdf).
+// Fixed fields are listed in order from lowest bits to highest (for example,
+// opcode field is always listed first as it begins at the 0th bit).
+//
+//===----------------------------------------------------------------------===//
+
+#include "proto_to_encoding.h"
+#include <map>
+#include <vector>
+
+using namespace mc_proto_fuzzer;
+
+#ifndef INSTRUCTION_TABLES_H
+#define INSTRUCTION_TABLES_H
+
+struct Field {
+  uint32_t Value; // Bit pattern the field must hold.
+  uint32_t Pos;   // Position of the field's least significant bit.
+  uint32_t Size;  // Width of the field in bits.
+};
+
+struct Instruction {
+  std::vector<Field> Fields; // Fixed fields, lowest bits first.
+};
+
+static std::map<std::string, Instruction> ExampleInsts {
+  {"ADDI", {{{0b0010011, 0, 7}, {0b000, 12, 3}}}},
+  {"XORI", {{{0b0010011, 0, 7}, {0b100, 12, 3}}}}};
+
+static std::map<std::string, Instruction> RVAInsts {
+  {"AMOADD_W", {{{0b0101111, 0, 7}, {0b010, 12, 3}, {0b00000, 27, 5}}}},
+  {"AMOAND_W", {{{0b0101111, 0, 7}, {0b010, 12, 3}, {0b01100, 27, 5}}}},
+  {"AMOMAX_W", {{{0b0101111, 0, 7}, {0b010, 12, 3}, {0b10100, 27, 5}}}},
+  {"AMOMAXU_W", {{{0b0101111, 0, 7}, {0b010, 12, 3}, {0b11100, 27, 5}}}},
+  {"AMOMIN_W", {{{0b0101111, 0, 7}, {0b010, 12, 3}, {0b10000, 27, 5}}}},
+  {"AMOMINU_W", {{{0b0101111, 0, 7}, {0b010, 12, 3}, {0b11000, 27, 5}}}},
+  {"AMOOR_W", {{{0b0101111, 0, 7}, {0b010, 12, 3}, {0b01000, 27, 5}}}},
+  {"AMOSWAP_W", {{{0b0101111, 0, 7}, {0b010, 12, 3}, {0b00001, 27, 5}}}},
+  {"AMOXOR_W", {{{0b0101111, 0, 7}, {0b010, 12, 3}, {0b00100, 27, 5}}}},
+  {"LR_W",
+   {{{0b0101111, 0, 7}, {0b010, 12, 3}, {0b00000, 20, 5}, {0b00010, 27, 5}}}},
+  {"SC_W", {{{0b0101111, 0, 7}, {0b010, 12, 3}, {0b00011, 27, 5}}}}};
+
+static std::map<std::string, Instruction> RV64AInsts {
+  {"AMOADD_D", {{{0b0101111, 0, 7}, {0b011, 12, 3}, {0b00000, 27, 5}}}},
+  {"AMOAND_D", {{{0b0101111, 0, 7}, {0b011, 12, 3}, {0b01100, 27, 5}}}},
+  {"AMOMAX_D", {{{0b0101111, 0, 7}, {0b011, 12, 3}, {0b10100, 27, 5}}}},
+  {"AMOMAXU_D", {{{0b0101111, 0, 7}, {0b011, 12, 3}, {0b11100, 27, 5}}}},
+  {"AMOMIN_D", {{{0b0101111, 0, 7}, {0b011, 12, 3}, {0b10000, 27, 5}}}},
+  {"AMOMINU_D", {{{0b0101111, 0, 7}, {0b011, 12, 3}, {0b11000, 27, 5}}}},
+  {"AMOOR_D", {{{0b0101111, 0, 7}, {0b011, 12, 3}, {0b01000, 27, 5}}}},
+  {"AMOSWAP_D", {{{0b0101111, 0, 7}, {0b011, 12, 3}, {0b00001, 27, 5}}}},
+  {"AMOXOR_D", {{{0b0101111, 0, 7}, {0b011, 12, 3}, {0b00100, 27, 5}}}},
+  {"LR_D",
+   {{{0b0101111, 0, 7}, {0b011, 12, 3}, {0b00000, 20, 5}, {0b00010, 27, 5}}}},
+  {"SC_D", {{{0b0101111, 0, 7}, {0b011, 12, 3}, {0b00011, 27, 5}}}}};
+
+static std::map<std::string, Instruction> RVCInsts {
+  {"C_ADD", {{{0b10, 0, 2}, {0b1, 12, 1}, {0b100, 13, 3}}}},
+  {"C_ADDI", {{{0b01, 0, 2}, {0b000, 13, 3}}}},
+  {"C_ADDI16SP", {{{0b01, 0, 2}, {0b00010, 7, 5}, {0b011, 13, 3}}}},
+  {"C_ADDI4SPN", {{{0b00, 0, 2}, {0b000, 13, 3}}}},
+  {"C_AND", {{{0b01, 0, 2}, {0b11, 5, 2}, {0b011, 10, 3}, {0b100, 13, 3}}}},
+  {"C_ANDI", {{{0b01, 0, 2}, {0b10, 10, 2}, {0b100, 13, 3}}}},
+  {"C_BEQZ", {{{0b01, 0, 2}, {0b110, 13, 3}}}},
+  {"C_BNEZ", {{{0b01, 0, 2}, {0b111, 13, 3}}}},
+  {"C_EBREAK",
+   {{{0b10, 0, 2},
+     {0b00000, 2, 5},
+     {0b00000, 7, 5},
+     {0b1, 12, 1},
+     {0b100, 13, 3}}}},
+  {"C_FLD", {{{0b00, 0, 2}, {0b001, 13, 3}}}},
+  {"C_FLDSP", {{{0b10, 0, 2}, {0b001, 13, 3}}}},
+  {"C_FLWSP", {{{0b10, 0, 2}, {0b011, 13, 3}}}},
+  {"C_FSD", {{{0b00, 0, 2}, {0b101, 13, 3}}}},
+  {"C_FSDSP", {{{0b10, 0, 2}, {0b101, 13, 3}}}},
+  {"C_ILLEGAL",
+   {{{0b00, 0, 2}, {0b000, 2, 3}, {0b00000000, 5, 8}, {0b000, 13, 3}}}},
+  {"C_J", {{{0b01, 0, 2}, {0b101, 13, 3}}}},
+  {"C_JAL", {{{0b01, 0, 2}, {0b001, 13, 3}}}},
+  {"C_JALR", {{{0b10, 0, 2}, {0b00000, 2, 5}, {0b1, 12, 1}, {0b100, 13, 3}}}},
+  {"C_JR", {{{0b10, 0, 2}, {0b00000, 2, 5}, {0b0, 12, 1}, {0b100, 13, 3}}}},
+  {"C_LI", {{{0b01, 0, 2}, {0b010, 13, 3}}}},
+  {"C_LUI", {{{0b01, 0, 2}, {0b011, 13, 3}}}},
+  {"C_LW", {{{0b00, 0, 2}, {0b010, 13, 3}}}},
+  {"C_LWSP", {{{0b10, 0, 2}, {0b010, 13, 3}}}},
+  {"C_MV", {{{0b10, 0, 2}, {0b0, 12, 1}, {0b100, 13, 3}}}},
+  {"C_NOP",
+   {{{0b01, 0, 2},
+     {0b00000, 2, 5},
+     {0b00000, 7, 5},
+     {0b0, 12, 1},
+     {0b000, 13, 3}}}},
+  {"C_OR", {{{0b01, 0, 2}, {0b10, 5, 2}, {0b011, 10, 3}, {0b100, 13, 3}}}},
+  {"C_SLLI", {{{0b10, 0, 2}, {0b000, 13, 3}}}},
+  {"C_SRAI", {{{0b01, 0, 2}, {0b01, 10, 2}, {0b100, 13, 3}}}},
+  {"C_SRLI", {{{0b01, 0, 2}, {0b00, 10, 2}, {0b100, 13, 3}}}},
+  {"C_SUB", {{{0b01, 0, 2}, {0b00, 5, 2}, {0b011, 10, 3}, {0b100, 13, 3}}}},
+  {"C_SW", {{{0b00, 0, 2}, {0b110, 13, 3}}}},
+  {"C_SWSP", {{{0b10, 0, 2}, {0b110, 13, 3}}}},
+  {"C_XOR", {{{0b01, 0, 2}, {0b01, 5, 2}, {0b011, 10, 3}, {0b100, 13, 3}}}}};
+
+static std::map<std::string, Instruction> RV32CInsts {
+  {"C_FLW", {{{0b00, 0, 2}, {0b011, 13, 3}}}},
+  {"C_FSW", {{{0b00, 0, 2}, {0b111, 13, 3}}}},
+  {"C_FSWSP", {{{0b10, 0, 2}, {0b111, 13, 3}}}}};
+
+static std::map<std::string, Instruction> RV64CInsts {
+  {"C_ADDIW", {{{0b01, 0, 2}, {0b001, 13, 3}}}},
+  {"C_ADDW", {{{0b01, 0, 2}, {0b01, 5, 2}, {0b111, 10, 3}, {0b100, 13, 3}}}},
+  {"C_LD", {{{0b00, 0, 2}, {0b011, 13, 3}}}},
+  {"C_LDSP", {{{0b10, 0, 2}, {0b011, 13, 3}}}},
+  {"C_SD", {{{0b00, 0, 2}, {0b111, 13, 3}}}},
+  {"C_SDSP", {{{0b10, 0, 2}, {0b111, 13, 3}}}},
+  {"C_SUBW", {{{0b01, 0, 2}, {0b00, 5, 2}, {0b111, 10, 3}, {0b100, 13, 3}}}}};
+
+static std::map<std::string, Instruction> RVDInsts {
+  {"FADD_D", {{{0b1010011, 0, 7}, {0b0000001, 25, 7}}}},
+  {"FCLASS_D",
+   {{{0b1010011, 0, 7},
+     {0b001, 12, 3},
+     {0b00000, 20, 5},
+     {0b1110001, 25, 7}}}},
+  {"FCVT_D_S", {{{0b1010011, 0, 7}, {0b00000, 20, 5}, {0b0100001, 25, 7}}}},
+  {"FCVT_D_W", {{{0b1010011, 0, 7}, {0b00000, 20, 5}, {0b1101001, 25, 7}}}},
+  {"FCVT_D_WU", {{{0b1010011, 0, 7}, {0b00001, 20, 5}, {0b1101001, 25, 7}}}},
+  {"FCVT_S_D", {{{0b1010011, 0, 7}, {0b00001, 20, 5}, {0b0100000, 25, 7}}}},
+  {"FCVT_W_D", {{{0b1010011, 0, 7}, {0b00000, 20, 5}, {0b1100001, 25, 7}}}},
+  {"FCVT_WU_D", {{{0b1010011, 0, 7}, {0b00001, 20, 5}, {0b1100001, 25, 7}}}},
+  {"FDIV_D", {{{0b1010011, 0, 7}, {0b0001101, 25, 7}}}},
+  {"FEQ_D", {{{0b1010011, 0, 7}, {0b010, 12, 3}, {0b1010001, 25, 7}}}},
+  {"FLD", {{{0b0000111, 0, 7}, {0b011, 12, 3}}}},
+  {"FLE_D", {{{0b1010011, 0, 7}, {0b000, 12, 3}, {0b1010001, 25, 7}}}},
+  {"FLT_D", {{{0b1010011, 0, 7}, {0b001, 12, 3}, {0b1010001, 25, 7}}}},
+  {"FMADD_D", {{{0b1000011, 0, 7}, {0b01, 25, 2}}}},
+  {"FMAX_D", {{{0b1010011, 0, 7}, {0b001, 12, 3}, {0b0010101, 25, 7}}}},
+  {"FMIN_D", {{{0b1010011, 0, 7}, {0b000, 12, 3}, {0b0010101, 25, 7}}}},
+  {"FMSUB_D", {{{0b1000111, 0, 7}, {0b01, 25, 2}}}},
+  {"FMUL_D", {{{0b1010011, 0, 7}, {0b0001001, 25, 7}}}},
+  {"FNMADD_D", {{{0b1001111, 0, 7}, {0b01, 25, 2}}}},
+  {"FNMSUB_D", {{{0b1001011, 0, 7}, {0b01, 25, 2}}}},
+  {"FSD", {{{0b0100111, 0, 7}, {0b011, 12, 3}}}},
+  {"FSGNJ_D", {{{0b1010011, 0, 7}, {0b000, 12, 3}, {0b0010001, 25, 7}}}},
+  {"FSGNJN_D", {{{0b1010011, 0, 7}, {0b001, 12, 3}, {0b0010001, 25, 7}}}},
+  {"FSGNJX_D", {{{0b1010011, 0, 7}, {0b010, 12, 3}, {0b0010001, 25, 7}}}},
+  {"FSQRT_D", {{{0b1010011, 0, 7}, {0b00000, 20, 5}, {0b0101101, 25, 7}}}},
+  {"FSUB_D", {{{0b1010011, 0, 7}, {0b0000101, 25, 7}}}}};
+
+static std::map<std::string, Instruction> RV64DInsts {
+  {"FCVT_D_L", {{{0b1010011, 0, 7}, {0b00010, 20, 5}, {0b1101001, 25, 7}}}},
+  {"FCVT_D_LU", {{{0b1010011, 0, 7}, {0b00011, 20, 5}, {0b1101001, 25, 7}}}},
+  {"FCVT_L_D", {{{0b1010011, 0, 7}, {0b00010, 20, 5}, {0b1100001, 25, 7}}}},
+  {"FCVT_LU_D", {{{0b1010011, 0, 7}, {0b00011, 20, 5}, {0b1100001, 25, 7}}}},
+  {"FMV_D_X",
+   {{{0b1010011, 0, 7},
+     {0b000, 12, 3},
+     {0b00000, 20, 5},
+     {0b1111001, 25, 7}}}},
+  {"FMV_X_D",
+   {{{0b1010011, 0, 7},
+     {0b000, 12, 3},
+     {0b00000, 20, 5},
+     {0b1110001, 25, 7}}}}};
+
+static std::map<std::string, Instruction> RVFInsts {
+  {"FADD_S", {{{0b1010011, 0, 7}, {0b0000000, 25, 7}}}},
+  {"FCLASS_S",
+   {{{0b1010011, 0, 7},
+     {0b001, 12, 3},
+     {0b00000, 20, 5},
+     {0b1110000, 25, 7}}}},
+  {"FCVT_S_W", {{{0b1010011, 0, 7}, {0b00000, 20, 5}, {0b1101000, 25, 7}}}},
+  {"FCVT_S_WU", {{{0b1010011, 0, 7}, {0b00001, 20, 5}, {0b1101000, 25, 7}}}},
+  {"FCVT_W_S", {{{0b1010011, 0, 7}, {0b00000, 20, 5}, {0b1100000, 25, 7}}}},
+  {"FCVT_WU_S", {{{0b1010011, 0, 7}, {0b00001, 20, 5}, {0b1100000, 25, 7}}}},
+  {"FDIV_S", {{{0b1010011, 0, 7}, {0b0001100, 25, 7}}}},
+  {"FEQ_S", {{{0b1010011, 0, 7}, {0b010, 12, 3}, {0b1010000, 25, 7}}}},
+  {"FLE_S", {{{0b1010011, 0, 7}, {0b000, 12, 3}, {0b1010000, 25, 7}}}},
+  {"FLT_S", {{{0b1010011, 0, 7}, {0b001, 12, 3}, {0b1010000, 25, 7}}}},
+  {"FLW", {{{0b0000111, 0, 7}, {0b010, 12, 3}}}},
+  {"FMAX_S", {{{0b1010011, 0, 7}, {0b001, 12, 3}, {0b0010100, 25, 7}}}},
+  {"FMADD_S", {{{0b1000011, 0, 7}, {0b00, 25, 2}}}},
+  {"FMIN_S", {{{0b1010011, 0, 7}, {0b000, 12, 3}, {0b0010100, 25, 7}}}},
+  {"FMSUB_S", {{{0b1000111, 0, 7}, {0b00, 25, 2}}}},
+  {"FMUL_S", {{{0b1010011, 0, 7}, {0b0001000, 25, 7}}}},
+  {"FMV_X_W",
+   {{{0b1010011, 0, 7},
+     {0b000, 12, 3},
+     {0b00000, 20, 5},
+     {0b1110000, 25, 7}}}},
+  {"FMV_W_X",
+   {{{0b1010011, 0, 7},
+     {0b000, 12, 3},
+     {0b00000, 20, 5},
+     {0b1111000, 25, 7}}}},
+  {"FNMADD_S", {{{0b1001111, 0, 7}, {0b00, 25, 2}}}},
+  {"FNMSUB_S", {{{0b1001011, 0, 7}, {0b00, 25, 2}}}},
+  {"FSGNJ_S", {{{0b1010011, 0, 7}, {0b000, 12, 3}, {0b0010000, 25, 7}}}},
+  {"FSGNJN_S", {{{0b1010011, 0, 7}, {0b001, 12, 3}, {0b0010000, 25, 7}}}},
+  {"FSGNJX_S", {{{0b1010011, 0, 7}, {0b010, 12, 3}, {0b0010000, 25, 7}}}},
+  {"FSQRT_S", {{{0b1010011, 0, 7}, {0b00000, 20, 5}, {0b0101100, 25, 7}}}},
+  {"FSUB_S", {{{0b1010011, 0, 7}, {0b0000100, 25, 7}}}},
+  {"FSW", {{{0b0100111, 0, 7}, {0b010, 12, 3}}}}};
+
+static std::map<std::string, Instruction> RV64FInsts {
+  {"FCVT_L_S", {{{0b1010011, 0, 7}, {0b00010, 20, 5}, {0b1100000, 25, 7}}}},
+  {"FCVT_LU_S", {{{0b1010011, 0, 7}, {0b00011, 20, 5}, {0b1100000, 25, 7}}}},
+  {"FCVT_S_L", {{{0b1010011, 0, 7}, {0b00010, 20, 5}, {0b1101000, 25, 7}}}},
+  {"FCVT_S_LU", {{{0b1010011, 0, 7}, {0b00011, 20, 5}, {0b1101000, 25, 7}}}}};
+
+static std::map<std::string, Instruction> RVIInsts {
+  {"ADD", {{{0b0110011, 0, 7}, {0b000, 12, 3}, {0b0000000, 25, 7}}}},
+  {"ADDI", {{{0b0010011, 0, 7}, {0b000, 12, 3}}}},
+  {"AND", {{{0b0110011, 0, 7}, {0b111, 12, 3}, {0b0000000, 25, 7}}}},
+  {"ANDI", {{{0b0010011, 0, 7}, {0b111, 12, 3}}}},
+  {"AUIPC", {{{0b0010111, 0, 7}}}},
+  {"BEQ", {{{0b1100011, 0, 7}, {0b000, 12, 3}}}},
+  {"BGE", {{{0b1100011, 0, 7}, {0b101, 12, 3}}}},
+  {"BGEU", {{{0b1100011, 0, 7}, {0b111, 12, 3}}}},
+  {"BLT", {{{0b1100011, 0, 7}, {0b100, 12, 3}}}},
+  {"BLTU", {{{0b1100011, 0, 7}, {0b110, 12, 3}}}},
+  {"BNE", {{{0b1100011, 0, 7}, {0b001, 12, 3}}}},
+  {"CSRRC", {{{0b1110011, 0, 7}, {0b011, 12, 3}}}},
+  {"CSRRCI", {{{0b1110011, 0, 7}, {0b111, 12, 3}}}},
+  {"CSRRS", {{{0b1110011, 0, 7}, {0b010, 12, 3}}}},
+  {"CSRRSI", {{{0b1110011, 0, 7}, {0b110, 12, 3}}}},
+  {"CSRRW", {{{0b1110011, 0, 7}, {0b001, 12, 3}}}},
+  {"CSRRWI", {{{0b1110011, 0, 7}, {0b101, 12, 3}}}},
+  {"EBREAK",
+   {{{0b1110011, 0, 7},
+     {0b00000, 7, 5},
+     {0b000, 12, 3},
+     {0b00000, 15, 5},
+     {0b000000000001, 20, 12}}}},
+  {"ECALL",
+   {{{0b1110011, 0, 7},
+     {0b00000, 7, 5},
+     {0b000, 12, 3},
+     {0b00000, 15, 5},
+     {0b000000000000, 20, 12}}}},
+  {"FENCE",
+   {{{0b0001111, 0, 7},
+     {0b00000, 7, 5},
+     {0b000, 12, 3},
+     {0b00000, 15, 5},
+     {0b0000, 28, 4}}}},
+  {"FENCE.I", // NOTE(review): enum names cannot contain '.'; confirm this key.
+   {{{0b0001111, 0, 7},
+     {0b00000, 7, 5},
+     {0b001, 12, 3},
+     {0b00000, 15, 5},
+     {0b0000, 20, 4},
+     {0b0000, 24, 4},
+     {0b0000, 28, 4}}}},
+  {"JAL", {{{0b1101111, 0, 7}}}},
+  {"JALR", {{{0b1100111, 0, 7}, {0b000, 12, 3}}}},
+  {"LB", {{{0b0000011, 0, 7}, {0b000, 12, 3}}}},
+  {"LBU", {{{0b0000011, 0, 7}, {0b100, 12, 3}}}},
+  {"LH", {{{0b0000011, 0, 7}, {0b001, 12, 3}}}},
+  {"LHU", {{{0b0000011, 0, 7}, {0b101, 12, 3}}}},
+  {"LW", {{{0b0000011, 0, 7}, {0b010, 12, 3}}}},
+  {"LUI", {{{0b0110111, 0, 7}}}},
+  {"OR", {{{0b0110011, 0, 7}, {0b110, 12, 3}, {0b0000000, 25, 7}}}},
+  {"ORI", {{{0b0010011, 0, 7}, {0b110, 12, 3}}}},
+  {"SB", {{{0b0100011, 0, 7}, {0b000, 12, 3}}}},
+  {"SH", {{{0b0100011, 0, 7}, {0b001, 12, 3}}}},
+  {"SLL", {{{0b0110011, 0, 7}, {0b001, 12, 3}, {0b0000000, 25, 7}}}},
+  {"SLLI", {{{0b0010011, 0, 7}, {0b001, 12, 3}, {0b0000000, 25, 7}}}},
+  {"SLT", {{{0b0110011, 0, 7}, {0b010, 12, 3}, {0b0000000, 25, 7}}}},
+  {"SLTI", {{{0b0010011, 0, 7}, {0b010, 12, 3}}}},
+  {"SLTU", {{{0b0110011, 0, 7}, {0b011, 12, 3}, {0b0000000, 25, 7}}}},
+  {"SLTIU", {{{0b0010011, 0, 7}, {0b011, 12, 3}}}},
+  {"SRA", {{{0b0110011, 0, 7}, {0b101, 12, 3}, {0b0100000, 25, 7}}}},
+  {"SRAI", {{{0b0010011, 0, 7}, {0b101, 12, 3}, {0b0100000, 25, 7}}}},
+  {"SRL", {{{0b0110011, 0, 7}, {0b101, 12, 3}, {0b0000000, 25, 7}}}},
+  {"SRLI", {{{0b0010011, 0, 7}, {0b101, 12, 3}, {0b0000000, 25, 7}}}},
+  {"SUB", {{{0b0110011, 0, 7}, {0b000, 12, 3}, {0b0100000, 25, 7}}}},
+  {"SW", {{{0b0100011, 0, 7}, {0b010, 12, 3}}}},
+  {"XOR", {{{0b0110011, 0, 7}, {0b100, 12, 3}, {0b0000000, 25, 7}}}},
+  {"XORI", {{{0b0010011, 0, 7}, {0b100, 12, 3}}}}};
+
+static std::map<std::string, Instruction> RV64IInsts {
+  {"ADDIW", {{{0b0011011, 0, 7}, {0b000, 12, 3}}}},
+  {"ADDW", {{{0b0111011, 0, 7}, {0b000, 12, 3}, {0b0000000, 25, 7}}}},
+  {"LD", {{{0b0000011, 0, 7}, {0b011, 12, 3}}}},
+  {"LWU", {{{0b0000011, 0, 7}, {0b110, 12, 3}}}},
+  {"SD", {{{0b0100011, 0, 7}, {0b011, 12, 3}}}},
+  {"SLLIW", {{{0b0011011, 0, 7}, {0b001, 12, 3}, {0b0000000, 25, 7}}}},
+  {"SLLW", {{{0b0111011, 0, 7}, {0b001, 12, 3}, {0b0000000, 25, 7}}}},
+  {"SRAIW", {{{0b0011011, 0, 7}, {0b101, 12, 3}, {0b0100000, 25, 7}}}},
+  {"SRAW", {{{0b0111011, 0, 7}, {0b101, 12, 3}, {0b0100000, 25, 7}}}},
+  {"SRLIW", {{{0b0011011, 0, 7}, {0b101, 12, 3}, {0b0000000, 25, 7}}}},
+  {"SRLW", {{{0b0111011, 0, 7}, {0b101, 12, 3}, {0b0000000, 25, 7}}}},
+  {"SUBW", {{{0b0111011, 0, 7}, {0b000, 12, 3}, {0b0100000, 25, 7}}}}};
+
+static std::map<std::string, Instruction> RVMInsts {
+  {"DIV", {{{0b0110011, 0, 7}, {0b100, 12, 3}, {0b0000001, 25, 7}}}},
+  {"DIVU", {{{0b0110011, 0, 7}, {0b101, 12, 3}, {0b0000001, 25, 7}}}},
+  {"MUL", {{{0b0110011, 0, 7}, {0b000, 12, 3}, {0b0000001, 25, 7}}}},
+  {"MULH", {{{0b0110011, 0, 7}, {0b001, 12, 3}, {0b0000001, 25, 7}}}},
+  {"MULHSU", {{{0b0110011, 0, 7}, {0b010, 12, 3}, {0b0000001, 25, 7}}}},
+  {"MULHU", {{{0b0110011, 0, 7}, {0b011, 12, 3}, {0b0000001, 25, 7}}}},
+  {"REM", {{{0b0110011, 0, 7}, {0b110, 12, 3}, {0b0000001, 25, 7}}}},
+  {"REMU", {{{0b0110011, 0, 7}, {0b111, 12, 3}, {0b0000001, 25, 7}}}}};
+
+static std::map<std::string, Instruction> RV64MInsts {
+  {"DIVUW", {{{0b0111011, 0, 7}, {0b101, 12, 3}, {0b0000001, 25, 7}}}},
+  {"DIVW", {{{0b0111011, 0, 7}, {0b100, 12, 3}, {0b0000001, 25, 7}}}},
+  {"MULW", {{{0b0111011, 0, 7}, {0b000, 12, 3}, {0b0000001, 25, 7}}}},
+  {"REMUW", {{{0b0111011, 0, 7}, {0b111, 12, 3}, {0b0000001, 25, 7}}}},
+  {"REMW", {{{0b0111011, 0, 7}, {0b110, 12, 3}, {0b0000001, 25, 7}}}}};
+
+uint32_t GenerateInstEncoding(std::string Opcode, uint32_t Enc,
+                              std::map<std::string, Instruction> &Map);
+void PrintEncoding(std::ostream &OS, uint32_t InstEnc, bool Compressed);
+
+#endif // INSTRUCTION_TABLES_H
Index: tools/llvm-mc-disassemble-proto-fuzzer/proto-to-encoding/riscv_inst_encoding.cpp
===================================================================
--- /dev/null
+++ tools/llvm-mc-disassemble-proto-fuzzer/proto-to-encoding/riscv_inst_encoding.cpp
@@ -0,0 +1,70 @@
+//==-- riscv_inst_encoding.cpp - Protobuf-Encoding conversion -------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Defines function to generate 32-bit value from opcode and other fixed fields
+// for fuzzing of RISC-V machine instructions, and function used to print this
+// value.
+//
+//===----------------------------------------------------------------------===//
+#include "riscv_inst_encoding.h"
+
+#include <iomanip>
+#include <ostream>
+
+using namespace mc_proto_fuzzer;
+
+/// Returns \p Enc with the fixed fields of \p Opcode forced to the values
+/// listed for it in \p Map; all other bits of \p Enc are kept.
+///
+/// If \p Opcode is not in \p Map, \p Enc is returned unchanged and \p Map is
+/// left unmodified.
+uint32_t GenerateInstEncoding(std::string Opcode, uint32_t Enc,
+                              std::map<std::string, Instruction> &Map) {
+  // Look the opcode up once; find() does not insert an empty entry for an
+  // unknown opcode the way operator[] does.
+  auto It = Map.find(Opcode);
+  if (It == Map.end())
+    return Enc;
+
+  // Values of all fixed fields, already shifted into position.
+  uint32_t FixedBits = 0;
+  // Bits of the encoding that are covered by some fixed field.
+  uint32_t FixedMask = 0;
+  for (const Field &F : It->second.Fields) {
+    // A field that is empty or starts past bit 31 cannot affect the result.
+    if (F.Size == 0 || F.Pos >= 32)
+      continue;
+    // F.Size low bits set. Shifting a 32-bit value by 32 or more is
+    // undefined, so a full-width field is handled separately.
+    const uint32_t LowMask =
+        F.Size >= 32 ? ~uint32_t(0) : (uint32_t(1) << F.Size) - 1;
+    FixedBits |= (F.Value & LowMask) << F.Pos;
+    FixedMask |= LowMask << F.Pos;
+  }
+
+  // Clear the fixed positions in the random value, then fill them in.
+  return (Enc & ~FixedMask) | FixedBits;
+}
+
+/// Writes the low two bytes (if \p Compressed) or all four bytes of
+/// \p InstEnc to \p OS, least significant byte first, each as " 0xXX". The
+/// stream's format flags and fill character are restored before returning.
+void PrintEncoding(std::ostream &OS, uint32_t InstEnc, bool Compressed) {
+  const int NumBytes = Compressed ? 2 : 4;
+  const std::ios_base::fmtflags SavedFlags = OS.flags();
+  const char SavedFill = OS.fill();
+  OS << std::hex << std::setfill('0');
+  for (int i = 0; i < NumBytes; ++i) {
+    // setw() only applies to the next insertion, so it is repeated per byte.
+    OS << " 0x" << std::setw(2) << (InstEnc & 0xFF);
+    InstEnc >>= 8;
+  }
+  OS.fill(SavedFill);
+  OS.flags(SavedFlags);
+}
Index: tools/llvm-mc-disassemble-proto-fuzzer/proto-to-encoding/unconstrained_proto_to_encoding.cpp
===================================================================
--- /dev/null
+++ tools/llvm-mc-disassemble-proto-fuzzer/proto-to-encoding/unconstrained_proto_to_encoding.cpp
@@ -0,0 +1,54 @@
+//==-- unconstrained_proto_to_encoding.cpp - Protobuf-Encoding conversion -===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implements functions for converting between protobufs and bit patterns
+// that represent a subset of the RISC-V machine instruction encodings. This
+// unconstrained version of the fuzzer generates a random 32-bit string. The
+// functions defined in this file print out the 32-bit string in the hexadecimal
+// format that can be passed to the LLVM MC Disassembler.
+//
+//===----------------------------------------------------------------------===//
+#include "proto_to_encoding.h"
+#include "unconstrained_encoding.pb.h"
+#include "riscv_inst_encoding.h"
+
+#include <cstdint>
+#include <ostream>
+#include <sstream>
+#include <string>
+
+using namespace google::protobuf;
+
+namespace mc_proto_fuzzer {
+
+/// Prints the four bytes of the fuzzer-provided value, with no fixed fields
+/// applied and no trailing newline.
+std::ostream &operator<<(std::ostream &OS, const Encoding &X) {
+  PrintEncoding(OS, X.encoding(), false /*Compressed*/);
+  return OS;
+}
+
+/// Returns the text that operator<< produces for \p Input, plus a newline.
+std::string FunctionToString(const Encoding &Input) {
+  std::ostringstream OS;
+  OS << Input << "\n";
+  return OS.str();
+}
+
+/// Parses \p Size bytes at \p Data as a serialized Encoding. Messages with
+/// missing required fields are accepted. Returns the textual encoding, or an
+/// "#error" line if the bytes cannot be parsed.
+std::string ProtoToEncoding(const uint8_t *Data, size_t Size) {
+  Encoding Message;
+  if (!Message.ParsePartialFromArray(Data, Size))
+    return "#error invalid proto\n";
+  return FunctionToString(Message);
+}
+
+} // namespace mc_proto_fuzzer