Index: CMakeLists.txt
===================================================================
--- CMakeLists.txt
+++ CMakeLists.txt
@@ -304,6 +304,10 @@
 option(BUILD_SHARED_LIBS
   "Build all libraries as shared libraries instead of static" OFF)
 
+# Opt-in switch for the protobuf-based MC assembler fuzzers under
+# tools/llvm-mc-assemble-proto-fuzzer (needs Protobuf and libprotobuf-mutator).
+option(ENABLE_ASSEMBLE_PROTO_FUZZER "Build LLVM MC -assemble protobuf fuzzer." OFF)
+
 option(LLVM_ENABLE_BACKTRACES "Enable embedding backtraces on crash." ON)
 if(LLVM_ENABLE_BACKTRACES)
   set(ENABLE_BACKTRACES 1)
Index: cmake/modules/ProtobufMutator.cmake
===================================================================
--- /dev/null
+++ cmake/modules/ProtobufMutator.cmake
@@ -0,0 +1,31 @@
+# Downloads and builds libprotobuf-mutator as an external project and exports
+#   ProtobufMutator_INCLUDE_DIRS - the external project's source directory
+#   ProtobufMutator_LIBRARIES    - the libFuzzer glue and core static libraries
+# for the directory that includes this module.
+#
+# TODO: Fix double build of protobuf_mutator
+
+# ExternalProject_Add is only defined once the ExternalProject module is loaded.
+include(ExternalProject)
+
+set(PBM_PREFIX protobuf_mutator)
+set(PBM_PATH ${CMAKE_CURRENT_BINARY_DIR}/${PBM_PREFIX}/src/${PBM_PREFIX})
+set(PBM_LIB_PATH ${PBM_PATH}-build/src/libprotobuf-mutator.a)
+set(PBM_FUZZ_LIB_PATH ${PBM_PATH}-build/src/libfuzzer/libprotobuf-mutator-libfuzzer.a)
+
+ExternalProject_Add(${PBM_PREFIX}
+  PREFIX ${PBM_PREFIX}
+  GIT_REPOSITORY https://github.com/google/libprotobuf-mutator.git
+  # NOTE(review): tracking master makes the build non-reproducible; consider
+  # pinning a known-good commit.
+  GIT_TAG master
+  CMAKE_ARGS -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
+  CMAKE_CACHE_ARGS -DCMAKE_C_COMPILER:FILEPATH=${CMAKE_C_COMPILER}
+                   -DCMAKE_CXX_COMPILER:FILEPATH=${CMAKE_CXX_COMPILER}
+  BUILD_BYPRODUCTS ${PBM_LIB_PATH} ${PBM_FUZZ_LIB_PATH}
+  UPDATE_COMMAND ""
+  INSTALL_COMMAND ""
+  )
+
+set(ProtobufMutator_INCLUDE_DIRS ${PBM_PATH})
+set(ProtobufMutator_LIBRARIES ${PBM_FUZZ_LIB_PATH} ${PBM_LIB_PATH})
Index: tools/CMakeLists.txt
===================================================================
--- tools/CMakeLists.txt
+++ tools/CMakeLists.txt
@@ -39,6 +39,7 @@
 add_llvm_tool_subdirectory(llvm-config)
 add_llvm_tool_subdirectory(llvm-lto)
 add_llvm_tool_subdirectory(llvm-profdata)
+add_llvm_tool_subdirectory(llvm-mc-assemble-proto-fuzzer)
 
 # Projects supported via LLVM_EXTERNAL_*_SOURCE_DIR need to be explicitly
 # specified.
Index: tools/llvm-mc-assemble-proto-fuzzer/CMakeLists.txt
===================================================================
--- /dev/null
+++ tools/llvm-mc-assemble-proto-fuzzer/CMakeLists.txt
@@ -0,0 +1,140 @@
+# Builds the protobuf-driven llvm-mc assembler fuzzers. The proto libraries and
+# fuzzer executables are only configured when ENABLE_ASSEMBLE_PROTO_FUZZER is
+# ON; the handle-asm helper library is always added.
+
+set(LLVM_LINK_COMPONENTS ${LLVM_TARGETS_TO_BUILD} FuzzMutate)
+set(CXX_FLAGS_NOFUZZ ${CMAKE_CXX_FLAGS})
+# DummyMCFuzzer.cpp supplies main() when no fuzzing engine is linked in.
+set(DUMMY_MAIN DummyMCFuzzer.cpp)
+if(LLVM_LIB_FUZZING_ENGINE)
+  unset(DUMMY_MAIN)
+elseif(LLVM_USE_SANITIZE_COVERAGE)
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=fuzzer")
+  set(CXX_FLAGS_NOFUZZ "${CXX_FLAGS_NOFUZZ} -fsanitize=fuzzer-no-link")
+  unset(DUMMY_MAIN)
+endif()
+
+# Needed by LLVM's CMake checks because this file defines multiple targets.
+set(LLVM_OPTIONAL_SOURCES
+  DummyMCFuzzer.cpp
+  ExampleMCProtoFuzzer.cpp
+  RISCVMCProtoFuzzer.cpp
+  )
+
+if(ENABLE_ASSEMBLE_PROTO_FUZZER)
+  # Create protobuf .h and .cc files, and put them in a library for use by
+  # llvm-mc-proto-fuzzer components.
+  find_package(Protobuf REQUIRED)
+  add_definitions(-DGOOGLE_PROTOBUF_NO_RTTI)
+  include_directories(${PROTOBUF_INCLUDE_DIRS})
+  include_directories(${CMAKE_CURRENT_BINARY_DIR})
+  protobuf_generate_cpp(EXAMPLE_PROTO_SRCS EXAMPLE_PROTO_HDRS
+    proto-files/example_asm_proto.proto)
+  protobuf_generate_cpp(RISCV_SRCS RISCV_HDRS
+    proto-files/riscv_operands.proto
+    proto-files/riscv.proto
+    proto-files/riscv_a.proto
+    proto-files/riscv_c.proto
+    proto-files/riscv_d.proto
+    proto-files/riscv_f.proto
+    proto-files/riscv_i.proto
+    proto-files/riscv_m.proto)
+  protobuf_generate_cpp(RISCV_OPND_VALS_SRCS RISCV_OPND_VALS_HDRS
+    proto-files/riscv_fuzz_opnd_values.proto
+    proto-files/riscv_operands.proto
+    proto-files/riscv_a.proto
+    proto-files/riscv_c.proto
+    proto-files/riscv_d.proto
+    proto-files/riscv_f.proto
+    proto-files/riscv_i.proto
+    proto-files/riscv_m.proto)
+
+  # Register every generated source list; each one is consumed by exactly one
+  # of the proto libraries below, not by all targets in this file.
+  list(APPEND LLVM_OPTIONAL_SOURCES
+    ${EXAMPLE_PROTO_SRCS}
+    ${RISCV_SRCS}
+    ${RISCV_OPND_VALS_SRCS}
+    )
+
+  llvm_add_library(mcASMProto
+    ${EXAMPLE_PROTO_SRCS}
+    ${EXAMPLE_PROTO_HDRS}
+
+    LINK_LIBS
+    ${PROTOBUF_LIBRARIES}
+    )
+
+  llvm_add_library(mcRISCVASMProto
+    ${RISCV_SRCS}
+    ${RISCV_HDRS}
+
+    LINK_LIBS
+    ${PROTOBUF_LIBRARIES}
+    )
+
+  llvm_add_library(mcRISCVFuzzOpndValuesASMProto
+    ${RISCV_OPND_VALS_SRCS}
+    ${RISCV_OPND_VALS_HDRS}
+
+    LINK_LIBS
+    ${PROTOBUF_LIBRARIES}
+    )
+
+  # Build and include libprotobuf-mutator
+  include(ProtobufMutator)
+  include_directories(${ProtobufMutator_INCLUDE_DIRS})
+
+  # Build the .proto files.
+  add_llvm_subdirectory(LLVM TOOL proto-files)
+
+  # Build the protobuf->C++ translation library and driver.
+  add_subdirectory(proto-to-asm)
+
+  # Build the fuzzer executables, one per proto flavour.
+  add_llvm_executable(llvm-mc-assemble-proto-fuzzer-example
+    ${DUMMY_MAIN}
+    ExampleMCProtoFuzzer.cpp
+    )
+
+  add_llvm_executable(llvm-mc-assemble-proto-fuzzer-riscv-fuzz-opnd-values
+    ${DUMMY_MAIN}
+    RISCVMCProtoFuzzer.cpp
+    )
+
+  add_llvm_executable(llvm-mc-assemble-proto-fuzzer-riscv
+    ${DUMMY_MAIN}
+    RISCVMCProtoFuzzer.cpp
+    )
+
+  set(COMMON_PROTO_FUZZ_LIBRARIES
+    ${ProtobufMutator_LIBRARIES}
+    ${PROTOBUF_LIBRARIES}
+    ${LLVM_LIB_FUZZING_ENGINE}
+    mcHandleASM
+    )
+
+  target_link_libraries(llvm-mc-assemble-proto-fuzzer-example
+    PRIVATE
+    ${COMMON_PROTO_FUZZ_LIBRARIES}
+    mcASMProto
+    mcProtoToASM
+    )
+
+  target_link_libraries(llvm-mc-assemble-proto-fuzzer-riscv
+    PRIVATE
+    ${COMMON_PROTO_FUZZ_LIBRARIES}
+    mcRISCVASMProto
+    mcRISCVProtoToASM
+    )
+
+  target_link_libraries(llvm-mc-assemble-proto-fuzzer-riscv-fuzz-opnd-values
+    PRIVATE
+    ${COMMON_PROTO_FUZZ_LIBRARIES}
+    mcRISCVFuzzOpndValuesASMProto
+    mcRISCVFuzzOpndValuesProtoToASM
+    )
+
+endif()
+
+add_subdirectory(handle-asm)
Index: tools/llvm-mc-assemble-proto-fuzzer/DummyMCFuzzer.cpp
===================================================================
--- /dev/null
+++ tools/llvm-mc-assemble-proto-fuzzer/DummyMCFuzzer.cpp
@@ -0,0 +1,21 @@
+//===-- DummyMCFuzzer.cpp - Entry point to sanity check fuzzers -----------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois
Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Provides a main() to build without linking libFuzzer. +// +//===----------------------------------------------------------------------===// +#include "llvm/FuzzMutate/FuzzerCLI.h" + +extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size); +extern "C" int LLVMFuzzerInitialize(int *argc, char ***argv); + +int main(int argc, char *argv[]) { + return llvm::runFuzzerOnInputs(argc, argv, LLVMFuzzerTestOneInput, + LLVMFuzzerInitialize); +} Index: tools/llvm-mc-assemble-proto-fuzzer/ExampleMCProtoFuzzer.cpp =================================================================== --- /dev/null +++ tools/llvm-mc-assemble-proto-fuzzer/ExampleMCProtoFuzzer.cpp @@ -0,0 +1,27 @@ +//===-- ExampleMCProtoFuzzer.cpp - Fuzz Assembler --------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file implements a function that runs llvm mc assemble on a single +/// input and uses libprotobuf-mutator to find new inputs. This function is +/// then linked into the Fuzzer library. 
+/// +//===----------------------------------------------------------------------===// + +#include "example_asm_proto.pb.h" +#include "handle-asm/handle_asm.h" +#include "proto-to-asm/proto_to_asm.h" +#include "src/libfuzzer/libfuzzer_macro.h" + +using namespace mc_proto_fuzzer; + +DEFINE_BINARY_PROTO_FUZZER(const Assembly& input) { + auto S = FunctionToString(input); + HandleASM(S); +} Index: tools/llvm-mc-assemble-proto-fuzzer/README.txt =================================================================== --- /dev/null +++ tools/llvm-mc-assemble-proto-fuzzer/README.txt @@ -0,0 +1,120 @@ +------------------------------------------------------------------------------- +Building: +------------------------------------------------------------------------------- +From your LLVM source directory: +$ mkdir -p build/llvm && cd build/llvm +$ cmake -GNinja \ + -DLLVM_EXPERIMENTAL_TARGETS_TO_BUILD="RISCV" \ + -DCMAKE_VERBOSE_MAKEFILE=ON \ + -DCMAKE_C_COMPILER=clang \ + -DCMAKE_CXX_COMPILER=clang++ \ + -DLLVM_USE_SANITIZE_COVERAGE=YES \ + -DLLVM_USE_SANITIZER=Address \ + -DENABLE_ASSEMBLE_PROTO_FUZZER=ON \ + ../../llvm +$ ninja -v llvm-mc-assemble-proto-fuzzer-example \ + llvm-mc-assemble-proto-to-asm-example \ + llvm-mc-assemble-proto-fuzzer-riscv \ + llvm-mc-assemble-proto-to-asm-riscv \ + llvm-mc-assemble-proto-fuzzer-riscv-fuzz-opnd-values \ + llvm-mc-assemble-proto-to-asm-riscv-fuzz-opnd-values + +------------------------------------------------------------------------------- +Setting Up: +------------------------------------------------------------------------------- +Create a corpus directory and a directory for output files (for example, +./corpus and ./outputdir). + +Make sure that llvm-mc-assemble-proto-fuzzer-riscv and +llvm-mc-assemble-proto-to-asm-riscv +are in your path. + +------------------------------------------------------------------------------- +Running: +------------------------------------------------------------------------------- +A. 
If you would like to run the fuzzer for a long period of time. +------------------------------------------------------------------------------- +Run the fuzzer (llvm-mc-assemble-proto-fuzzer-riscv), specifying a corpus +directory and any other relevant arguments. + +For example, from your build/llvm directory: +$ ./bin/llvm-mc-assemble-proto-fuzzer-riscv corpus \ + -triple=riscv32 \ + -mattr=+a,+c,+m,+f,+d \ + -fuzzer-args \ + -max_len=32 + +When the fuzzer finishes running (if you restrict max_len to 32, the corpus +should converge relatively quickly, likely within an hour), then run the Python +script. +------------------------------------------------------------------------------- +B. If you would only like to run the fuzzer for a specific number of runs. +------------------------------------------------------------------------------- +Simply run it via the Python script and pass --runs [num_runs] on the +command line. + +Specify any args you would like to run the fuzzer with, including the corpus +directory and the full path to the GNU objdump and GNU assembler executables. + +For example: + +$ python mcfuzz.py --corpus corpus \ + --max-len 32 \ + --runs 1 \ + --triple riscv32 \ + --out outputdir \ + --objdump /full/path/to/riscv32-unknown-linux-gnu-objdump \ + --mattr +a,+c,+m,+d,+f \ + --march rv32imafdc \ + --assemble /full/path/to/riscv32-unknown-linux-gnu-as \ + --fuzz-opnd-values + +By default, the script will run the fuzzer that fuzzes not only operand values +(Register class, Immediate range) but also operand types and number of operands. +To restrict the fuzzing of operands to maintain the structure of instructions +(i.e. an ADD instruction will have three Register operands rather than a +random mutated combination of any operand types), use the flag: +--fuzz-opnd-values. + +Run the script with the --verbose flag if you would like to see the output +of the fuzzer as it runs, along with other relevant information. 
Otherwise, the +script will simply print out a summary of results at the end. + +NOTE: If you run the script without specifying an output directory [--out], +the script will terminate after fuzzing the corpus a specified +number [--runs] of times. + +After the first part of the script completes, the corpus directory should +contain the generated corpus files. + +Next, we populate the output directory by iterating through all the files +currently in the corpus. First, we run the golden assembler (GNU) and check its +behavior (successfully assembled, or failed to assemble) against the behavior of +our fuzz target (LLVM-MC assembler). If and only if both assemblers assemble the +input, we proceed. + +We use the llvm-mc-assemble-proto-to-asm-riscv tool to generate the .s files. +We use the llvm-mc-assemble-proto-fuzzer-riscv tool, invoked on each file +in the corpus, with -filetype=obj, and -runs=1, to generate a +corresponding .o file. Finally, we call objdump on the .o file to produce a +.objdump file, which can be compared (after some processing) to the .s file. + +Thus, the output directory should contain .o, .s, and .objdump files when the +script concludes. + +The last part of the script prints out a summary of results: on which files the +two assemblers (fuzz target LLVM assembler and golden assembler GNU assembler) +both failed, on which files only one assembler failed, and on which files both +assemblers successfully assembled the input assembly statements. + +The printed output will show the assembly statements that caused the fuzz target +LLVM assembler to fail (but not the golden assembler). It will also print out the +assembly statements that both assemblers assembled if there were any differences +between the golden disassembler's output and our original input assembly +statement. 
+
+-------------------------------------------------------------------------------
+Notes:
+-------------------------------------------------------------------------------
+Some instructions are given aliases by the golden disassembler, and therefore
+the diff sometimes fails even when both assemblers can assemble the input ASM.
Index: tools/llvm-mc-assemble-proto-fuzzer/RISCVMCProtoFuzzer.cpp
===================================================================
--- /dev/null
+++ tools/llvm-mc-assemble-proto-fuzzer/RISCVMCProtoFuzzer.cpp
@@ -0,0 +1,27 @@
+//===-- RISCVMCProtoFuzzer.cpp - Fuzz Assembler ---------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file implements a function that runs llvm mc assemble on a single
+/// input and uses libprotobuf-mutator to find new inputs. This function is
+/// then linked into the Fuzzer library.
+///
+//===----------------------------------------------------------------------===//
+
+#include "handle-asm/handle_asm.h"
+#include "proto-to-asm/proto_to_asm.h"
+#include "riscv.pb.h"
+#include "src/libfuzzer/libfuzzer_macro.h"
+
+using namespace mc_proto_fuzzer;
+
+DEFINE_BINARY_PROTO_FUZZER(const Assembly& input) {
+  auto S = FunctionToString(input);
+  HandleASM(S);
+}
Index: tools/llvm-mc-assemble-proto-fuzzer/example_mcfuzz.py
===================================================================
--- /dev/null
+++ tools/llvm-mc-assemble-proto-fuzzer/example_mcfuzz.py
@@ -0,0 +1,185 @@
+#!/usr/bin/env python
+"""Differential check of the example MC fuzzer against objdump.
+
+Step 1 runs the fuzzer to populate the corpus directory. If --out is given,
+each corpus file is then turned into a .s file, assembled into a .o file and
+disassembled with objdump, and the two instruction listings are diffed. The
+exit status is 0 when no diff is reported and 1 otherwise.
+"""
+
+import argparse
+import difflib
+import os
+import subprocess
+import sys
+
+# Tools built from this directory (see README.txt); both are looked up on PATH.
+FUZZER_TOOL = "llvm-mc-assemble-proto-fuzzer-example"
+PROTO_TO_ASM_TOOL = "llvm-mc-assemble-proto-to-asm-example"
+
+parser = argparse.ArgumentParser()
+
+# Flags for the directory names, corpus and outputdir.
+parser.add_argument("--corpus", type=str, help="corpus directory name",
+                    required=True)
+parser.add_argument("--out", type=str,
+                    help="output directory name for obj files", default=None)
+
+# Flags for llvm-mc-assemble-proto-fuzzer: triple, mattr, filetype.
+parser.add_argument("--triple", type=str, help="specify the triple",
+                    default="riscv32")
+parser.add_argument("--mattr", type=str, help="specify mattr",
+                    default="")
+parser.add_argument("--filetype", type=str, help="asm or obj",
+                    default="obj")
+
+# These args are passed in after the -fuzzer-args flag.
+parser.add_argument("--runs", type=int, help="number of runs", default=100)
+parser.add_argument("--max-len", type=int,
+                    help="limit instruction size for fuzzing", default=40)
+
+# Flag specifies the name of objdump executable.
+parser.add_argument("--objdump", type=str, help="specify the path to"
+                    " objdump")
+
+# Flag specifies that we should print out everything
+parser.add_argument("--verbose", dest="verbose", action="store_true")
+parser.set_defaults(verbose=False)
+
+args = parser.parse_args()
+
+# objdump is only used by the comparison steps, which need --out.
+if args.out is not None and not args.objdump:
+    parser.error("--objdump is required when --out is given")
+
+# Step 1: Invoke fuzzer to generate a corpus.
+fuzz_command = [FUZZER_TOOL, args.corpus,
+                "-triple=" + args.triple,
+                "-mattr=" + args.mattr,
+                "-filetype=" + args.filetype,
+                "-fuzzer-args",
+                "-runs=" + str(args.runs),
+                "-max_len=" + str(args.max_len)]
+# stderr is merged into stdout, so the captured stdout holds all diagnostics.
+fuzz_proc = subprocess.Popen(fuzz_command, stdout=subprocess.PIPE,
+                             stderr=subprocess.STDOUT,
+                             universal_newlines=True)
+fuzz_out, _ = fuzz_proc.communicate()
+if fuzz_proc.returncode != 0:
+    raise ValueError('failed to run fuzz {}: {}'.format(fuzz_command,
+                                                        fuzz_out))
+if args.verbose:
+    print(fuzz_out)
+
+# If the user specified an output directory, proceed to step 2; else, exit.
+if args.out is None:
+    print("No output directory specified; exiting after populating "
+          "corpus directory.")
+    sys.exit(0)
+
+if not os.path.isdir(args.out):
+    os.makedirs(args.out)
+
+# Keep track of which diffs pass / fail.
+passes = 0
+fails = 0
+list_of_failures = []
+# Iterate through the corpus body.
+# For each file, generate a .s and a .o file. Then, use objdump to
+# generate a .objdump file, which we will compare to the .s file.
+# Diff the .s with the corresponding .objdump file.
+for filename in os.listdir(args.corpus):
+
+    corpus_file = args.corpus + "/" + filename
+    filename_prefix = args.out + "/" + filename
+
+    # Step 2: Run proto-to-asm on corpus file to generate .s file.
+    proto_to_asm_command = [PROTO_TO_ASM_TOOL, corpus_file]
+    with open(filename_prefix + ".s", "w") as asm_file:
+        asm_status = subprocess.call(proto_to_asm_command, stdout=asm_file,
+                                     stderr=subprocess.STDOUT)
+    if asm_status != 0:
+        raise ValueError('failed to run {}: exit status {}'.format(
+            proto_to_asm_command, asm_status))
+
+    # Step 3: Generate .o files in the outputdir.
+    obj_files_command = [FUZZER_TOOL, corpus_file,
+                         "-triple=" + args.triple,
+                         "-mattr=" + args.mattr,
+                         "-out=" + args.out,
+                         "-filetype=obj",
+                         "-fuzzer-args", "-runs=1"]
+    obj_proc = subprocess.Popen(obj_files_command, stdout=subprocess.PIPE,
+                                stderr=subprocess.STDOUT,
+                                universal_newlines=True)
+    obj_out, _ = obj_proc.communicate()
+    if obj_proc.returncode != 0:
+        raise ValueError('failed to run fuzzer {}: {}'.format(
+            obj_files_command, obj_out))
+    if args.verbose:
+        print(obj_out)
+
+    # Step 4: Call objdump on each .o file in the output directory,
+    # to generate corresponding .objdump files.
+    objdump_command = [args.objdump, "-dr", "-M", "numeric",
+                       filename_prefix + ".o"]
+    with open(filename_prefix + ".objdump", "w") as objdump_file:
+        objdump_status = subprocess.call(objdump_command,
+                                         stdout=objdump_file,
+                                         stderr=subprocess.STDOUT)
+    if objdump_status != 0:
+        raise ValueError('failed to run objdump {}: exit status {}'.format(
+            objdump_command, objdump_status))
+
+    # Step 5: Process files generated by objdump so that the files only contain
+    # instructions.
+    # Only the lines containing asm instructions have tabs in them; skip all
+    # other lines (for example, header lines and whitespace).
+    # Each line of the objdump output looks something like this:
+    #    0:   00318033                add x0,x3,x3
+    # We remove the first two columns, leaving only the asm instruction.
+    with open(filename_prefix + ".objdump", "r") as objdump_file, \
+            open(filename_prefix + ".parsed_objdump", "w") as parsed_objdump:
+        for line in objdump_file:
+            if "\t" not in line:
+                continue
+            parsed_objdump.write(
+                "".join("\t" + part for part in line.split()[2:]) + "\n")
+
+    # Step 6 reads <prefix>.parsed_s, so derive it from the .s file with the
+    # same whitespace normalisation used for the objdump listing.
+    # NOTE(review): confirm this matches the layout proto-to-asm emits.
+    with open(filename_prefix + ".s", "r") as asm_file, \
+            open(filename_prefix + ".parsed_s", "w") as parsed_asm:
+        for line in asm_file:
+            parts = line.split()
+            if parts:
+                parsed_asm.write(
+                    "".join("\t" + part for part in parts) + "\n")
+
+    # Step 6: Diff the .s file generated by proto-to-asm and the .objdump file
+    # generated by objdump.
+    prefix = "Checking " + filename + "..."
+    with open(filename_prefix + ".parsed_s", "r") as file1, \
+            open(filename_prefix + ".parsed_objdump", "r") as file2:
+        delta = ''.join(x for x in difflib.ndiff(file1.readlines(),
+                                                 file2.readlines())
+                        if x.startswith(('- ', '+ ')))
+    if delta:
+        print(prefix + "FAILURE!")
+        fails = fails + 1
+        list_of_failures.append(filename)
+        list_of_failures.append(delta)
+    else:
+        print(prefix + "SUCCESS!")
+        passes = passes + 1
+
+print("Succeeded: " + str(passes) + "\t\tFailed: " + str(fails) +
+      "\t\t[Total: " + str(passes + fails) + "]")
+
+if (fails != 0):
+    print("The following files failed...")
+    for item in list_of_failures:
+        print(item)
+
+sys.exit(0 if fails == 0 else 1)
Index: tools/llvm-mc-assemble-proto-fuzzer/handle-asm/CMakeLists.txt
===================================================================
--- /dev/null
+++ tools/llvm-mc-assemble-proto-fuzzer/handle-asm/CMakeLists.txt
@@ -0,0 +1,5 @@
+set(LLVM_LINK_COMPONENTS ${LLVM_TARGETS_TO_BUILD} Support)
+
+add_llvm_library(mcHandleASM
+  handle_asm.cpp
+  )
Index: tools/llvm-mc-assemble-proto-fuzzer/handle-asm/handle_asm.h
===================================================================
--- /dev/null
+++ tools/llvm-mc-assemble-proto-fuzzer/handle-asm/handle_asm.h
@@ -0,0 +1,24 @@
+//==-- handle_asm.h - Helper function for mc fuzzers --------------------==//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Defines HandleASM for use by the MC fuzzers.
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_MC_TOOLS_MC_FUZZER_HANDLE_ASM_HANDLEASM_H +#define LLVM_MC_TOOLS_MC_FUZZER_HANDLE_ASM_HANDLEASM_H + +#include +#include + +namespace mc_proto_fuzzer { +void HandleASM(const std::string &S); +} // namespace mc_proto_fuzzer + +#endif Index: tools/llvm-mc-assemble-proto-fuzzer/handle-asm/handle_asm.cpp =================================================================== --- /dev/null +++ tools/llvm-mc-assemble-proto-fuzzer/handle-asm/handle_asm.cpp @@ -0,0 +1,320 @@ +//==-- handle_asm.cpp - Helper function for mc fuzzers ------------------==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Implements HandleASM for use by the mc fuzzers. +// +//===----------------------------------------------------------------------===// + +#include "handle_asm.h" + +#include "llvm-c/Target.h" +#include "llvm/MC/MCAsmBackend.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCCodeEmitter.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCObjectFileInfo.h" +#include "llvm/MC/MCParser/MCTargetAsmParser.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCObjectWriter.h" +#include "llvm/MC/MCTargetOptionsCommandFlags.inc" +#include "llvm/Support/SourceMgr.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/ToolOutputFile.h" + +#include + +using namespace llvm; +using namespace mc_proto_fuzzer; + +static cl::opt + OutputDirname("out", + cl::desc("Directory name for llvm-mc-assemble-proto-fuzzer " + "output when filetype=obj"), + cl::value_desc("dirname"), + cl::init("./outputdir")); + +static cl::opt + TripleName("triple", cl::desc("Target triple to assemble for, " + "see -version for available targets")); + +static cl::list + MAttrs("mattr", 
cl::CommaSeparated, + cl::desc("Target specific attributes (-mattr=help for details)"), + cl::value_desc("a1,+a2,-a3,...")); +// The feature string derived from -mattr's values. +std::string FeaturesStr; + +static cl::list + FuzzerArgs("fuzzer-args", cl::Positional, + cl::desc("Options to pass to the fuzzer"), cl::ZeroOrMore, + cl::PositionalEatsArgs); + +static std::vector ModifiedArgv; + +enum OutputFileType { + OFT_Null, + OFT_AssemblyFile, + OFT_ObjectFile +}; +static cl::opt + FileType("filetype", + cl::init(OFT_AssemblyFile), + cl::desc("Choose an output file type:"), + cl::values(clEnumValN(OFT_AssemblyFile, "asm", + "Emit an assembly ('.s') file"), + clEnumValN(OFT_Null, "null", + "Don't emit anything (for timing purposes)"), + clEnumValN(OFT_ObjectFile, "obj", + "Emit a native object ('.o') file"))); + +static std::unique_ptr GetOutputStream(StringRef Path) { + std::error_code EC; + auto Out = llvm::make_unique(Path, EC, sys::fs::F_None); + if (EC) { + errs() << EC.message() << '\n'; + return nullptr; + } + return Out; +} + +static std::string OutputFilename = ""; + +static int assembleInput(const char *ProgName, const Target *TheTarget, + SourceMgr &SrcMgr, MCContext &Ctx, MCStreamer &Str, + MCAsmInfo &MAI, MCSubtargetInfo &STI, + MCInstrInfo &MCII, MCTargetOptions &MCOptions) { + static const bool NoInitialTextSection = false; + + std::unique_ptr Parser( + createMCAsmParser(SrcMgr, Ctx, Str, MAI)); + + std::unique_ptr TAP( + TheTarget->createMCAsmParser(STI, *Parser, MCII, MCOptions)); + + if (!TAP) { + errs() << ProgName + << ": error: this target '" << TripleName + << "', does not support assembly parsing.\n"; + abort(); + } + + Parser->setShowParsedOperands(true); + Parser->setTargetParser(*TAP); + + return Parser->Run(NoInitialTextSection); +} + +void mc_proto_fuzzer::HandleASM(const std::string &S) { + + const bool ShowInst = true; + const bool AsmVerbose = true; + const bool UseDwarfDirectory = true; + + Triple 
TheTriple(Triple::normalize(TripleName)); + + SourceMgr SrcMgr; + + std::unique_ptr BufferPtr = + llvm::MemoryBuffer::getMemBuffer(S); + + // Tell SrcMgr about this buffer, which is what the parser will pick up. + SrcMgr.AddNewSourceBuffer(std::move(BufferPtr), SMLoc()); + + static const std::vector NoIncludeDirs; + SrcMgr.setIncludeDirs(NoIncludeDirs); + + static std::string ArchName; + std::string Error; + const Target *TheTarget = TargetRegistry::lookupTarget(ArchName, TheTriple, + Error); + if (!TheTarget) { + errs() << "error: this target '" << TheTriple.normalize() + << "/" << ArchName << "', was not found: '" << Error << "'\n"; + + abort(); + } + + std::unique_ptr MRI(TheTarget->createMCRegInfo(TripleName)); + if (!MRI) { + errs() << "Unable to create target register info!"; + abort(); + } + + std::unique_ptr MAI(TheTarget->createMCAsmInfo(*MRI, TripleName)); + if (!MAI) { + errs() << "Unable to create target asm info!"; + abort(); + } + + MCObjectFileInfo MOFI; + MCContext Ctx(MAI.get(), MRI.get(), &MOFI, &SrcMgr); + + static const bool UsePIC = false; + MOFI.InitMCObjectFileInfo(TheTriple, UsePIC, Ctx); + + const unsigned OutputAsmVariant = 0; + std::unique_ptr MCII(TheTarget->createMCInstrInfo()); + + MCInstPrinter *IP = nullptr; + const char *ProgName = "llvm-mc-assemble-proto-fuzzer"; + std::unique_ptr STI( + TheTarget->createMCSubtargetInfo(TripleName, "", FeaturesStr)); + std::unique_ptr MAB = nullptr; + + std::unique_ptr CE; + CE.reset(TheTarget->createMCCodeEmitter(*MCII, *MRI, Ctx)); + + MCTargetOptions MCOptions = InitMCTargetOptionsFromFlags(); + + std::string OutputString; + raw_string_ostream Out(OutputString); + auto FOut = llvm::make_unique(Out); + + std::unique_ptr Str; + + if (FileType == OFT_AssemblyFile) { + IP = TheTarget->createMCInstPrinter(Triple(TripleName), OutputAsmVariant, + *MAI, *MCII, *MRI); + if (!IP) { + errs() + << "error: unable to create instruction printer for target triple '" + << TheTriple.normalize() << "' with 
assembly variant " + << OutputAsmVariant << ".\n"; + + abort(); + } + + std::unique_ptr MAB( + TheTarget->createMCAsmBackend(*STI, *MRI, MCOptions)); + Str.reset(TheTarget->createAsmStreamer(Ctx, std::move(FOut), AsmVerbose, + UseDwarfDirectory, IP, std::move(CE), + std::move(MAB), ShowInst)); + Str->setUseAssemblerInfoForParsing(true); + const int Res = assembleInput(ProgName, TheTarget, SrcMgr, Ctx, *Str, + *MAI, *STI, *MCII, MCOptions); + + (void) Res; + } else { + assert(FileType == OFT_ObjectFile && "Invalid file type!"); + MCAsmBackend *MAB = TheTarget->createMCAsmBackend(*STI, *MRI, MCOptions); + + std::error_code EC; + if (OutputFilename.empty()) { + errs() << "output dir name is empty\n"; + OutputFilename = "-"; + } + errs() << "output file name is " << OutputFilename << "\n"; + std::unique_ptr Out = GetOutputStream(OutputFilename); + assert(Out && "Output Stream is null!"); + + // Don't waste memory on names of temp labels. + Ctx.setUseNamesOnTempLabels(false); + + std::unique_ptr BOS; + raw_pwrite_stream *OS = &Out->os(); + if (!Out->os().supportsSeeking()) { + BOS = make_unique(Out->os()); + OS = BOS.get(); + } + + Str.reset(TheTarget->createMCObjectStreamer( + TheTriple, Ctx, std::unique_ptr(MAB), + MAB->createObjectWriter(*OS), std::move(CE), *STI, + MCOptions.MCRelaxAll, MCOptions.MCIncrementalLinkerCompatible, + /*DWARFMustBeAtTheEnd*/ false)); + Str->setUseAssemblerInfoForParsing(true); + const int Res = assembleInput(ProgName, TheTarget, SrcMgr, Ctx, *Str, + *MAI, *STI, *MCII, MCOptions); + + Out->keep(); + + (void) Res; + } + + return; +} + +extern "C" LLVM_ATTRIBUTE_USED int LLVMFuzzerInitialize(int *argc, + char ***argv) { + // The command line is unusual compared to other fuzzers due to the need to + // specify the target. Options like -triple, -mcpu, and -mattr work like + // their counterparts in llvm-mc, while -fuzzer-args collects options for the + // fuzzer itself. 
+ // + // Examples: + // + // Fuzz the RISCV32 assembler using 100,000 inputs of up to 32-bytes each + // and use the contents of ./corpus as the test corpus: + // llvm-mc-assemble-proto-fuzzer -triple riscv32 \ + // -fuzzer-args -max_len=32 -runs=100000 ./corpus + // + // If your aim is to find instructions that are not tested, then it is + // advisable to constrain the maximum input size to a single instruction + // using -max_len as in the first example. This results in a test corpus of + // individual instructions that test unique paths. Without this constraint, + // there will be considerable redundancy in the corpus. + + char **OriginalArgv = *argv; + + LLVMInitializeAllTargetInfos(); + LLVMInitializeAllTargetMCs(); + LLVMInitializeAllAsmParsers(); + + cl::ParseCommandLineOptions(*argc, OriginalArgv); + + // Rebuild the argv without the arguments llvm-mc-assemble-proto-fuzzer + // consumed so that the driver can parse its arguments. + // + // FuzzerArgs cannot provide the non-const pointer that OriginalArgv needs. + // Re-use the strings from OriginalArgv instead of copying FuzzerArg to a + // non-const buffer to avoid the need to clean up when the fuzzer terminates. + + ModifiedArgv.push_back(OriginalArgv[0]); + for (const auto &FuzzerArg : FuzzerArgs) { + for (int i = 1; i < *argc; ++i) { + if (FuzzerArg == OriginalArgv[i]) + ModifiedArgv.push_back(OriginalArgv[i]); + } + } + + *argc = ModifiedArgv.size(); + *argv = ModifiedArgv.data(); + // Check for specified corpus directory or file. + // If user specifies a corpus file, set OutputFilename so that + // the corresponding .o file generated by the fuzz target can + // be saved. 
+ for (int i = 1; i < *argc; ++i) { + struct stat S; + if (stat(ModifiedArgv[i], &S) == 0) { + if (S.st_mode & S_IFREG) { + std::string NewFilename(ModifiedArgv[i]); + std::size_t Pos = NewFilename.find_last_of("/"); + if (Pos != std::string::npos) + NewFilename = NewFilename.erase(0, Pos + 1); + OutputFilename = OutputDirname + "/" + NewFilename + ".o"; + break; + } + } + } + + // Package up features to be passed to target/subtarget. + // We have to pass it via a global since the callback doesn't + // permit any user data. + if (MAttrs.size()) { + SubtargetFeatures Features; + for (unsigned i = 0; i != MAttrs.size(); ++i) + Features.AddFeature(MAttrs[i]); + FeaturesStr = Features.getString(); + } + + if (TripleName.empty()) + TripleName = sys::getDefaultTargetTriple(); + + return 0; +} Index: tools/llvm-mc-assemble-proto-fuzzer/mcfuzz.py =================================================================== --- /dev/null +++ tools/llvm-mc-assemble-proto-fuzzer/mcfuzz.py @@ -0,0 +1,351 @@ +#!/usr/bin/env python + +import argparse +import difflib +import os +import shlex +import subprocess +import sys +import tempfile + + +def main(): + parser = argparse.ArgumentParser() + parse_arguments(parser) + args = parser.parse_args() + + # Step 1: Invoke fuzzer to generate a corpus. + call_fuzzer(args) + + # If the user did not specify output dir, skip steps 2-8; exit. + if args.out is None: + print("No output directory specified; exiting after populating " + "corpus directory.") + sys.exit(0) + + # List of corpus files that LLVM Assembler and GNU Assembler + # both fail to assemble. + list_of_llvm_as_and_gnu_as_fails = [] + # List of corpus files that only LLVM Assembler fails to assemble. + list_of_llvm_as_fails = [] + # List of corpus files that only GNU Assembler fails to assemble. + list_of_gnu_as_fails = [] + # List of corpus files that both assemblers were able to assemble. + list_of_llvm_as_and_gnu_as_passes = [] + # Keep track of which diffs pass / fail. 
+ passes = 0 + fails = 0 + list_of_diffs = [] + + # Iterate through the corpus body. + + # For each file, re-run the fuzzer to generate an object file with the fuzz + # target. Also run the golden assembler (gnu as) to generate a reference + # object file. If both assemblers behave the same way, proceed to generate + # a .s file (using the proto-to-asm tool), disassemble the object file + # generated by the fuzz target (using objdump) and then compare the + # resulting .objdump with your .s file. + for filename in os.listdir(args.corpus): + filename_prefix = args.out + "/" + filename + # Step 2: Run fuzzer with filetype=obj; check for error in fuzz target. + target_error_occurred, out = call_fuzzer_on_corpus_file(args, filename) + + # Step 3: Run proto-to-asm on corpus file to generate .s file. + call_proto_to_asm(args, filename) + + # Step 4: Run golden assembler (gnu) and check for error. + golden_error_occurred, golden_out = call_golden_assembler(args, + filename) + + # Step 5: Compare behavior of fuzz target and golden assembler. 
+ #################################################################### + #-- Initial, tentative interpretation of results, based on the --# + #-- status (pass/fail) of tools (LLVM Assembler, GNU Assembler, --# + #-- and GNU Objdump) --# + #------------------------------------------------------------------# + # LLVM AS | GNU AS | GNU OBJDUMP | Conclusion # + #------------------------------------------------------------------# + # 0 | 0 | x | invalid/unimplemented instr # + # 0 | 1 | x | LLVM MC bug/unimplemented instr # + # 1 | 0 | x | LLVM MC bug # + # 1 | 1 | 0 | LLVM MC bug and GCC bug # + # 1 | 1 | 1 | success # + #################################################################### + if target_error_occurred and golden_error_occurred: + if args.verbose: + print("Both assemblers failed to assemble file: " + filename) + list_of_llvm_as_and_gnu_as_fails.append(filename) + continue + if target_error_occurred: + if args.verbose: + print("Only target Assembler failed to assemble file: " + + filename) + list_of_llvm_as_fails.append(filename) + out = out.split("\n") + for line in out: + if (line.startswith("error:")): + list_of_llvm_as_fails.append(line) + nextline = out[out.index(line) + 1] + list_of_llvm_as_fails.append(nextline) + continue + if golden_error_occurred: + if args.verbose: + print("Only golden Assembler failed to assemble file: " + + filename) + list_of_gnu_as_fails.append(filename) + error_msg = golden_out.split("Error:", golden_out.count("\n")) + list_of_gnu_as_fails.append(error_msg[1]) + continue + if args.verbose: + print("Both assemblers assembled file: " + filename) + list_of_llvm_as_and_gnu_as_passes.append(filename) + + # Step 6: Call objdump on each .o file (generated by fuzz target) in + # the output directory, to generate corresponding .objdump files. + objdump_file = call_objdump(args, filename) + + # Step 7: Process files generated by objdump so that the files only + # contain instructions. 
+ process_objdump_file(objdump_file, args, filename) + + # Step 8: Diff the .s file generated by proto-to-asm with .objdump file + # generated by objdump. + passes, fails, list_of_diffs = print_file_status(args, filename, + passes, fails, + list_of_diffs) + + print_result(passes, fails, list_of_llvm_as_and_gnu_as_fails, + list_of_llvm_as_fails, list_of_gnu_as_fails, + list_of_llvm_as_and_gnu_as_passes, list_of_diffs) + + sys.exit(0 if fails == 0 else 1) + + +def parse_arguments(parser): + # Flags for the directory names, corpus and outputdir. + parser.add_argument("--corpus", type=str, help="corpus directory name", + required=True) + parser.add_argument("--out", type=str, + help="output directory name for obj files", + default=None) + + # Flags for llvm-mc-assemble-proto-fuzzer: triple, mattr, filetype. + parser.add_argument("--triple", type=str, help="specify the triple", + default="riscv32") + parser.add_argument("--mattr", type=str, help="specify mattr", + default="") + parser.add_argument("--march", type=str, help="specify march", + default="rv32i") + parser.add_argument("--filetype", type=str, help="asm or obj", + default="obj") + + # These args are passed in after the -fuzzer-args flag. + parser.add_argument("--runs", type=int, help="number of runs", default=100) + parser.add_argument("--max-len", type=int, + help="limit instruction size for fuzzing", default=40) + + # Flag specifies the name of objdump executable. + parser.add_argument("--objdump", type=str, help="specify the path to" + " objdump") + + # Flag specifies the name of golden assembler executable. 
+ parser.add_argument("--assemble", type=str, help="specify the path to" + " golden assembler") + + # Flag specifies that we should print out everything + parser.add_argument("--verbose", dest="verbose", action="store_true") + parser.set_defaults(verbose=False) + + # Flag indicates that the script should run the fuzzer that fuzzes operand + # values only + parser.add_argument("--fuzz-opnd-values", dest="fuzz_opnd_values", + action="store_true") + parser.set_defaults(fuzz_opnd_values=False) + + +# This function calls the llvm-mc-assemble-proto-fuzzer with a given extension +def call_fuzzer(args): + if args.fuzz_opnd_values: + cmd = 'llvm-mc-assemble-proto-fuzzer-riscv-fuzz-opnd-values {corpus} \ + -triple={triple} -mattr={mattr} -filetype={filetype} ' \ + '-fuzzer-args -runs={runs} -max_len={max_len}' + else: + cmd = 'llvm-mc-assemble-proto-fuzzer-riscv {corpus} \ + -triple={triple} -mattr={mattr} -filetype={filetype} ' \ + '-fuzzer-args -runs={runs} -max_len={max_len}' + cmd = cmd.format(corpus=args.corpus, triple=args.triple, + mattr=args.mattr, filetype=args.filetype, + runs=args.runs, max_len=args.max_len) + fuzz_command = shlex.split(cmd) + + fuzz_proc = subprocess.Popen(fuzz_command, stdout=subprocess.PIPE, + stderr=subprocess.STDOUT) + fuzz_out, fuzz_err = fuzz_proc.communicate() + if fuzz_proc.returncode != 0: + raise ValueError('failed to run fuzz {}: {}'.format(fuzz_command, + fuzz_err)) + if args.verbose: + print(fuzz_out) + + +def call_fuzzer_on_corpus_file(args, filename): + target_error_occurred = False + if args.fuzz_opnd_values: + cmd = 'llvm-mc-assemble-proto-fuzzer-riscv-fuzz-opnd-values ' \ + '{corpus}/{file} -triple={triple} -mattr={mattr} ' \ + '-out={out} -filetype=obj -fuzzer-args -runs=1' + else: + cmd = 'llvm-mc-assemble-proto-fuzzer-riscv {corpus}/{file} \ + -triple={triple} -mattr={mattr} -filetype=obj ' \ + '-out={out} -fuzzer-args -runs=1' + cmd = cmd.format(corpus=args.corpus, file=filename, + triple=args.triple, mattr=args.mattr, 
out=args.out) + target_as_command = shlex.split(cmd) + target_as_proc = subprocess.Popen(target_as_command, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT) + target_as_out, target_as_err = target_as_proc.communicate() + if target_as_proc.returncode != 0: + raise ValueError('failed to run fuzzer {}: {}'.format( + target_as_command, target_as_err)) + if target_as_out: + if "error" in target_as_out: + target_error_occurred = True + if args.verbose: + print(target_as_out) + return target_error_occurred, target_as_out + + +def call_proto_to_asm(args, filename): + filename_prefix = args.out + "/" + filename + if args.fuzz_opnd_values: + cmd = 'llvm-mc-assemble-proto-to-asm-riscv-fuzz-opnd-values ' \ + + '{corpus}/{file}' + else: + cmd = 'llvm-mc-assemble-proto-to-asm-riscv {corpus}/{file}' + cmd = cmd.format(corpus=args.corpus, file=filename) + proto_to_asm_command = shlex.split(cmd) + asm_file = open(filename_prefix + ".s", "w+r") + asm_proc = subprocess.Popen(proto_to_asm_command, stdout=asm_file, + stderr=subprocess.STDOUT) + asm_out, asm_err = asm_proc.communicate() + if asm_proc.returncode != 0: + raise ValueError('failed to run {}: {}'.format(proto_to_asm_command, + asm_err)) + elif asm_out: + if args.verbose: + print("Asm_out: " + asm_out) + asm_file.close() + + +def call_golden_assembler(args, filename): + golden_error_occurred = False + cmd = '{assemble} {dirname}/{filename}.s -march={march} ' \ + + '-o {dirname}/{filename}.out' + cmd = cmd.format(assemble=args.assemble, march=args.march, + dirname=args.out, filename=filename) + golden_as_command = shlex.split(cmd) + golden_as_proc = subprocess.Popen(golden_as_command, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT) + golden_as_out, golden_as_err = golden_as_proc.communicate() + if golden_as_proc.returncode != 0: + if "Error:" in golden_as_out: + if args.verbose: + print("golden_as_out: " + golden_as_out) + golden_error_occurred = True + elif args.verbose: + print(golden_as_out) + return 
golden_error_occurred, golden_as_out + + +def call_objdump(args, filename): + filename_prefix = args.out + "/" + filename + objdump_file = open(filename_prefix + ".objdump", "w+r") + cmd = '{objdump} -dr -M numeric {filename}.o' + cmd = cmd.format(objdump=args.objdump, filename=filename_prefix) + objdump_command = shlex.split(cmd) + objdump_proc = subprocess.Popen(objdump_command, + stdout=objdump_file, + stderr=subprocess.STDOUT) + objdump_out, objdump_err = objdump_proc.communicate() + if objdump_proc.returncode != 0: + raise ValueError('failed to run objdump {}: ' + + '{}'.format(objdump_command, objdump_err)) + return objdump_file + + +def process_objdump_file(objdump_file, args, filename): + filename_prefix = args.out + "/" + filename + objdump_file.seek(0) + temp_objdump_file = tempfile.TemporaryFile("w+r") + # Only the lines containing asm instructions have tabs in them; remove + # all other lines (for example, header lines and whitespace). + for line in objdump_file: + if "\t" in line: + temp_objdump_file.write(line) + objdump_file.close() + with open(filename_prefix + ".parsed_objdump", "w") as write_objdump_file: + temp_objdump_file.seek(0) + # Each line of the objdump output looks something like this: + # 0: 00318033 add x0,x3,x3 + # We remove the first two columns, leaving only the asm instruction. + for line in temp_objdump_file: + parts = line.split()[2:] + for part in parts: + write_objdump_file.write("\t") + write_objdump_file.write(part) + write_objdump_file.write("\n") + temp_objdump_file.close() + + +def print_file_status(args, filename, passes, fails, list_of_diffs): + filename_prefix = args.out + "/" + filename + if args.verbose: + prefix = "Checking " + filename + "..." 
+ file1 = open(filename_prefix + ".s", "r") + file2 = open(filename_prefix + ".parsed_objdump", "r") + diff = difflib.ndiff(file1.readlines(), file2.readlines()) + file1.close() + file2.close() + delta = ''.join(x[0:] for x in diff if x.startswith(('- ', '+ '))) + if delta: + if args.verbose: + print(prefix + "FAILURE!") + fails = fails + 1 + list_of_diffs.append(filename) + list_of_diffs.append(delta) + else: + if args.verbose: + print(prefix + "SUCCESS!") + passes = passes + 1 + return passes, fails, list_of_diffs + + +def print_result(passes, fails, list1, list2, list3, list4, list5): + if len(list2) != 0: + print("Target LLVM Assembler failed to assemble these inputs...") + for item in list2: + print(item) + + # Golden (gnu) assembler fails + if len(list3) != 0: + print("GNU Assembler failed to assemble these inputs...") + for item in list3: + print(item) + + if fails != 0: + print("Target LLVM Assembler and golden GNU Assembler both assembled " + "these inputs, but input ASM string differed from" + "GNU disassembler-generated ASM string...") + for item in list5: + print(item) + print("Both LLVM Assembler and GNU Assembler fail: {}".format(len(list1))) + print("Only LLVM Assembler fails: {}".format(len(list2))) + print("Only GNU Assembler fails: {}".format(len(list3))) + print("Both LLVM Assembler and GNU Assembler pass: {}".format(len(list4))) + print("[Succeeded: {}/{}\tFailed: {}/{}]".format(passes, + len(list4), fails, len(list4))) + +if __name__ == "__main__": + main() Index: tools/llvm-mc-assemble-proto-fuzzer/proto-files/example_asm_proto.proto =================================================================== --- /dev/null +++ tools/llvm-mc-assemble-proto-fuzzer/proto-files/example_asm_proto.proto @@ -0,0 +1,59 @@ +//===-- example_asm_proto.proto - Protobuf description of ASM -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file describes a subset of ASM as a protobuf. It is used by the +/// example fuzzer to generate basic inputs to fuzz the llvm mc layer. +/// +//===----------------------------------------------------------------------===// + +syntax = "proto2"; + +message Register { + enum ValueRange { + X0 = 1; X1 = 2; X2 = 3; X3 = 4; X4 = 5; + X5 = 6; X6 = 7; X7 = 8; X8 = 9; X9 = 10; + X10 = 11; X11 = 12; X12 = 13; X13 = 14; X14 = 15; + X15 = 16; X16 = 17; X17 = 18; X18 = 19; X19 = 20; + X20 = 21; X21 = 22; X22 = 23; X23 = 24; X24 = 25; + X25 = 26; X26 = 27; X27 = 28; X28 = 29; X29 = 30; + X30 = 31; X31 = 32; + }; + required ValueRange value = 1; +} + +message RTypeOpcode { + enum ValueRange { + ADD = 1; SUB = 2; + }; + required ValueRange value = 1; +} + +message RTypeOperands { + required Register operand1 = 1; + required Register operand2 = 2; + required Register operand3 = 3; +} + +message RTypeStatement { + required RTypeOpcode opcode = 1; + required RTypeOperands operands = 2; +} + +message AsmStatement { + oneof asmstatement_oneof { + RTypeStatement statement = 2; + } +} + +message Assembly { + repeated AsmStatement statements = 1; +} + +package mc_proto_fuzzer; Index: tools/llvm-mc-assemble-proto-fuzzer/proto-files/riscv.proto =================================================================== --- /dev/null +++ tools/llvm-mc-assemble-proto-fuzzer/proto-files/riscv.proto @@ -0,0 +1,119 @@ +//===-- riscv.proto - Protobuf description of ASM -------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file serves as the base for the protobuf representation of the RISC-V +/// ISA. 
It is used to more easily find interesting inputs for fuzzing the +/// LLVM Machine Code layer. +/// Opcodes with prefix RV32 are specific to the RV32 base ISA; opcodes with +/// the prefix RV64 are specific to the RV64 base ISA; and opcodes with +/// neither prefix are common across the RV32 and RV64 base ISAs. +/// +//===----------------------------------------------------------------------===// + +syntax = "proto2"; + +import "riscv_operands.proto"; +import "riscv_a.proto"; +import "riscv_c.proto"; +import "riscv_d.proto"; +import "riscv_f.proto"; +import "riscv_i.proto"; +import "riscv_m.proto"; + +message Opcode { + oneof opcode_oneof { + // These opcodes are common across RV32 and RV64. + A_RFormat1 opcode1 = 1; + A_RFormat2 opcode2 = 2; + C_CBFormat opcode3 = 3; + C_CIFormat opcode4 = 4; + C_CIWFormat opcode5 = 5; + C_CJFormat opcode6 = 6; + C_CLFormat opcode7 = 7; + C_CRFormat1 opcode8 = 8; + C_CRFormat2 opcode9 = 9; + C_CSFormat1 opcode10 = 10; + C_CSFormat2 opcode11 = 11; + C_CSSFormat opcode12 = 12; + D_IFormatLoad opcode13 = 13; + D_R4Format opcode14 = 14; + D_RFormat1 opcode15 = 15; + D_RFormat2 opcode16 = 16; + D_SFormat opcode17 = 17; + F_IFormatLoad opcode18 = 18; + F_R4Format opcode19 = 19; + F_RFormat1 opcode20 = 20; + F_RFormat2 opcode21 = 21; + F_SFormat opcode22 = 22; + I_BFormat opcode23 = 23; + I_CSR_IFormat1 opcode24 = 24; + I_CSR_IFormat2 opcode25 = 25; + I_IFormat opcode26 = 26; + I_IFormatLoad opcode27 = 27; + I_JFormat opcode28 = 28; + I_RFormat opcode29 = 29; + I_SFormat opcode30 = 30; + I_UFormat opcode31 = 31; + M_RFormat opcode32 = 32; + + // These pseudo instructions are common across RV32 and RV64. 
+ D_RegReg_Pseudo opcode33 = 33; + D_RegRegReg_Pseudo opcode34 = 34; + F_Imm_Pseudo opcode35 = 35; + F_Reg_Pseudo opcode36 = 36; + F_RegImm_Pseudo opcode37 = 37; + F_RegReg_Pseudo opcode38 = 38; + F_RegRegReg_Pseudo opcode39 = 39; + I_Imm_Pseudo opcode40 = 40; + I_ImmImm_Pseudo opcode41 = 41; + I_ImmReg_Pseudo opcode42 = 42; + I_NoOpnd_Pseudo opcode43 = 43; + I_Reg_Pseudo opcode44 = 44; + I_RegImm_Pseudo opcode45 = 45; + I_RegReg_Pseudo opcode46 = 46; + I_RegRegImm_Pseudo opcode47 = 47; + I_RegRegReg_Pseudo opcode48 = 48; + + // These opcodes are specific to RV32. + C_RV32_CIFormat opcode49 = 49; + C_RV32_CJFormat opcode50 = 50; + C_RV32_CLFormat opcode51 = 51; + C_RV32_CSFormat1 opcode52 = 52; + C_RV32_CSSFormat opcode53 = 53; + I_RV32_Reg_Pseudo opcode54 = 54; + + // These opcodes are specific to RV64. + A_RV64_RFormat1 opcode55 = 55; + A_RV64_RFormat2 opcode56 = 56; + C_RV64_CIFormat opcode57 = 57; + C_RV64_CLFormat opcode58 = 58; + C_RV64_CRFormat2 opcode59 = 59; + C_RV64_CSFormat1 opcode60 = 60; + C_RV64_CSSFormat opcode61 = 61; + D_RV64_RFormat2 opcode62 = 62; + F_RV64_RFormat2 opcode63 = 63; + I_RV64_IFormat opcode64 = 64; + I_RV64_IFormatLoad opcode65 = 65; + I_RV64_RFormat opcode66 = 66; + I_RV64_SFormat opcode67 = 67; + M_RV64_RFormat opcode68 = 68; + } +} + +message AsmStmt { + required Opcode opcode = 1; + repeated Operand operands = 2; +} + +message Assembly { + repeated AsmStmt stmts = 1; +} + +package mc_proto_fuzzer; Index: tools/llvm-mc-assemble-proto-fuzzer/proto-files/riscv_a.proto =================================================================== --- /dev/null +++ tools/llvm-mc-assemble-proto-fuzzer/proto-files/riscv_a.proto @@ -0,0 +1,67 @@ +//===-- riscv_a.proto - Protobuf description of ASM -----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file describes the instruction opcodes in the A-extension for the +/// RISC-V ISA, in Protobuf form. Opcodes with prefix RV32 are specific to the +/// RV32 base ISA; opcodes with the prefix RV64 are specific to the RV64 +/// base ISA; and opcodes with neither prefix are common across the RV32 +/// and RV64 base ISAs. +/// +//===----------------------------------------------------------------------===// + +syntax = "proto2"; + +message A_RFormat1 { + enum ValueRange { + AMOADD_W = 0; + AMOAND_W = 1; + AMOMAX_W = 2; + AMOMAXU_W = 3; + AMOMIN_W = 4; + AMOMINU_W = 5; + AMOOR_W = 6; + AMOSWAP_W = 7; + AMOXOR_W = 8; + SC_W = 9; + }; + required ValueRange value = 1; +} + +message A_RFormat2 { + enum ValueRange { + LR_W = 0; + }; + required ValueRange value = 1; +} + +message A_RV64_RFormat1 { + enum ValueRange { + AMOADD_D = 0; + AMOAND_D = 1; + AMOMAX_D = 2; + AMOMAXU_D = 3; + AMOMIN_D = 4; + AMOMINU_D = 5; + AMOOR_D = 6; + AMOSWAP_D = 7; + AMOXOR_D = 8; + SC_D = 9; + }; + required ValueRange value = 1; +} + +message A_RV64_RFormat2 { + enum ValueRange { + LR_D = 0; + }; + required ValueRange value = 1; +} + +package mc_proto_fuzzer; Index: tools/llvm-mc-assemble-proto-fuzzer/proto-files/riscv_c.proto =================================================================== --- /dev/null +++ tools/llvm-mc-assemble-proto-fuzzer/proto-files/riscv_c.proto @@ -0,0 +1,189 @@ +//===-- riscv_c.proto - Protobuf description of ASM -------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file describes the instruction opcodes in the C-extension for the +/// RISC-V ISA, in Protobuf form. 
+/// Opcodes with prefix RV32 are specific to the RV32 base ISA; opcodes with +/// the prefix RV64 are specific to the RV64 base ISA; and opcodes with +/// neither prefix are common across the RV32 and RV64 base ISAs. +/// +//===----------------------------------------------------------------------===// + +syntax = "proto2"; + +message C_CIFormat { + enum ValueRange { + C_ADDI = 0; + C_ADDI16SP = 1; + C_FLDSP = 2; + C_LI = 3; + C_LUI = 4; + C_LWSP = 5; + C_SLLI = 6; + } + required ValueRange value = 1; +} + +message C_CSSFormat { + enum ValueRange { + C_FSDSP = 0; + C_SWSP = 1; + } + required ValueRange value = 1; +} + +message C_CLFormat { + enum ValueRange { + C_FLD = 0; + C_LW = 1; + } + required ValueRange value = 1; +} + +message C_CSFormat1 { + enum ValueRange { + C_FSD = 0; + C_SW = 1; + } + required ValueRange value = 1; +} + +message C_CSFormat2 { + enum ValueRange { + C_AND = 0; + C_OR = 1; + C_SUB = 2; + C_XOR = 3; + } + required ValueRange value = 1; +} + +message C_CJFormat { + enum ValueRange { + C_J = 0; + } + required ValueRange value = 1; +} + +message C_CRFormat1 { + enum ValueRange { + C_JALR = 0; + C_JR = 1; + } + required ValueRange value = 1; +} + +message C_CRFormat2 { + enum ValueRange { + C_ADD = 0; + C_MV = 1; + } + required ValueRange value = 1; +} + +message C_CBFormat { + enum ValueRange { + C_ANDI = 0; + C_BEQZ = 1; + C_BNEZ = 2; + C_SRAI = 3; + C_SRLI = 4; + } + required ValueRange value = 1; +} + +message C_CIWFormat { + enum ValueRange { + C_ADDI4SPN = 0; + } + required ValueRange value = 1; +} + +message C_NoOperands_Format { + enum ValueRange { + C_EBREAK = 0; + C_NOP = 1; + } + required ValueRange value = 1; +} + +message C_RV32_CIFormat { + enum ValueRange { + C_FLWSP = 0; + } + required ValueRange value = 1; +} + +message C_RV32_CJFormat { + enum ValueRange { + C_JAL = 0; + } + required ValueRange value = 1; +} + +message C_RV32_CLFormat { + enum ValueRange { + C_FLW = 0; + } + required ValueRange value = 1; +} + +message 
C_RV32_CSFormat1 { + enum ValueRange { + C_FSW = 0; + } + required ValueRange value = 1; +} + +message C_RV32_CSSFormat { + enum ValueRange { + C_FSWSP = 0; + } + required ValueRange value = 1; +} + +message C_RV64_CIFormat { + enum ValueRange { + C_ADDIW = 0; + C_LDSP = 1; + }; + required ValueRange value = 1; +} + +message C_RV64_CLFormat { + enum ValueRange { + C_LD = 0; + }; + required ValueRange value = 1; +} + +message C_RV64_CRFormat2 { + enum ValueRange { + C_ADDW = 0; + C_SUBW = 1; + }; + required ValueRange value = 1; +} + +message C_RV64_CSFormat1 { + enum ValueRange { + C_SD = 0; + }; + required ValueRange value = 1; +} + +message C_RV64_CSSFormat { + enum ValueRange { + C_SDSP = 0; + }; + required ValueRange value = 1; +} + +package mc_proto_fuzzer; Index: tools/llvm-mc-assemble-proto-fuzzer/proto-files/riscv_d.proto =================================================================== --- /dev/null +++ tools/llvm-mc-assemble-proto-fuzzer/proto-files/riscv_d.proto @@ -0,0 +1,106 @@ +//===-- riscv_d.proto - Protobuf description of ASM -------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file describes the instruction opcodes in the D-extension for the +/// RISC-V ISA, in Protobuf form. +/// Opcodes with prefix RV32 are specific to the RV32 base ISA; opcodes with +/// the prefix RV64 are specific to the RV64 base ISA; and opcodes with +/// neither prefix are common across the RV32 and RV64 base ISAs. 
+/// +//===----------------------------------------------------------------------===// + +syntax = "proto2"; + +message D_RFormat1 { + enum ValueRange { + FADD_D = 0; + FDIV_D = 1; + FEQ_D = 2; + FLE_D = 3; + FLT_D = 4; + FMAX_D = 5; + FMIN_D = 6; + FMUL_D = 7; + FSGNJN_D = 8; + FSGNJ_D = 9; + FSGNJX_D = 10; + FSUB_D = 11; + }; + required ValueRange value = 1; +} + +message D_RFormat2 { + enum ValueRange { + FCLASS_D = 0; + FCVT_D_S = 1; + FCVT_D_W = 2; + FCVT_D_WU = 3; + FCVT_S_D = 4; + FCVT_W_D = 5; + FCVT_WU_D = 6; + FSQRT_D = 7; + }; + required ValueRange value = 1; +} + +message D_R4Format { + enum ValueRange { + FMADD_D = 0; + FMSUB_D = 1; + FNMADD_D = 2; + FNMSUB_D = 3; + }; + required ValueRange value = 1; +} + +message D_IFormatLoad { + enum ValueRange { + FLD = 0; + }; + required ValueRange value = 1; +} + +message D_SFormat { + enum ValueRange { + FSD = 0; + }; + required ValueRange value = 1; +} + +message D_RegReg_Pseudo { + enum ValueRange { + FABS_D = 0; + FMV_D = 1; + FNEG_D = 2; + }; + required ValueRange value = 1; +} + +message D_RegRegReg_Pseudo { + enum ValueRange { + FGE_D = 0; + FGT_D = 1; + }; + required ValueRange value = 1; +} + +message D_RV64_RFormat2 { + enum ValueRange { + FCVT_D_L = 0; + FCVT_D_LU = 1; + FCVT_L_D = 2; + FCVT_LU_D = 3; + FMV_D_X = 4; + FMV_X_D = 5; + }; + required ValueRange value = 1; +} + +package mc_proto_fuzzer; Index: tools/llvm-mc-assemble-proto-fuzzer/proto-files/riscv_f.proto =================================================================== --- /dev/null +++ tools/llvm-mc-assemble-proto-fuzzer/proto-files/riscv_f.proto @@ -0,0 +1,136 @@ +//===-- riscv_f.proto - Protobuf description of ASM -------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file describes the instruction opcodes in the F-extension for the +/// RISC-V ISA, in Protobuf form. +/// Opcodes with prefix RV32 are specific to the RV32 base ISA; opcodes with +/// the prefix RV64 are specific to the RV64 base ISA; and opcodes with +/// neither prefix are common across the RV32 and RV64 base ISAs. +/// +//===----------------------------------------------------------------------===// + +syntax = "proto2"; + +message F_RFormat1 { + enum ValueRange { + FADD_S = 0; + FDIV_S = 1; + FEQ_S = 2; + FLE_S = 3; + FLT_S = 4; + FMAX_S = 5; + FMIN_S = 6; + FMUL_S = 7; + FSGNJN_S = 8; + FSGNJ_S = 9; + FSGNJX_S = 10; + FSUB_S = 11; + }; + required ValueRange value = 1; +} + +message F_RFormat2 { + enum ValueRange { + FCLASS_S = 0; + FCVT_S_W = 1; + FCVT_S_WU = 2; + FCVT_W_S = 3; + FCVT_WU_S = 4; + FMV_X_W = 5; + FMV_W_X = 6; + FSQRT_S = 7; + }; + required ValueRange value = 1; +} + +message F_R4Format { + enum ValueRange { + FMADD_S = 0; + FMSUB_S = 1; + FNMADD_S = 2; + FNMSUB_S = 3; + }; + required ValueRange value = 1; +} + +message F_IFormatLoad { + enum ValueRange { + FLW = 0; + }; + required ValueRange value = 1; +} + +message F_SFormat { + enum ValueRange { + FSW = 0; + }; + required ValueRange value = 1; +} + +message F_Imm_Pseudo { + enum ValueRange { + FSFLAGSI = 0; + FSRMI = 1; + }; + required ValueRange value = 1; +} + +message F_Reg_Pseudo { + enum ValueRange { + FRCSR = 0; + FRFLAGS = 1; + FRRM = 2; + FSFLAGS = 3; + FSRM = 4; + }; + required ValueRange value = 1; +} + +message F_RegImm_Pseudo { + enum ValueRange { + FSFLAGSI = 0; + FSRMI = 1; + }; + required ValueRange value = 1; +} + +message F_RegReg_Pseudo { + enum ValueRange { + FABS_S = 0; + FMV_S = 1; + FMV_S_X = 2; + FMV_X_S = 3; + FNEG_S = 4; + FSFLAGS = 5; + FSRM = 6; + FSCSR = 7; + }; + required ValueRange value = 1; +} + +message F_RegRegReg_Pseudo { + enum ValueRange { + FGT_S 
= 0; + FGE_S = 1; + }; + required ValueRange value = 1; +} + +message F_RV64_RFormat2 { + enum ValueRange { + FCVT_L_S = 0; + FCVT_LU_S = 1; + FCVT_S_L = 2; + FCVT_S_LU = 3; + }; + required ValueRange value = 1; +} + +package mc_proto_fuzzer; Index: tools/llvm-mc-assemble-proto-fuzzer/proto-files/riscv_fuzz_opnd_values.proto =================================================================== --- /dev/null +++ tools/llvm-mc-assemble-proto-fuzzer/proto-files/riscv_fuzz_opnd_values.proto @@ -0,0 +1,258 @@ +//===-- riscv_fuzz_opnd_values.proto - Protobuf description of ASM ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file serves as the base for the Protobuf representation of the RISC-V +/// ISA. It is used to more easily find interesting inputs for fuzzing the +/// LLVM Machine Code layer. It restricts operand types according to +/// instruction (for example, an ADD instruction will take three register +/// operands). Building the fuzzer with this Protobuf is useful for +/// generating instructions that are largely valid (or close to valid). +/// The values of the operands are fuzzed, but the types and number of operands +/// should be correct for the fuzzed instructions. +/// Opcodes with prefix RV32 are specific to the RV32 base ISA; opcodes with +/// the prefix RV64 are specific to the RV64 base ISA; and opcodes with +/// neither prefix are common across the RV32 and RV64 base ISAs. 
+/// +//===----------------------------------------------------------------------===// + +syntax = "proto2"; + +import "riscv_operands.proto"; +import "riscv_a.proto"; +import "riscv_c.proto"; +import "riscv_d.proto"; +import "riscv_f.proto"; +import "riscv_i.proto"; +import "riscv_m.proto"; + +// Note: The order of the fields does not matter for the grammar since +// the structure of the fuzzed ASM statements is handled in proto_to_asm. +message RegRegRegRegStmt { + required Register operand1 = 1; + required Register operand2 = 2; + required Register operand3 = 3; + required Register operand4 = 4; + optional RoundingMode operand5 = 5; + oneof opcode_oneof { + D_R4Format opcode1 = 6; + F_R4Format opcode2 = 7; + } +} + +message RegRegRegStmt1 { + required Register operand1 = 1; + required Register operand2 = 2; + required Register operand3 = 3; + optional RoundingMode operand4 = 4; + oneof opcode_oneof { + D_RFormat1 opcode1 = 5; + F_RFormat1 opcode2 = 6; + I_RFormat opcode3 = 7; + M_RFormat opcode4 = 8; + + D_RegRegReg_Pseudo opcode5 = 9; + F_RegRegReg_Pseudo opcode6 = 10; + I_RegRegReg_Pseudo opcode7 = 11; + + I_RV64_RFormat opcode8 = 12; + M_RV64_RFormat opcode9 = 13; + } +} + +message RegRegRegStmt2 { + required Register operand1 = 1; + required Register operand2 = 2; + required Register operand3 = 3; + oneof opcode_oneof { + A_RFormat1 opcode1 = 4; + A_RV64_RFormat1 opcode2 = 5; + } +} + +message RegRegImmStmt1 { + required Register operand1 = 1; + required Register operand2 = 2; + required Immediate operand3 = 3; + oneof opcode_oneof { + I_BFormat opcode1 = 4; + I_IFormat opcode2 = 5; + + I_RegRegImm_Pseudo opcode3 = 6; + + I_RV64_IFormat opcode4 = 7; + } +} + +message RegRegImmStmt2 { + required Register operand1 = 1; + required Register operand2 = 2; + required Immediate operand3 = 3; + oneof opcode_oneof { + C_CLFormat opcode1 = 4; + C_CSFormat1 opcode2 = 5; + D_IFormatLoad opcode3 = 6; + D_SFormat opcode4 = 7; + F_IFormatLoad opcode5 = 8; + F_SFormat 
opcode6 = 9;
+    I_IFormatLoad opcode7 = 10;
+    I_SFormat opcode8 = 11;
+
+    C_RV32_CLFormat opcode9 = 12;
+    C_RV32_CSFormat1 opcode10 = 13;
+
+    C_RV64_CLFormat opcode11 = 14;
+    C_RV64_CSFormat1 opcode12 = 15;
+    I_RV64_IFormatLoad opcode13 = 16;
+    I_RV64_SFormat opcode14 = 17;
+  }
+}
+
+message RegRegStmt1 {  // asm form: <opcode> operand1,operand2[,operand3]
+  required Register operand1 = 1;
+  required Register operand2 = 2;
+  optional RoundingMode operand3 = 3;
+  oneof opcode_oneof {
+    C_CRFormat2 opcode1 = 4;
+    C_CSFormat2 opcode2 = 5;
+    D_RFormat2 opcode3 = 6;
+    F_RFormat2 opcode4 = 7;  // field number 8 is not used in this oneof
+
+    D_RegReg_Pseudo opcode5 = 9;
+    F_RegReg_Pseudo opcode6 = 10;
+    I_RegReg_Pseudo opcode7 = 11;
+
+    C_RV64_CRFormat2 opcode8 = 12;
+    D_RV64_RFormat2 opcode9 = 13;
+    F_RV64_RFormat2 opcode10 = 14;
+  }
+}
+
+message RegRegStmt2 {  // asm form: <opcode> operand1,(operand2)
+  required Register operand1 = 1;
+  required Register operand2 = 2;
+  oneof opcode_oneof {
+    A_RFormat2 opcode1 = 3;
+    A_RV64_RFormat2 opcode2 = 4;
+  }
+}
+
+message RegImmStmt {  // asm form: <opcode> operand1,operand2
+  required Register operand1 = 1;
+  required Immediate operand2 = 2;
+  oneof opcode_oneof {
+    C_CBFormat opcode1 = 3;
+    C_CIFormat opcode2 = 4;
+    C_CIWFormat opcode3 = 5;
+    C_CSSFormat opcode4 = 6;
+    I_UFormat opcode5 = 7;
+    I_JFormat opcode6 = 8;
+
+    F_RegImm_Pseudo opcode7 = 9;
+    I_RegImm_Pseudo opcode8 = 10;
+
+    C_RV32_CIFormat opcode9 = 11;
+    C_RV32_CSSFormat opcode10 = 12;
+
+    C_RV64_CIFormat opcode11 = 13;
+    C_RV64_CSSFormat opcode12 = 14;
+  }
+}
+
+message RegImmRegStmt {  // asm form: <opcode> operand1,operand2,operand3
+  required Register operand1 = 1;
+  required Immediate operand2 = 2;
+  required Register operand3 = 3;
+  required I_CSR_IFormat1 opcode1 = 4;
+}
+
+message ImmImmStmt {  // asm form: <opcode> operand1,operand2
+  required Immediate operand1 = 1;
+  required Immediate operand2 = 2;
+  oneof opcode_oneof {
+    I_ImmImm_Pseudo opcode1 = 3;
+  }
+}
+
+message ImmRegStmt {  // asm form: <opcode> operand1,operand2 (immediate first)
+  required Immediate operand1 = 1;
+  required Register operand2 = 2;
+  required I_ImmReg_Pseudo opcode1 = 3;
+}
+
+message RegImmImmStmt {  // asm form: <opcode> operand1,operand2,operand3
+  required Register operand1 = 1;
+  required Immediate operand2 = 2;
+  required Immediate operand3 = 3;
+  required I_CSR_IFormat2 opcode1 = 4;
+}
+
+message RegStmt {  // asm form: <opcode> operand1
+  required Register operand1 = 1;
+  oneof opcode_oneof {
+    C_CRFormat1 opcode1 = 2;
+    F_Reg_Pseudo opcode2 = 3;
+    I_Reg_Pseudo opcode3 = 4;
+
+    I_RV32_Reg_Pseudo opcode4 = 5;
+  }
+}
+
+message ImmStmt {  // asm form: <opcode> operand1
+  required Immediate operand1 = 1;
+  oneof opcode_oneof {
+    C_CJFormat opcode1 = 2;
+
+    I_Imm_Pseudo opcode2 = 3;
+    F_Imm_Pseudo opcode3 = 4;
+
+    C_RV32_CJFormat opcode4 = 5;
+  }
+}
+
+message FenceStmt {  // asm form: <opcode> operand1,operand2
+  required IORWString operand1 = 1;
+  required IORWString operand2 = 2;
+  required I_Other_IFormat1 opcode1 = 3;
+}
+
+message NoOperandsStmt {  // asm form: <opcode>
+  oneof opcode_oneof {
+    C_NoOperands_Format opcode1 = 1;
+    I_NoOpnd_Pseudo opcode2 = 2;
+    I_Other_IFormat2 opcode3 = 3;
+  }
+}
+
+message AsmStmt {
+  oneof asmstmt_oneof {  // at most one statement shape is set per AsmStmt
+    FenceStmt stmt1 = 1;
+    ImmStmt stmt2 = 2;
+    ImmImmStmt stmt3 = 3;
+    ImmRegStmt stmt4 = 4;
+    NoOperandsStmt stmt5 = 5;
+    RegImmStmt stmt6 = 6;
+    RegImmRegStmt stmt7 = 7;
+    RegImmImmStmt stmt8 = 8;
+    RegStmt stmt9 = 9;
+    RegRegImmStmt1 stmt10 = 10;
+    RegRegImmStmt2 stmt11 = 11;
+    RegRegStmt1 stmt12 = 12;
+    RegRegStmt2 stmt13 = 13;
+    RegRegRegStmt1 stmt14 = 14;
+    RegRegRegStmt2 stmt15 = 15;
+    RegRegRegRegStmt stmt16 = 16;
+  }
+}
+
+message Assembly {
+  repeated AsmStmt stmts = 1;  // statements are printed in this order
+}
+
+package mc_proto_fuzzer;
Index: tools/llvm-mc-assemble-proto-fuzzer/proto-files/riscv_i.proto
===================================================================
--- /dev/null
+++ tools/llvm-mc-assemble-proto-fuzzer/proto-files/riscv_i.proto
@@ -0,0 +1,298 @@
+//===-- riscv_i.proto - Protobuf description of ASM -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file describes the instruction opcodes in the I-extension for the
+/// RISC-V ISA, in Protobuf form.
+/// Opcodes with prefix RV32 are specific to the RV32 base ISA; opcodes with
+/// the prefix RV64 are specific to the RV64 base ISA; and opcodes with
+/// neither prefix are common across the RV32 and RV64 base ISAs.
+///
+//===----------------------------------------------------------------------===//
+
+syntax = "proto2";
+
+message I_RFormat {
+  enum ValueRange {
+    ADD = 0;
+    AND = 1;
+    OR = 2;
+    SLL = 3;
+    SLT = 4;
+    SLTU = 5;
+    SRA = 6;
+    SRL = 7;
+    SUB = 8;
+    XOR = 9;
+  };
+  required ValueRange value = 1;
+}
+
+message I_IFormat {
+  enum ValueRange {
+    ADDI = 0;
+    ANDI = 1;
+    JALR = 2;
+    ORI = 3;
+    SLLI = 4;
+    SLTI = 5;
+    SLTIU = 6;
+    SRAI = 7;
+    SRLI = 8;
+    XORI = 9;
+  };
+  required ValueRange value = 1;
+}
+
+message I_IFormatLoad {
+  enum ValueRange {
+    LB = 0;
+    LBU = 1;
+    LH = 2;
+    LHU = 3;
+    LW = 4;
+  };
+  required ValueRange value = 1;
+}
+
+message I_SFormat {
+  enum ValueRange {
+    SB = 0;
+    SH = 1;
+    SW = 2;
+  };
+  required ValueRange value = 1;
+}
+
+message I_BFormat {
+  enum ValueRange {
+    BEQ = 0;
+    BGE = 1;
+    BGEU = 2;
+    BLT = 3;
+    BLTU = 4;
+    BNE = 5;
+  };
+  required ValueRange value = 1;
+}
+
+message I_UFormat {
+  enum ValueRange {
+    AUIPC = 0;
+    LUI = 1;
+  };
+  required ValueRange value = 1;
+}
+
+message I_JFormat {
+  enum ValueRange {
+    JAL = 0;
+  };
+  required ValueRange value = 1;
+}
+
+message I_CSR_IFormat1 {
+  enum ValueRange {
+    CSRRC = 0;
+    CSRRS = 1;
+    CSRRW = 2;
+  };
+  required ValueRange value = 1;
+}
+
+message I_CSR_IFormat2 {
+  enum ValueRange {
+    CSRRCI = 0;
+    CSRRSI = 1;
+    CSRRWI = 2;
+  };
+  required ValueRange value = 1;
+}
+
+message I_Other_IFormat1 {
+  enum ValueRange {
+    FENCE = 0;
+  };
+  required ValueRange value = 1;
+}
+
+message I_Other_IFormat2 {
+  enum ValueRange {
+    EBREAK = 0;
+    ECALL = 1;
+    FENCE_I = 2;  // the converters print '_' as '.', i.e. "fence.i"
+  };
+  required ValueRange value = 1;
+}
+
+message I_ImmReg_Pseudo {
+  // imm, reg -- the operand order ImmRegStmt gives these opcodes
+  enum ValueRange {
+    CSRC = 0;
+    CSRR = 1;  // NOTE(review): csrr is normally "rd, csr" (reg, imm) -- confirm
+    CSRS = 2;
+    CSRW = 3;
+  };
+  required ValueRange value = 1;
+}
+
+message I_RegRegReg_Pseudo {
+  enum ValueRange {
+    SGT = 0;
+    SGTU = 1;
+  };
+  required ValueRange value = 1;
+}
+
+message I_RegImm_Pseudo {
+  enum ValueRange {
+    // reg, imm
+    BEQZ = 0;
+    BNEZ = 1;
+    BLEZ = 2;
+    BGEZ = 3;
+    BLTZ = 4;
+    BGTZ = 5;
+    LA = 6;
+    LB = 7;
+    LH = 8;
+    LI = 9;
+    LW = 10;
+    MV = 11;  // NOTE(review): MV/MOVE are also in I_RegReg_Pseudo -- confirm
+    MOVE = 12;
+  };
+  required ValueRange value = 1;
+}
+
+message I_RegReg_Pseudo {
+  // reg, reg
+  enum ValueRange {
+    MV = 0;
+    MOVE = 1;
+    NEG = 2;
+    NOT = 3;
+    SEQZ = 4;
+    SEXT_W = 5;  // the converters print '_' as '.', i.e. "sext.w"
+    SGTZ = 6;
+    SLTZ = 7;
+    SNEZ = 8;
+  };
+  required ValueRange value = 1;
+}
+
+message I_ImmImm_Pseudo {
+  // imm, imm
+  enum ValueRange {
+    CSRCI = 0;
+    CSRSI = 1;
+    CSRWI = 2;
+  };
+  required ValueRange value = 1;
+}
+
+message I_RegRegImm_Pseudo {
+  // reg,reg,imm
+  enum ValueRange {
+    ADD = 0;
+    AND = 1;
+    BGT = 2;
+    BGTU = 3;
+    BLE = 4;
+    BLEU = 5;
+    OR = 6;
+    SLL = 7;
+    SLT = 8;
+    SLTU = 9;
+    SRA = 10;
+    SRL = 11;
+    XOR = 12;
+  };
+  required ValueRange value = 1;
+}
+
+message I_Reg_Pseudo {
+  enum ValueRange {
+    JALR = 0;
+    JR = 1;
+    RDCYCLE = 2;
+    RDINSTRET = 3;
+    RDTIME = 4;
+  };
+  required ValueRange value = 1;
+}
+
+message I_Imm_Pseudo {
+  enum ValueRange {
+    CALL = 0;
+    J = 1;
+    JAL = 2;
+    SFENCE_VMA = 3;  // the converters print '_' as '.', i.e. "sfence.vma"
+    TAIL = 4;
+  };
+  required ValueRange value = 1;
+}
+
+message I_NoOpnd_Pseudo {
+  enum ValueRange {
+    FENCE = 0;
+    NOP = 1;
+    RET = 2;
+    SBREAK = 3;
+    SCALL = 4;
+    SFENCE_VMA = 5;  // the converters print '_' as '.', i.e. "sfence.vma"
+  };
+  required ValueRange value = 1;
+}
+
+message I_RV32_Reg_Pseudo {
+  enum ValueRange {
+    RDCYCLEH = 0;
+    RDINSTRETH = 1;
+    RDTIMEH = 2;
+  };
+  required ValueRange value = 1;
+}
+
+message I_RV64_IFormat {
+  enum ValueRange {
+    ADDIW = 0;
+    SLLIW = 1;
+    SRAIW = 2;
+    SRLIW = 3;
+  };
+  required ValueRange value = 1;
+}
+
+message I_RV64_IFormatLoad {
+  enum ValueRange {
+    LD = 0;
+    LWU = 1;
+  };
+  required ValueRange value = 1;
+}
+
+message I_RV64_RFormat {
+  enum ValueRange {
+    ADDW = 0;
+    SLLW = 1;
+    SRAW = 2;
+    SRLW = 3;
+    SUBW = 4;
+  };
+  required ValueRange value = 1;
+}
+
+message I_RV64_SFormat {
+  enum ValueRange {
+    SD = 0;
+  };
+  required ValueRange value = 1;
+}
+
+package mc_proto_fuzzer;
Index: tools/llvm-mc-assemble-proto-fuzzer/proto-files/riscv_m.proto
===================================================================
--- /dev/null
+++ tools/llvm-mc-assemble-proto-fuzzer/proto-files/riscv_m.proto
@@ -0,0 +1,46 @@
+//===-- riscv_m.proto - Protobuf description of ASM -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file describes the instruction opcodes in the M-extension for the
+/// RISC-V ISA, in Protobuf form.
+/// Opcodes with prefix RV32 are specific to the RV32 base ISA; opcodes with
+/// the prefix RV64 are specific to the RV64 base ISA; and opcodes with
+/// neither prefix are common across the RV32 and RV64 base ISAs.
+///
+//===----------------------------------------------------------------------===//
+
+syntax = "proto2";
+
+message M_RFormat {
+  enum ValueRange {
+    DIV = 0;
+    DIVU = 1;
+    MUL = 2;
+    MULH = 3;
+    MULHSU = 4;
+    MULHU = 5;
+    REM = 6;
+    REMU = 7;
+  };
+  required ValueRange value = 1;
+}
+
+message M_RV64_RFormat {
+  enum ValueRange {
+    DIVUW = 0;
+    DIVW = 1;
+    MULW = 2;
+    REMUW = 3;
+    REMW = 4;
+  };
+  required ValueRange value = 1;
+}
+
+package mc_proto_fuzzer;
Index: tools/llvm-mc-assemble-proto-fuzzer/proto-files/riscv_operands.proto
===================================================================
--- /dev/null
+++ tools/llvm-mc-assemble-proto-fuzzer/proto-files/riscv_operands.proto
@@ -0,0 +1,142 @@
+//===-- riscv_operands.proto - Protobuf description of ASM ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file is the Protobuf representation of the operands in the RISC-V ISA.
+///
+//===----------------------------------------------------------------------===//
+
+syntax = "proto2";
+
+message IORW {
+  enum ValueRange {
+    I = 0;
+    O = 1;
+    R = 2;
+    W = 3;
+  };
+  required ValueRange value = 1;
+}
+
+message IORWString {
+  repeated IORW iorwstr = 1;  // printed as the letters run together, in order
+}
+
+message LabelSuffix {
+  enum ValueRange {
+    B = 0;
+    F = 1;
+  };
+  required ValueRange value = 1;
+}
+
+message Modifier {
+  enum ValueRange {
+    HI = 0;
+    LO = 1;
+    PCREL_HI = 2;  // modifiers keep their '_' when printed ("pcrel_hi")
+    PCREL_LO = 3;
+  };
+  required ValueRange value = 1;
+}
+
+message Label {
+  oneof label_oneof {
+    uint32 num = 1;
+    string str = 2;  // NOTE(review): the converters print "foo", not this text
+  }
+  optional LabelSuffix suffix = 3;
+}
+
+message Immediate {
+  oneof imm_oneof {  // the converters print "0" when none of these is set
+    sint32 s_imm = 1;
+    uint32 u_imm = 2;
+    Label label = 3;
+  }
+}
+
+message GenPurposeRegister {
+  enum ValueRange {  // 0-31: x<N> names; 32-63: ABI names in the same order
+    X0 = 0; X1 = 1; X2 = 2; X3 = 3; X4 = 4;
+    X5 = 5; X6 = 6; X7 = 7; X8 = 8; X9 = 9;
+    X10 = 10; X11 = 11; X12 = 12; X13 = 13; X14 = 14;
+    X15 = 15; X16 = 16; X17 = 17; X18 = 18; X19 = 19;
+    X20 = 20; X21 = 21; X22 = 22; X23 = 23; X24 = 24;
+    X25 = 25; X26 = 26; X27 = 27; X28 = 28; X29 = 29;
+    X30 = 30; X31 = 31;
+
+    ZERO = 32; RA = 33; SP = 34; GP = 35; TP = 36;
+    T0 = 37; T1 = 38; T2 = 39; S0 = 40; S1 = 41;
+    A0 = 42; A1 = 43; A2 = 44; A3 = 45; A4 = 46;
+    A5 = 47; A6 = 48; A7 = 49; S2 = 50; S3 = 51;
+    S4 = 52; S5 = 53; S6 = 54; S7 = 55; S8 = 56;
+    S9 = 57; S10 = 58; S11 = 59; T3 = 60; T4 = 61;
+    T5 = 62; T6 = 63;
+  };
+  required ValueRange value = 1;
+}
+
+message FloatingPointRegister {
+  enum ValueRange {  // 0-31: f<N> names; 32-63: ABI names in the same order
+    F0 = 0; F1 = 1; F2 = 2; F3 = 3; F4 = 4;
+    F5 = 5; F6 = 6; F7 = 7; F8 = 8; F9 = 9;
+    F10 = 10; F11 = 11; F12 = 12; F13 = 13; F14 = 14;
+    F15 = 15; F16 = 16; F17 = 17; F18 = 18; F19 = 19;
+    F20 = 20; F21 = 21; F22 = 22; F23 = 23; F24 = 24;
+    F25 = 25; F26 = 26; F27 = 27; F28 = 28; F29 = 29;
+    F30 = 30; F31 = 31;
+
+    FT0 = 32; FT1 = 33; FT2 = 34; FT3 = 35; FT4 = 36;
+    FT5 = 37; FT6 = 38; FT7 = 39; FS0 = 40; FS1 = 41;
+    FA0 = 42; FA1 = 43; FA2 = 44; FA3 = 45; FA4 = 46;
+    FA5 = 47; FA6 = 48; FA7 = 49; FS2 = 50; FS3 = 51;
+    FS4 = 52; FS5 = 53; FS6 = 54; FS7 = 55; FS8 = 56;
+    FS9 = 57; FS10 = 58; FS11 = 59; FT8 = 60; FT9 = 61;
+    FT10 = 62; FT11 = 63;
+  };
+  required ValueRange value = 1;
+}
+
+message Register {
+  oneof reg_oneof {  // the converters print "x0" when neither is set
+    GenPurposeRegister reg1 = 1;
+    FloatingPointRegister reg2 = 2;
+  }
+}
+
+message RoundingMode {
+  enum ValueRange {
+    DYN = 0;
+    RDN = 1;
+    RMM = 2;
+    RNE = 3;
+    RTZ = 4;
+    RUP = 5;
+  };
+  required ValueRange value = 1;
+}
+
+message ImmRegPair {  // printed as imm(reg)
+  required Immediate imm = 1;
+  required Register reg = 2;
+}
+
+message Operand {
+  oneof operand_oneof {
+    IORWString iorwstr = 1;
+    Immediate imm = 2;
+    ImmRegPair pair = 3;
+    Register reg = 4;
+    RoundingMode mode = 5;
+  }
+  optional Modifier mod = 6;  // when set, the operand is printed as %mod(...)
+}
+
+package mc_proto_fuzzer;
Index: tools/llvm-mc-assemble-proto-fuzzer/proto-to-asm/CMakeLists.txt
===================================================================
--- /dev/null
+++ tools/llvm-mc-assemble-proto-fuzzer/proto-to-asm/CMakeLists.txt
@@ -0,0 +1,35 @@
+set(LLVM_LINK_COMPONENTS ${LLVM_TARGETS_TO_BUILD})
+set(CMAKE_CXX_FLAGS ${CXX_FLAGS_NOFUZZ})  # flags the parent saved for non-fuzzer code
+
+# Needed by LLVM's CMake checks because this file defines multiple targets.
+set(LLVM_OPTIONAL_SOURCES
+  example_proto_to_asm.cpp
+  proto_to_asm_main.cpp
+  proto_to_asm_riscv.cpp
+  proto_to_asm_riscv_fuzz_opnd_values.cpp)
+
+# One converter library per protobuf grammar.
+llvm_add_library(mcProtoToASM example_proto_to_asm.cpp
+  DEPENDS mcASMProto
+  LINK_LIBS mcASMProto ${PROTOBUF_LIBRARIES})
+llvm_add_library(mcRISCVProtoToASM proto_to_asm_riscv.cpp
+  DEPENDS mcRISCVASMProto
+  LINK_LIBS mcRISCVASMProto ${PROTOBUF_LIBRARIES})
+llvm_add_library(mcRISCVFuzzOpndValuesProtoToASM
+  proto_to_asm_riscv_fuzz_opnd_values.cpp
+  DEPENDS mcRISCVFuzzOpndValuesASMProto
+  LINK_LIBS mcRISCVFuzzOpndValuesASMProto ${PROTOBUF_LIBRARIES})
+
+# One driver per converter; every driver is built from the same main().
+add_llvm_executable(llvm-mc-assemble-proto-to-asm-example proto_to_asm_main.cpp)
+target_link_libraries(llvm-mc-assemble-proto-to-asm-example
+  PRIVATE mcProtoToASM)
+
+add_llvm_executable(llvm-mc-assemble-proto-to-asm-riscv proto_to_asm_main.cpp)
+target_link_libraries(llvm-mc-assemble-proto-to-asm-riscv
+  PRIVATE mcRISCVProtoToASM)
+
+add_llvm_executable(llvm-mc-assemble-proto-to-asm-riscv-fuzz-opnd-values
+  proto_to_asm_main.cpp)
+target_link_libraries(llvm-mc-assemble-proto-to-asm-riscv-fuzz-opnd-values
+  PRIVATE mcRISCVFuzzOpndValuesProtoToASM)
Index: tools/llvm-mc-assemble-proto-fuzzer/proto-to-asm/example_proto_to_asm.cpp
===================================================================
--- /dev/null
+++ tools/llvm-mc-assemble-proto-fuzzer/proto-to-asm/example_proto_to_asm.cpp
@@ -0,0 +1,88 @@
+//==-- example_proto_to_asm.cpp - Protobuf-ASM conversion ------------------==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implements functions for converting between protobufs for an example
+// assembly language grammar and the assembly language instructions.
+//
+//===----------------------------------------------------------------------===//
+#include "proto_to_asm.h"
+#include "example_asm_proto.pb.h"
+
+#include <algorithm>
+#include <cctype>
+#include <ostream>
+#include <sstream>
+#include <string>
+
+using namespace google::protobuf;
+
+static void EmitAsm(std::ostream &OS, const EnumDescriptor *Enum, int Num) {
+  const EnumValueDescriptor *D = Enum->FindValueByNumber(Num);
+  std::string Msg = D->name(); // enumerator name, e.g. a mnemonic in caps
+  std::transform(Msg.begin(), Msg.end(), Msg.begin(), ::tolower);
+  OS << Msg;
+}
+
+namespace mc_proto_fuzzer {
+
+template <typename T> // T: generated message wrapping a ValueRange enum
+void Emit(std::ostream &OS, const T &X) {
+  const EnumDescriptor *ED = X.ValueRange_descriptor();
+  EmitAsm(OS, ED, X.value());
+}
+
+std::ostream &operator<<(std::ostream &OS, const Register &X) {
+  Emit(OS, X);
+  return OS;
+}
+
+std::ostream &operator<<(std::ostream &OS, const RTypeOpcode &X) {
+  Emit(OS, X);
+  return OS;
+}
+
+std::ostream &operator<<(std::ostream &OS, const RTypeOperands &X) {
+  OS << X.operand1();
+  OS << "," << X.operand2();
+  OS << "," << X.operand3();
+  return OS;
+}
+
+std::ostream &operator<<(std::ostream &OS, const RTypeStatement &X) {
+  OS << "\t" << X.opcode() << "\t"; // one statement per line: \t<op>\t<opnds>
+  OS << X.operands() << "\n";
+  return OS;
+}
+
+std::ostream &operator<<(std::ostream &OS, const AsmStatement &X) {
+  return OS << X.statement();
+}
+
+std::ostream &operator<<(std::ostream &OS, const Assembly &X) {
+  for (auto &ST : X.statements())
+    OS << ST;
+  return OS;
+}
+
+// --- Entry points declared in proto_to_asm.h ---
+
+std::string FunctionToString(const Assembly &Input) {
+  std::ostringstream OS;
+  OS << Input;
+  return OS.str();
+}
+
+std::string ProtoToASM(const uint8_t *Data, size_t Size) {
+  Assembly Message;
+  if (!Message.ParsePartialFromArray(Data, Size))
+    return "#error invalid proto\n";
+  return FunctionToString(Message);
+}
+
+} // namespace mc_proto_fuzzer
Index: tools/llvm-mc-assemble-proto-fuzzer/proto-to-asm/proto_to_asm.h
===================================================================
--- /dev/null
+++
tools/llvm-mc-assemble-proto-fuzzer/proto-to-asm/proto_to_asm.h
@@ -0,0 +1,32 @@
+//==-- proto_to_asm.h - Protobuf-ASM conversion ----------------------------==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Defines functions for converting between protobufs and ASM.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TOOLS_LLVM_MC_ASSEMBLE_PROTO_FUZZER_PROTO_TO_ASM_H
+#define LLVM_TOOLS_LLVM_MC_ASSEMBLE_PROTO_FUZZER_PROTO_TO_ASM_H
+
+#include <cstddef>
+#include <cstdint>
+#include <string>
+
+namespace mc_proto_fuzzer {
+class Assembly;
+
+/// Render an already-parsed Assembly message as assembly text.
+std::string FunctionToString(const Assembly &input);
+
+/// Parse \p size bytes at \p data as a serialized Assembly message and return
+/// its assembly text, or "#error invalid proto\n" when parsing fails.
+std::string ProtoToASM(const uint8_t *data, size_t size);
+} // namespace mc_proto_fuzzer
+
+#endif // LLVM_TOOLS_LLVM_MC_ASSEMBLE_PROTO_FUZZER_PROTO_TO_ASM_H
Index: tools/llvm-mc-assemble-proto-fuzzer/proto-to-asm/proto_to_asm_main.cpp
===================================================================
--- /dev/null
+++ tools/llvm-mc-assemble-proto-fuzzer/proto-to-asm/proto_to_asm_main.cpp
@@ -0,0 +1,29 @@
+//==-- proto_to_asm_main.cpp - Driver for protobuf-ASM conversion ----------==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implements a simple driver to print an ASM program from a protobuf.
+//
+//===----------------------------------------------------------------------===//
+#include "proto_to_asm.h"
+
+#include <cstdint>
+#include <fstream>
+#include <iostream>
+#include <iterator>
+
+int main(int argc, char **argv) {
+  for (int i = 1; i < argc; ++i) { // argv[0] is the program name, not an input
+    std::ifstream in(argv[i], std::ios::binary); // serialized protos are binary
+    std::string str((std::istreambuf_iterator<char>(in)),
+                    std::istreambuf_iterator<char>());
+    std::cout << mc_proto_fuzzer::ProtoToASM(
+        reinterpret_cast<const uint8_t *>(str.data()), str.size());
+  }
+  return 0;
+}
Index: tools/llvm-mc-assemble-proto-fuzzer/proto-to-asm/proto_to_asm_riscv.cpp
===================================================================
--- /dev/null
+++ tools/llvm-mc-assemble-proto-fuzzer/proto-to-asm/proto_to_asm_riscv.cpp
@@ -0,0 +1,316 @@
+//==-- proto_to_asm_riscv.cpp - Protobuf-ASM conversion ---------------------==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implements functions for converting between Protobufs for the assembly
+// language grammar for RISC-V instruction set and assembly language
+// instructions.
+//
+//===----------------------------------------------------------------------===//
+#include "proto_to_asm.h"
+#include "riscv_operands.pb.h"
+#include "riscv.pb.h"
+#include "riscv_a.pb.h"
+#include "riscv_c.pb.h"
+#include "riscv_d.pb.h"
+#include "riscv_f.pb.h"
+#include "riscv_i.pb.h"
+#include "riscv_m.pb.h"
+
+#include <algorithm>
+#include <cctype>
+#include <cstdint>
+#include <ostream>
+#include <sstream>
+#include <string>
+
+using namespace google::protobuf;
+
+static void EmitAsm(std::ostream &OS, const EnumDescriptor *Enum,
+                    int Num, bool SkipReplace = false) {
+  const EnumValueDescriptor *D = Enum->FindValueByNumber(Num);
+  std::string Msg = D->name(); // enumerator name, e.g. "FENCE_I"
+  std::transform(Msg.begin(), Msg.end(), Msg.begin(), ::tolower);
+  if (!SkipReplace) // mnemonics use '.', e.g. "fence.i"; modifiers keep '_'
+    std::replace(Msg.begin(), Msg.end(), '_', '.');
+  OS << Msg;
+}
+
+namespace mc_proto_fuzzer {
+
+template <typename T> // T: generated message wrapping a ValueRange enum
+void Emit(std::ostream &OS, const T &X) {
+  const EnumDescriptor *ED = X.ValueRange_descriptor();
+  EmitAsm(OS, ED, X.value());
+}
+
+std::ostream &operator<<(std::ostream &OS, const IORWString &X) {
+  for (auto &IORWChar : X.iorwstr())
+    Emit(OS, IORWChar);
+  return OS;
+}
+
+std::ostream &operator<<(std::ostream &OS, const Modifier &X) {
+  const EnumDescriptor *ED = X.ValueRange_descriptor();
+  EmitAsm(OS, ED, X.value(), true /* SkipReplace */);
+  return OS;
+}
+
+std::ostream &operator<<(std::ostream &OS, const LabelSuffix &X) {
+  Emit(OS, X);
+  return OS;
+}
+
+std::ostream &operator<<(std::ostream &OS, const Label &X) {
+  // FIXME: Using a default text label.
+  if (X.has_num()) {
+    OS << X.num();
+    if (X.has_suffix())
+      OS << X.suffix();
+  } else
+    OS << "foo";
+  return OS;
+}
+
+std::ostream &operator<<(std::ostream &OS, const Immediate &X) {
+  if (X.has_s_imm())
+    OS << X.s_imm();
+  else if (X.has_u_imm())
+    OS << X.u_imm();
+  else if (X.has_label())
+    OS << X.label();
+  else
+    OS << "0";
+  return OS;
+}
+
+std::ostream &operator<<(std::ostream &OS, const GenPurposeRegister &X) {
+  Emit(OS, X);
+  return OS;
+}
+
+std::ostream &operator<<(std::ostream &OS, const FloatingPointRegister &X) {
+  Emit(OS, X);
+  return OS;
+}
+
+std::ostream &operator<<(std::ostream &OS, const Register &X) {
+  if (X.has_reg1())
+    OS << X.reg1();
+  else if (X.has_reg2())
+    OS << X.reg2();
+  else
+    OS << "x0";
+  return OS;
+}
+
+std::ostream &operator<<(std::ostream &OS, const RoundingMode &X) {
+  Emit(OS, X);
+  return OS;
+}
+
+std::ostream &operator<<(std::ostream &OS, const ImmRegPair &X) {
+  return OS << X.imm() << "(" << X.reg() << ")";
+}
+
+std::ostream &operator<<(std::ostream &OS, const Operand &X) {
+  if (X.has_mod()) // a modifier wraps the operand: %mod(operand)
+    OS << "%" << X.mod() << "(";
+  if (X.has_iorwstr())
+    OS << X.iorwstr();
+  else if (X.has_imm())
+    OS << X.imm();
+  else if (X.has_pair())
+    OS << X.pair();
+  else if (X.has_reg())
+    OS << X.reg();
+  else if (X.has_mode())
+    OS << X.mode();
+  else
+    OS << "x0";
+  if (X.has_mod())
+    OS << ")";
+  return OS;
+}
+
+std::ostream &operator<<(std::ostream &OS, const Opcode &X) {
+  if (X.has_opcode1())
+    Emit(OS, X.opcode1());
+  else if (X.has_opcode2())
+    Emit(OS, X.opcode2());
+  else if (X.has_opcode3())
+    Emit(OS, X.opcode3());
+  else if (X.has_opcode4())
+    Emit(OS, X.opcode4());
+  else if (X.has_opcode5())
+    Emit(OS, X.opcode5());
+  else if (X.has_opcode6())
+    Emit(OS, X.opcode6());
+  else if (X.has_opcode7())
+    Emit(OS, X.opcode7());
+  else if (X.has_opcode8())
+    Emit(OS, X.opcode8());
+  else if (X.has_opcode9())
+    Emit(OS, X.opcode9());
+  else if (X.has_opcode10())
+    Emit(OS, X.opcode10());
+  else if (X.has_opcode11())
+    Emit(OS, X.opcode11());
+  else if (X.has_opcode12())
+    Emit(OS, X.opcode12());
+  else if (X.has_opcode13())
+    Emit(OS, X.opcode13());
+  else if (X.has_opcode14())
+    Emit(OS, X.opcode14());
+  else if (X.has_opcode15())
+    Emit(OS, X.opcode15());
+  else if (X.has_opcode16())
+    Emit(OS, X.opcode16());
+  else if (X.has_opcode17())
+    Emit(OS, X.opcode17());
+  else if (X.has_opcode18())
+    Emit(OS, X.opcode18());
+  else if (X.has_opcode19())
+    Emit(OS, X.opcode19());
+  else if (X.has_opcode20())
+    Emit(OS, X.opcode20());
+  else if (X.has_opcode21())
+    Emit(OS, X.opcode21());
+  else if (X.has_opcode22())
+    Emit(OS, X.opcode22());
+  else if (X.has_opcode23())
+    Emit(OS, X.opcode23());
+  else if (X.has_opcode24())
+    Emit(OS, X.opcode24());
+  else if (X.has_opcode25())
+    Emit(OS, X.opcode25());
+  else if (X.has_opcode26())
+    Emit(OS, X.opcode26());
+  else if (X.has_opcode27())
+    Emit(OS, X.opcode27());
+  else if (X.has_opcode28())
+    Emit(OS, X.opcode28());
+  else if (X.has_opcode29())
+    Emit(OS, X.opcode29());
+  else if (X.has_opcode30())
+    Emit(OS, X.opcode30());
+  else if (X.has_opcode31())
+    Emit(OS, X.opcode31());
+  else if (X.has_opcode32())
+    Emit(OS, X.opcode32());
+  else if (X.has_opcode33())
+    Emit(OS, X.opcode33());
+  else if (X.has_opcode34())
+    Emit(OS, X.opcode34());
+  else if (X.has_opcode35())
+    Emit(OS, X.opcode35());
+  else if (X.has_opcode36())
+    Emit(OS, X.opcode36());
+  else if (X.has_opcode37())
+    Emit(OS, X.opcode37());
+  else if (X.has_opcode38())
+    Emit(OS, X.opcode38());
+  else if (X.has_opcode39())
+    Emit(OS, X.opcode39());
+  else if (X.has_opcode40())
+    Emit(OS, X.opcode40());
+  else if (X.has_opcode41())
+    Emit(OS, X.opcode41());
+  else if (X.has_opcode42())
+    Emit(OS, X.opcode42());
+  else if (X.has_opcode43())
+    Emit(OS, X.opcode43());
+  else if (X.has_opcode44())
+    Emit(OS, X.opcode44());
+  else if (X.has_opcode45())
+    Emit(OS, X.opcode45());
+  else if (X.has_opcode46())
+    Emit(OS, X.opcode46());
+  else if (X.has_opcode47())
+    Emit(OS, X.opcode47());
+  else if (X.has_opcode48())
+    Emit(OS, X.opcode48());
+  else if (X.has_opcode49())
+    Emit(OS, X.opcode49());
+  else if (X.has_opcode50())
+    Emit(OS, X.opcode50());
+  else if (X.has_opcode51())
+    Emit(OS, X.opcode51());
+  else if (X.has_opcode52())
+    Emit(OS, X.opcode52());
+  else if (X.has_opcode53())
+    Emit(OS, X.opcode53());
+  else if (X.has_opcode54())
+    Emit(OS, X.opcode54());
+  else if (X.has_opcode55())
+    Emit(OS, X.opcode55());
+  else if (X.has_opcode56())
+    Emit(OS, X.opcode56());
+  else if (X.has_opcode57())
+    Emit(OS, X.opcode57());
+  else if (X.has_opcode58())
+    Emit(OS, X.opcode58());
+  else if (X.has_opcode59())
+    Emit(OS, X.opcode59());
+  else if (X.has_opcode60())
+    Emit(OS, X.opcode60());
+  else if (X.has_opcode61())
+    Emit(OS, X.opcode61());
+  else if (X.has_opcode62())
+    Emit(OS, X.opcode62());
+  else if (X.has_opcode63())
+    Emit(OS, X.opcode63());
+  else if (X.has_opcode64())
+    Emit(OS, X.opcode64());
+  else if (X.has_opcode65())
+    Emit(OS, X.opcode65());
+  else if (X.has_opcode66())
+    Emit(OS, X.opcode66());
+  else if (X.has_opcode67())
+    Emit(OS, X.opcode67());
+  else if (X.has_opcode68())
+    Emit(OS, X.opcode68());
+  else
+    return OS << "add"; // no opcode slot set: fall back to a fixed mnemonic
+  return OS;
+}
+
+std::ostream &operator<<(std::ostream &OS, const AsmStmt &X) {
+  OS << "\t" << X.opcode() << "\t";
+  int NumOperands = 0;
+  for (auto &Opnd : X.operands()) {
+    if (NumOperands != 0) // comma between operands, none before the first
+      OS << ",";
+    OS << Opnd;
+    ++NumOperands;
+  }
+  return OS << "\n";
+}
+
+std::ostream &operator<<(std::ostream &OS, const Assembly &X) {
+  for (auto &ST : X.stmts())
+    OS << ST;
+  return OS;
+}
+
+// --- Entry points declared in proto_to_asm.h ---
+
+std::string FunctionToString(const Assembly &Input) {
+  std::ostringstream OS;
+  OS << Input;
+  return OS.str();
+}
+
+std::string ProtoToASM(const uint8_t *Data, size_t Size) {
+  Assembly Message;
+  if (!Message.ParsePartialFromArray(Data, Size))
+    return "#error invalid proto\n";
+  return FunctionToString(Message);
+}
+
+} // namespace mc_proto_fuzzer
Index: tools/llvm-mc-assemble-proto-fuzzer/proto-to-asm/proto_to_asm_riscv_fuzz_opnd_values.cpp
===================================================================
--- /dev/null
+++ tools/llvm-mc-assemble-proto-fuzzer/proto-to-asm/proto_to_asm_riscv_fuzz_opnd_values.cpp
@@ -0,0 +1,491 @@
+//==-- proto_to_asm_riscv_fuzz_opnd_values.cpp - Protobuf-ASM conversion ----==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implements functions for converting between Protobufs for the assembly
+// language grammar for RISC-V instruction set and assembly language
+// instructions.
+//
+//===----------------------------------------------------------------------===//
+#include "proto_to_asm.h"
+#include "riscv_fuzz_opnd_values.pb.h"
+#include "riscv_operands.pb.h"
+#include "riscv_a.pb.h"
+#include "riscv_c.pb.h"
+#include "riscv_d.pb.h"
+#include "riscv_f.pb.h"
+#include "riscv_i.pb.h"
+#include "riscv_m.pb.h"
+
+#include <algorithm>
+#include <cctype>
+#include <cstdint>
+#include <ostream>
+#include <sstream>
+#include <string>
+
+using namespace google::protobuf;
+
+/// Print the name of enumerator \p Num of \p Enum in lower case. Unless
+/// \p SkipReplace is set, every '_' is printed as '.', so that e.g. FENCE_I
+/// comes out as "fence.i".
+static void EmitAsm(std::ostream &OS, const EnumDescriptor *Enum,
+                    int Num, bool SkipReplace = false) {
+  const EnumValueDescriptor *D = Enum->FindValueByNumber(Num);
+  std::string Msg = D->name();
+  std::transform(Msg.begin(), Msg.end(), Msg.begin(), ::tolower);
+  if (!SkipReplace)
+    std::replace(Msg.begin(), Msg.end(), '_', '.');
+  OS << Msg;
+}
+
+namespace mc_proto_fuzzer {
+
+/// Print the enumerator selected by the ValueRange enum wrapped in \p X.
+template <typename T>
+void Emit(std::ostream &OS, const T &X) {
+  const EnumDescriptor *ED = X.ValueRange_descriptor();
+  EmitAsm(OS, ED, X.value());
+}
+
+std::ostream &operator<<(std::ostream &OS, const IORWString &X) {
+  for (auto &IORWChar : X.iorwstr())
+    Emit(OS, IORWChar);
+  return OS;
+}
+
+// FIXME: Add optional modifiers to each operand type.
+std::ostream &operator<<(std::ostream &OS, const Modifier &X) {
+  const EnumDescriptor *ED = X.ValueRange_descriptor();
+  EmitAsm(OS, ED, X.value(), true /* SkipReplace */);
+  return OS;
+}
+
+std::ostream &operator<<(std::ostream &OS, const LabelSuffix &X) {
+  Emit(OS, X);
+  return OS;
+}
+
+std::ostream &operator<<(std::ostream &OS, const Label &X) {
+  // FIXME: Using a default text label.
+  // FIXME: Using a numeric label with no f/b suffix.
+  if (X.has_num()) {
+    OS << X.num();
+  } else
+    OS << "foo";
+  return OS;
+}
+
+std::ostream &operator<<(std::ostream &OS, const Immediate &X) {
+  if (X.has_s_imm())
+    OS << X.s_imm();
+  else if (X.has_u_imm())
+    OS << X.u_imm();
+  else if (X.has_label())
+    OS << X.label();
+  else
+    OS << "0";
+  return OS;
+}
+
+std::ostream &operator<<(std::ostream &OS, const GenPurposeRegister &X) {
+  Emit(OS, X);
+  return OS;
+}
+
+std::ostream &operator<<(std::ostream &OS, const FloatingPointRegister &X) {
+  Emit(OS, X);
+  return OS;
+}
+
+std::ostream &operator<<(std::ostream &OS, const Register &X) {
+  if (X.has_reg1())
+    return OS << X.reg1();
+  if (X.has_reg2())
+    return OS << X.reg2();
+  return OS << "x0";
+}
+
+std::ostream &operator<<(std::ostream &OS, const RoundingMode &X) {
+  Emit(OS, X);
+  return OS;
+}
+
+std::ostream &operator<<(std::ostream &OS, const RegRegRegRegStmt &X) {
+  OS << "\t";
+  if (X.has_opcode1())
+    Emit(OS, X.opcode1());
+  else if (X.has_opcode2())
+    Emit(OS, X.opcode2());
+  else
+    OS << "fmadd.s";
+  OS << "\t";
+  OS << X.operand1() << ",";
+  OS << X.operand2() << ",";
+  OS << X.operand3() << ",";
+  OS << X.operand4();
+  if (X.has_operand5())
+    OS << "," << X.operand5();
+  return OS << "\n";
+}
+
+std::ostream &operator<<(std::ostream &OS, const RegRegRegStmt1 &X) {
+  OS << "\t";
+  if (X.has_opcode1())
+    Emit(OS, X.opcode1());
+  else if (X.has_opcode2())
+    Emit(OS, X.opcode2());
+  else if (X.has_opcode3())
+    Emit(OS, X.opcode3());
+  else if (X.has_opcode4())
+    Emit(OS, X.opcode4());
+  else if (X.has_opcode5())
+    Emit(OS, X.opcode5());
+  else if (X.has_opcode6())
+    Emit(OS, X.opcode6());
+  else if (X.has_opcode7())
+    Emit(OS, X.opcode7());
+  else if (X.has_opcode8())
+    Emit(OS, X.opcode8());
+  else if (X.has_opcode9())
+    Emit(OS, X.opcode9());
+  else
+    OS << "add";
+  OS << "\t";
+  OS << X.operand1() << ",";
+  OS << X.operand2() << ",";
+  OS << X.operand3();
+  if (X.has_operand4())
+    OS << "," << X.operand4();
+  return OS << "\n";
+}
+
+std::ostream &operator<<(std::ostream &OS, const RegRegRegStmt2 &X) {
+  OS << "\t";
+  if (X.has_opcode1())
+    Emit(OS, X.opcode1());
+  else if (X.has_opcode2())
+    Emit(OS, X.opcode2());
+  else
+    OS << "amoadd.w";
+  OS << "\t";
+  OS << X.operand1() << ",";
+  OS << X.operand2() << ",(";
+  OS << X.operand3() << ")\n";
+  return OS;
+}
+
+std::ostream &operator<<(std::ostream &OS, const RegRegImmStmt1 &X) {
+  OS << "\t";
+  if (X.has_opcode1())
+    Emit(OS, X.opcode1());
+  else if (X.has_opcode2())
+    Emit(OS, X.opcode2());
+  else if (X.has_opcode3())
+    Emit(OS, X.opcode3());
+  else if (X.has_opcode4())
+    Emit(OS, X.opcode4());
+  else
+    OS << "addi";
+  OS << "\t";
+  OS << X.operand1() << ",";
+  OS << X.operand2() << ",";
+  OS << X.operand3() << "\n";
+  return OS;
+}
+
+std::ostream &operator<<(std::ostream &OS, const RegRegImmStmt2 &X) {
+  OS << "\t";
+  if (X.has_opcode1())
+    Emit(OS, X.opcode1());
+  else if (X.has_opcode2())
+    Emit(OS, X.opcode2());
+  else if (X.has_opcode3())
+    Emit(OS, X.opcode3());
+  else if (X.has_opcode4())
+    Emit(OS, X.opcode4());
+  else if (X.has_opcode5())
+    Emit(OS, X.opcode5());
+  else if (X.has_opcode6())
+    Emit(OS, X.opcode6());
+  else if (X.has_opcode7())
+    Emit(OS, X.opcode7());
+  else if (X.has_opcode8())
+    Emit(OS, X.opcode8());
+  else if (X.has_opcode9())
+    Emit(OS, X.opcode9());
+  else if (X.has_opcode10())
+    Emit(OS, X.opcode10());
+  else if (X.has_opcode11())
+    Emit(OS, X.opcode11());
+  else if (X.has_opcode12())
+    Emit(OS, X.opcode12());
+  else if (X.has_opcode13())
+    Emit(OS, X.opcode13());
+  else if (X.has_opcode14())
+    Emit(OS, X.opcode14());
+  else
+    OS << "lw";
+  OS << "\t";
+  OS << X.operand1() << ",";
+  OS << X.operand3() << "(";
+  OS << X.operand2() << ")\n";
+  return OS;
+}
+
+std::ostream &operator<<(std::ostream &OS, const RegRegStmt1 &X) {
+  OS << "\t";
+  if (X.has_opcode1())
+    Emit(OS, X.opcode1());
+  else if (X.has_opcode2())
+    Emit(OS, X.opcode2());
+  else if (X.has_opcode3())
+    Emit(OS, X.opcode3());
+  else if (X.has_opcode4())
+    Emit(OS, X.opcode4());
+  else if (X.has_opcode5())
+    Emit(OS, X.opcode5());
+  else if (X.has_opcode6())
+    Emit(OS, X.opcode6());
+  else if (X.has_opcode7())
+    Emit(OS, X.opcode7());
+  else if (X.has_opcode8())
+    Emit(OS, X.opcode8());
+  else if (X.has_opcode9())
+    Emit(OS, X.opcode9());
+  else if (X.has_opcode10())
+    Emit(OS, X.opcode10());
+  else
+    OS << "mv";
+  OS << "\t";
+  OS << X.operand1() << ",";
+  OS << X.operand2();
+  if (X.has_operand3())
+    OS << "," << X.operand3();
+  return OS << "\n";
+}
+
+std::ostream &operator<<(std::ostream &OS, const RegRegStmt2 &X) {
+  OS << "\t";
+  if (X.has_opcode1())
+    Emit(OS, X.opcode1());
+  else if (X.has_opcode2())
+    Emit(OS, X.opcode2());
+  else
+    OS << "lr.w";
+  OS << "\t";
+  OS << X.operand1() << ",(" << X.operand2() << ")\n";
+  return OS;
+}
+
+std::ostream &operator<<(std::ostream &OS, const RegImmStmt &X) {
+  OS << "\t";
+  if (X.has_opcode1())
+    Emit(OS, X.opcode1());
+  else if (X.has_opcode2())
+    Emit(OS, X.opcode2());
+  else if (X.has_opcode3())
+    Emit(OS, X.opcode3());
+  else if (X.has_opcode4())
+    Emit(OS, X.opcode4());
+  else if (X.has_opcode5())
+    Emit(OS, X.opcode5());
+  else if (X.has_opcode6())
+    Emit(OS, X.opcode6());
+  else if (X.has_opcode7())
+    Emit(OS, X.opcode7());
+  else if (X.has_opcode8())
+    Emit(OS, X.opcode8());
+  else if (X.has_opcode9())
+    Emit(OS, X.opcode9());
+  else if (X.has_opcode10())
+    Emit(OS, X.opcode10());
+  else if (X.has_opcode11())
+    Emit(OS, X.opcode11());
+  else if (X.has_opcode12())
+    Emit(OS, X.opcode12());
+  else
+    OS << "c.fldsp";
+  OS << "\t";
+  OS << X.operand1() << "," << X.operand2() << "\n";
+  return OS;
+}
+
+std::ostream &operator<<(std::ostream &OS, const RegImmImmStmt &X) {
+  OS << "\t";
+  if (X.has_opcode1())
+    Emit(OS, X.opcode1());
+  else
+    OS << "csrrwi";
+  OS << "\t";
+  OS << X.operand1() << ",";
+  OS << X.operand2() << ",";
+  OS << X.operand3() << "\n";
+  return OS;
+}
+
+std::ostream &operator<<(std::ostream &OS, const RegImmRegStmt &X) {
+  OS << "\t";
+  if (X.has_opcode1())
+    Emit(OS, X.opcode1());
+  else
+    OS << "csrrw";
+  OS << "\t";
+  OS << X.operand1() << ",";
+  OS << X.operand2() << ",";
+  OS << X.operand3() << "\n";
+  return OS;
+}
+
+std::ostream &operator<<(std::ostream &OS, const RegStmt &X) {
+  OS << "\t";
+  if (X.has_opcode1())
+    Emit(OS, X.opcode1());
+  else if (X.has_opcode2())
+    Emit(OS, X.opcode2());
+  else if (X.has_opcode3())
+    Emit(OS, X.opcode3());
+  else if (X.has_opcode4())
+    Emit(OS, X.opcode4());
+  else
+    OS << "c.jr";
+  OS << "\t";
+  OS << X.operand1() << "\n";
+  return OS;
+}
+
+std::ostream &operator<<(std::ostream &OS, const ImmStmt &X) {
+  OS << "\t";
+  if (X.has_opcode1())
+    Emit(OS, X.opcode1());
+  else if (X.has_opcode2())
+    Emit(OS, X.opcode2());
+  else if (X.has_opcode3())
+    Emit(OS, X.opcode3());
+  else if (X.has_opcode4())
+    Emit(OS, X.opcode4());
+  else
+    OS << "c.jal";
+  OS << "\t";
+  OS << X.operand1() << "\n";
+  return OS;
+}
+
+std::ostream &operator<<(std::ostream &OS, const ImmImmStmt &X) {
+  OS << "\t";
+  if (X.has_opcode1())
+    Emit(OS, X.opcode1());
+  else
+    OS << "csrci";
+  OS << "\t";
+  OS << X.operand1() << ",";
+  OS << X.operand2() << "\n";
+  return OS;
+}
+
+std::ostream &operator<<(std::ostream &OS, const ImmRegStmt &X) {
+  OS << "\t";
+  if (X.has_opcode1())
+    Emit(OS, X.opcode1());
+  else
+    OS << "csrc";
+  OS << "\t";
+  OS << X.operand1() << ",";
+  OS << X.operand2() << "\n";
+  return OS;
+}
+
+std::ostream &operator<<(std::ostream &OS, const NoOperandsStmt &X) {
+  if (X.has_opcode1()) {
+    OS << "\t";
+    Emit(OS, X.opcode1());
+    return OS << "\n";
+  }
+  if (X.has_opcode2()) {
+    OS << "\t";
+    Emit(OS, X.opcode2());
+    return OS << "\n";
+  }
+  if (X.has_opcode3()) {
+    OS << "\t";
+    Emit(OS, X.opcode3());
+    return OS << "\n";
+  }
+  return OS << "\tnop\n";
+}
+
+std::ostream &operator<<(std::ostream &OS, const FenceStmt &X) {
+  OS << "\t";
+  if (X.has_opcode1())
+    Emit(OS, X.opcode1());
+  else
+    OS << "fence";
+  OS << "\t";
+  OS << X.operand1() << ",";
+  OS << X.operand2() << "\n";
+  return OS;
+}
+
+std::ostream &operator<<(std::ostream &OS, const AsmStmt &X) {
+  if (X.has_stmt1())
+    return OS << X.stmt1();
+  if (X.has_stmt2())
+    return OS << X.stmt2();
+  if (X.has_stmt3())
+    return OS << X.stmt3();
+  if (X.has_stmt4())
+    return OS << X.stmt4();
+  if (X.has_stmt5())
+    return OS << X.stmt5();
+  if (X.has_stmt6())
+    return OS << X.stmt6();
+  if (X.has_stmt7())
+    return OS << X.stmt7();
+  if (X.has_stmt8())
+    return OS << X.stmt8();
+  if (X.has_stmt9())
+    return OS << X.stmt9();
+  if (X.has_stmt10())
+    return OS << X.stmt10();
+  if (X.has_stmt11())
+    return OS << X.stmt11();
+  if (X.has_stmt12())
+    return OS << X.stmt12();
+  if (X.has_stmt13())
+    return OS << X.stmt13();
+  if (X.has_stmt14())
+    return OS << X.stmt14();
+  if (X.has_stmt15())
+    return OS << X.stmt15();
+  if (X.has_stmt16())
+    return OS << X.stmt16();
+  return OS << "\tnop\n";
+}
+
+std::ostream &operator<<(std::ostream &OS, const Assembly &X) {
+  for (auto &ST : X.stmts())
+    OS << ST;
+  return OS;
+}
+
+// --- Entry points declared in proto_to_asm.h ---
+
+std::string FunctionToString(const Assembly &Input) {
+  std::ostringstream OS;
+  OS << Input;
+  return OS.str();
+}
+
+std::string ProtoToASM(const uint8_t *Data, size_t Size) {
+  Assembly Message;
+  if (!Message.ParsePartialFromArray(Data, Size))
+    return "#error invalid proto\n";
+  return FunctionToString(Message);
+}
+
+} // namespace mc_proto_fuzzer