Index: tools/llvm-mc-assemble-proto-fuzzer/CMakeLists.txt =================================================================== --- tools/llvm-mc-assemble-proto-fuzzer/CMakeLists.txt +++ tools/llvm-mc-assemble-proto-fuzzer/CMakeLists.txt @@ -12,8 +12,8 @@ # Needed by LLVM's CMake checks because this file defines multiple targets. set(LLVM_OPTIONAL_SOURCES DummyMCFuzzer.cpp - MCFuzzer.cpp ExampleMCProtoFuzzer.cpp + RV32MCProtoFuzzer.cpp ) if(CLANG_ENABLE_ASSEMBLE_PROTO_FUZZER) @@ -23,11 +23,31 @@ add_definitions(-DGOOGLE_PROTOBUF_NO_RTTI) include_directories(${PROTOBUF_INCLUDE_DIRS}) include_directories(${CMAKE_CURRENT_BINARY_DIR}) - protobuf_generate_cpp(PROTO_SRCS PROTO_HDRS asm_proto.proto) + protobuf_generate_cpp(EXAMPLE_PROTO_SRCS EXAMPLE_PROTO_HDRS + proto-files/example_asm_proto.proto) + protobuf_generate_cpp(RV32_PROTO_SRCS RV32_PROTO_HDRS + proto-files/rv32.proto + proto-files/rv32_operands.proto + proto-files/rv32a.proto + proto-files/rv32c.proto + proto-files/rv32d.proto + proto-files/rv32f.proto + proto-files/rv32i.proto + proto-files/rv32m.proto) + set(LLVM_OPTIONAL_SOURCES ${LLVM_OPTIONAL_SOURCES} ${PROTO_SRCS}) + add_clang_library(mcASMProto - ${PROTO_SRCS} - ${PROTO_HDRS} + ${EXAMPLE_PROTO_SRCS} + ${EXAMPLE_PROTO_HDRS} + + LINK_LIBS + ${PROTOBUF_LIBRARIES} + ) + + add_clang_library(mcRv32ASMProto + ${RV32_PROTO_SRCS} + ${RV32_PROTO_HDRS} LINK_LIBS ${PROTOBUF_LIBRARIES} @@ -37,6 +57,9 @@ include(ProtobufMutatorMC) include_directories(${ProtobufMutator_INCLUDE_DIRS}) + # Build the .proto files. + add_clang_subdirectory(proto-files) + # Build the protobuf->C++ translation library and driver. 
add_clang_subdirectory(proto-to-asm) @@ -48,6 +71,11 @@ ExampleMCProtoFuzzer.cpp ) + add_clang_executable(llvm-mc-assemble-proto-fuzzer-rv32 + ${DUMMY_MAIN} + RV32MCProtoFuzzer.cpp + ) + set(COMMON_PROTO_FUZZ_LIBRARIES ${ProtobufMutator_LIBRARIES} ${PROTOBUF_LIBRARIES} @@ -62,6 +90,13 @@ mcProtoToASM ) + target_link_libraries(llvm-mc-assemble-proto-fuzzer-rv32 + PRIVATE + ${COMMON_PROTO_FUZZ_LIBRARIES} + mcRv32ASMProto + mcRv32ProtoToASM + ) + endif() add_clang_subdirectory(handle-asm) Index: tools/llvm-mc-assemble-proto-fuzzer/ExampleMCProtoFuzzer.cpp =================================================================== --- tools/llvm-mc-assemble-proto-fuzzer/ExampleMCProtoFuzzer.cpp +++ tools/llvm-mc-assemble-proto-fuzzer/ExampleMCProtoFuzzer.cpp @@ -14,7 +14,7 @@ /// //===----------------------------------------------------------------------===// -#include "asm_proto.pb.h" +#include "example_asm_proto.pb.h" #include "handle-asm/handle_asm.h" #include "proto-to-asm/proto_to_asm.h" #include "src/libfuzzer/libfuzzer_macro.h" Index: tools/llvm-mc-assemble-proto-fuzzer/MCFuzzer.cpp =================================================================== --- tools/llvm-mc-assemble-proto-fuzzer/MCFuzzer.cpp +++ /dev/null @@ -1,29 +0,0 @@ -//===-- MCFuzzer.cpp - Fuzz MC Layer --------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// This file implements a function that runs assembler on a single -/// input. This function is then linked into the Fuzzer library. 
-/// -//===----------------------------------------------------------------------===// - -#include "asm_proto.pb.h" -#include "handle-asm/handle_asm.h" -#include "proto-to-asm/proto_to_asm.h" -#include "src/libfuzzer/libfuzzer_macro.h" - -using namespace mc_proto_fuzzer; - -extern "C" int LLVMFuzzerInitialize(int *argc, char ***argv) { return 0; } - -extern "C" int LLVMFuzzerTestOneInput(uint8_t *data, size_t size) { - std::string s((const char *)data, size); - HandleASM(s); - return 0; -} Index: tools/llvm-mc-assemble-proto-fuzzer/README.txt =================================================================== --- tools/llvm-mc-assemble-proto-fuzzer/README.txt +++ tools/llvm-mc-assemble-proto-fuzzer/README.txt @@ -10,7 +10,9 @@ -DCMAKE_PREFIX_PATH=/path/to/install \ -DPBM_FUZZ_PATH=/full/path/to/protobuf/install \ -DPBM_REPO=file:///full/path/to/libprotobuf-mutator ../../llvm -$ ninja -v llvm-mc-assemble-proto-fuzzer llvm-mc-assemble-proto-to-asm +$ ninja -v llvm-mc-assemble-proto-fuzzer llvm-mc-assemble-proto-to-asm \ + llvm-mc-assemble-proto-fuzzer-rv32 \ + llvm-mc-assemble-proto-to-asm-rv32 \ 2>&1 | tee log ------------------------------------------------------------------------------- @@ -29,21 +31,14 @@ fuzzer with, including the corpus directory and the full path to objdump. For example: -$ python mcfuzz.py --corpus corpus --max-len 32 --runs 10 --triple riscv32 - --out outputdir --objdump /full/path/to/riscv32-unknown-linux-gnu-objdump - --mattr +c +$ python mcfuzz.py --corpus corpus --max-len 32 --runs 10 --triple riscv32 \ + --out outputdir --objdump /full/path/to/riscv32-unknown-linux-gnu-objdump \ + --mattr +a,+c,+m,+d,+f --march rv32imafdc \ + --assemble /full/path/to/riscv32-unknown-linux-gnu-as Run the script with the --verbose flag if you would like to see the output of the fuzzer as it runs. 
-Use the --riscv-no-aliases flag if you want the generated .s files (from -proto-to-asm) to be printed in their original form (ie to print - `sub x3,x0,x3` to the file instead of - `neg x3,x4`). Note that using this flag (--riscv-no-aliases) may cause - some test cases to fail, as the default behavior of the compiler is to replace - `sub ,x0,` with a `neg` instruction. Aliases are on by default - (NoAliases=False). - NOTE: If you run the script without specifying an ouptut directory [--out], the script will terminate after fuzzing the corpus a specified number [--runs] of times. @@ -52,14 +47,32 @@ contain the generated corpus files. Next, we populate the output directory by iterating through all the files -currently in the corpus. We use the llvm-mc-assemble-proto-to-asm tool to -generate the .s files. We use the llvm-mc-assemble-proto-fuzzer tool, invoked -on each file in the corpus, with -filetype=obj, and -runs=1, to generate a +currently in the corpus. First, we run the golden AS (GNU) and check its +behavior (successfully assembled, or failed to assemble) against the behavior of +our fuzz target (LLVM-MC AS). If and only if both assemblers assemble the input, +we proceed. + +We use the llvm-mc-assemble-proto-to-asm tool to generate the .s files. +We use the llvm-mc-assemble-proto-fuzzer tool, invoked on each file +in the corpus, with -filetype=obj, and -runs=1, to generate a corresponding .o file. Finally, we call objdump on the .o file to produce a .objdump file, which can be compared (after some processing) to the .s file. Thus, the output directory should contain .o, .s, and .objdump files when the script concludes. -The last part of the script runs a diff of all the .s and .objdump files, and -prints out a summary of results (how many diffs passed and how many failed). 
+The last part of the script prints out a summary of results: on which files the +two assemblers (fuzz target LLVM AS and golden assembler GNU AS) both failed, on +which files only one assembler failed, and on which files both assemblers +successfully assembled the input ASM. + +The printed output will show the ASM statements that caused the fuzz target LLVM +AS to fail (but not the golden AS). It will also print out the ASM +statements that both assemblers assembled if there were any differences +between the golden disassembler's output and our original input ASM. + +------------------------------------------------------------------------------- +Notes: +------------------------------------------------------------------------------- +Some instructions are given aliases by the golden disassembler, and therefore +the diff sometimes fails even when both assemblers can assemble the input ASM. Index: tools/llvm-mc-assemble-proto-fuzzer/RV32MCProtoFuzzer.cpp =================================================================== --- /dev/null +++ tools/llvm-mc-assemble-proto-fuzzer/RV32MCProtoFuzzer.cpp @@ -0,0 +1,27 @@ +//===-- RV32MCProtoFuzzer.cpp - Fuzz Assembler ----------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file implements a function that runs llvm mc assemble on a single +/// input and uses libprotobuf-mutator to find new inputs. This function is +/// then linked into the Fuzzer library. 
+/// +//===----------------------------------------------------------------------===// + +#include "handle-asm/handle_asm.h" +#include "proto-to-asm/proto_to_asm.h" +#include "rv32.pb.h" +#include "src/libfuzzer/libfuzzer_macro.h" + +using namespace mc_proto_fuzzer; + +DEFINE_BINARY_PROTO_FUZZER(const Assembly& input) { + auto S = FunctionToString(input); + HandleASM(S); +} Index: tools/llvm-mc-assemble-proto-fuzzer/asm_proto.proto =================================================================== --- tools/llvm-mc-assemble-proto-fuzzer/asm_proto.proto +++ /dev/null @@ -1,61 +0,0 @@ -//===-- asm_proto.proto - Protobuf description of ASM ---------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// This file describes a subset of ASM as a protobuf. It is used to -/// more easily find interesting inputs for fuzzing llvm mc layer. 
-/// -//===----------------------------------------------------------------------===// - -syntax = "proto2"; - -message Register { - enum RegName { - X0 = 1; X1 = 2; X2 = 3; X3 = 4; X4 = 5; X5 = 6; X6 = 7; X7 = 8; X8 = 9; - X9 = 10; X10 = 11; X11 = 12; X12 = 13; X13 = 14; X14 = 15; X15 = 16; - X16 = 17; X17 = 18; X18 = 19; X19 = 20; X20 = 21; X21 = 22; X22 = 23; - X23 = 24; X24 = 25; X25 = 26; X26 = 27; X27 = 28; X28 = 29; X29 = 30; - X30 = 31; X31 = 32; - }; - required RegName name = 1; -} - -message ITypeOpcode { - enum Op { - ADD = 1; SUB = 2; - }; - required Op op = 1; -} - -message ITypeOperands { - required Register operand1 = 1; - required Register operand2 = 2; - required Register operand3 = 3; -} - -message ITypeStatement { - required ITypeOpcode opcode = 1; - required ITypeOperands operands = 2; -} - -message AsmStatement { - oneof asmstatement_oneof { - ITypeStatement statement = 2; - } -} - -message AsmStatementSeq { - repeated AsmStatement statements = 1; -} - -message Assembly { - required AsmStatementSeq asmStatements = 1; -} - -package mc_proto_fuzzer; Index: tools/llvm-mc-assemble-proto-fuzzer/example_mcfuzz.py =================================================================== --- /dev/null +++ tools/llvm-mc-assemble-proto-fuzzer/example_mcfuzz.py @@ -0,0 +1,168 @@ +#!/usr/bin/env python + +import argparse +import difflib +import os +import shlex +import subprocess +import sys +import tempfile + +parser = argparse.ArgumentParser() + +# Flags for the directory names, corpus and outputdir. +parser.add_argument("--corpus", type=str, help="corpus directory name", + required=True) +parser.add_argument("--out", type=str, + help="output directory name for obj files", default=None) + +# Flags for llvm-mc-assemble-proto-fuzzer: triple, mattr, filetype. 
+parser.add_argument("--triple", type=str, help="specify the triple", + default="riscv32") +parser.add_argument("--mattr", type=str, help="specify mattr", + default="") +parser.add_argument("--filetype", type=str, help="asm or obj", + default="obj") + +# These args are passed in after the -fuzzer-args flag. +parser.add_argument("--runs", type=int, help="number of runs", default=100) +parser.add_argument("--max-len", type=int, + help="limit instruction size for fuzzing", default=40) + +# Flag specifies the name of objdump executable. +parser.add_argument("--objdump", type=str, help="specify the path to" + " objdump", + default="/prj/llvm-arm/home/common/build_tools/" + "riscv32-gnu-7.2.0/bin/" + "riscv32-unknown-linux-gnu-objdump") + +# Flag specifies that we should print out everything +parser.add_argument("--verbose", dest="verbose", action="store_true") +parser.set_defaults(verbose=False) + +args = parser.parse_args() + +# Step 1: Invoke fuzzer to generate a corpus. +cmd = 'llvm-mc-assemble-proto-fuzzer {corpus} -triple={triple} ' \ + + '-mattr={mattr} -filetype={filetype} -fuzzer-args ' \ + + '-runs={runs} -max_len={max_len}' +cmd = cmd.format(corpus=args.corpus, triple=args.triple, mattr=args.mattr, + filetype=args.filetype, runs=args.runs, max_len=args.max_len) +fuzz_command = shlex.split(cmd) + +fuzz_proc = subprocess.Popen(fuzz_command, stdout=subprocess.PIPE, + stderr=subprocess.STDOUT) +fuzz_out, fuzz_err = fuzz_proc.communicate() +if fuzz_proc.returncode != 0: + raise ValueError('failed to run fuzz {}: {}'.format(fuzz_command, + fuzz_err)) +if args.verbose: + print(fuzz_out) + +# If user specified an output directory, proceed to step 2; otherwise, exit. +if args.out is None: + print("No output directory specified; exiting after populating " + "corpus directory.") + sys.exit(0) + +# Keep track of which diffs pass / fail. +passes = 0 +fails = 0 +list_of_failures = [] +# Iterate through the corpus body. +# For each file, generate a .s and a .o file. 
Then, use objdump to +# generate a .objdump file, which we will compare to the .s file. +# Diff the .s with the corresponding .objdump file. +for filename in os.listdir(args.corpus): + + filename_prefix = args.out + "/" + filename + + # Step 2: Run proto-to-asm on corpus file to generate .s file. + proto_to_asm_command = ["llvm-mc-assemble-proto-to-asm", + args.corpus + "/" + filename] + asm_file = open(filename_prefix + ".s", "w+r") + asm_proc = subprocess.Popen(proto_to_asm_command, stdout=asm_file, + stderr=subprocess.STDOUT) + asm_out, asm_err = asm_proc.communicate() + if asm_proc.returncode != 0: + raise ValueError('failed to run {}: {}'.format(proto_to_asm_command, + asm_err)) + asm_file.close() + + # Step 3: Generate .o files in the outputdir. + cmd = 'llvm-mc-assemble-proto-fuzzer {corpus}/{file} -triple={triple}' \ + + ' -mattr={mattr} -out={out} -filetype=obj -fuzzer-args -runs=1' + cmd = cmd.format(corpus=args.corpus, file=filename, triple=args.triple, + mattr=args.mattr, out=args.out) + obj_files_command = shlex.split(cmd) + obj_proc = subprocess.Popen(obj_files_command, stdout=subprocess.PIPE, + stderr=subprocess.STDOUT) + obj_out, obj_err = obj_proc.communicate() + if obj_proc.returncode != 0: + raise ValueError('failed to run fuzzer {}: {}'.format( + obj_files_command, obj_err)) + if args.verbose: + print(obj_out) + + # Step 4: Call objdump on each .o file in the output directory, + # to generate corresponding .objdump files. 
+ objdump_file = open(filename_prefix + ".objdump", "w+r") + cmd = '{objdump} -dr -M numeric {filename}.o' + cmd = cmd.format(objdump=args.objdump, filename=filename_prefix) + objdump_command = shlex.split(cmd) + objdump_proc = subprocess.Popen(objdump_command, + stdout=objdump_file, + stderr=subprocess.STDOUT) + objdump_out, objdump_err = objdump_proc.communicate() + if objdump_proc.returncode != 0: + raise ValueError('failed to run objdump {}: {}'.format( + objdump_command, objdump_err)) + + # Step 5: Process files generated by objdump so that the files only contain + # instructions. + objdump_file.seek(0) + temp_objdump_file = tempfile.TemporaryFile("w+r") + # Only the lines containing asm instructions have tabs in them; remove all + # other lines (for example, header lines and whitespace). + for line in objdump_file: + if "\t" in line: + temp_objdump_file.write(line) + objdump_file.close() + with open(filename_prefix + ".parsed_objdump", "w") as write_objdump_file: + temp_objdump_file.seek(0) + # Each line of the objdump output looks something like this: + # 0: 00318033 add x0,x3,x3 + # We remove the first two columns, leaving only the asm instruction. + for line in temp_objdump_file: + parts = line.split()[2:] + for part in parts: + write_objdump_file.write("\t") + write_objdump_file.write(part) + write_objdump_file.write("\n") + temp_objdump_file.close() + + # Step 6: Diff the .s file generated by proto-to-asm and the .objdump file + # generated by objdump. + prefix = "Checking " + filename + "..." 
+ file1 = open(filename_prefix + ".s", "r") + file2 = open(filename_prefix + ".parsed_objdump", "r") + diff = difflib.ndiff([l for l in file1 if "\t" in l], file2.readlines()) + delta = ''.join(x[0:] for x in diff if x.startswith(('- ', '+ '))) + if delta: + print(prefix + "FAILURE!") + fails = fails + 1 + list_of_failures.append(filename) + list_of_failures.append(delta) + else: + print(prefix + "SUCCESS!") + passes = passes + 1 + +print("Succeeded: " + str(passes) + "\t\tFailed: " + str(fails) + + "\t\t[Total: " + str(passes + fails) + "]") + +if (fails != 0): + print("The following files failed...") + for item in list_of_failures: + print(item) + +sys.exit(0 if fails == 0 else 1) Index: tools/llvm-mc-assemble-proto-fuzzer/handle-asm/handle_asm.cpp =================================================================== --- tools/llvm-mc-assemble-proto-fuzzer/handle-asm/handle_asm.cpp +++ tools/llvm-mc-assemble-proto-fuzzer/handle-asm/handle_asm.cpp @@ -33,11 +33,11 @@ using namespace mc_proto_fuzzer; static cl::opt OutputDirname("out", - cl::desc("Directory name for " - "llvm-mc-assemble-proto-fuzzer " - "output when filetype=obj"), - cl::value_desc("dirname"), - cl::init("./outputdir")); + cl::desc("Directory name for " + "llvm-mc-assemble-proto-fuzzer " + "output when filetype=obj"), + cl::value_desc("dirname"), + cl::init("./outputdir")); static cl::opt TripleName("triple", cl::desc("Target triple to assemble for, " Index: tools/llvm-mc-assemble-proto-fuzzer/mcfuzz.py =================================================================== --- tools/llvm-mc-assemble-proto-fuzzer/mcfuzz.py +++ tools/llvm-mc-assemble-proto-fuzzer/mcfuzz.py @@ -8,133 +8,261 @@ import sys import tempfile -parser = argparse.ArgumentParser() - -# Flags for the directory names, corpus and outputdir. 
-parser.add_argument("--corpus", type=str, help="corpus directory name", \ - required=True) -parser.add_argument("--out", type=str, \ - help="output directory name for obj files", default=None) - -# Flags for llvm-mc-assemble-proto-fuzzer: triple, mattr, filetype. -parser.add_argument("--triple", type=str, help="specify the triple", \ - default="riscv32") -parser.add_argument("--mattr", type=str, help="specify mattr", \ - default="") -parser.add_argument("--filetype", type=str, help="asm or obj", \ - default="obj") - -# These args are passed in after the -fuzzer-args flag. -parser.add_argument("--runs", type=int, help="number of runs", default=100) -parser.add_argument("--max-len", type=int, \ - help="limit instruction size for fuzzing", default=40) - -# Flag specifies the name of objdump executable. -parser.add_argument("--objdump", type=str, help="specify the path to" \ - " objdump", default="/prj/llvm-arm/home/common/build_tools/" \ - "riscv32-gnu-7.2.0/bin/riscv32-unknown-linux-gnu-objdump") - -# Flag specifies that we should print out everything -parser.add_argument("--verbose", dest="verbose", action="store_true") -parser.set_defaults(verbose=False) - -parser.add_argument("--riscv-no-aliases", dest="NoAliases", - action="store_true") -parser.set_defaults(NoAliases=False) - -args = parser.parse_args() - -# Step 1: Invoke fuzzer to generate a corpus. 
-cmd = 'llvm-mc-assemble-proto-fuzzer {corpus} -triple={triple} -mattr={mattr}' \ - + ' -filetype={filetype} -fuzzer-args -runs={runs} -max_len={max_len}' -cmd = cmd.format(corpus=args.corpus, triple=args.triple, mattr=args.mattr, \ - filetype=args.filetype, runs=args.runs, max_len=args.max_len) -fuzz_command = shlex.split(cmd) - -fuzz_proc = subprocess.Popen(fuzz_command, stdout=subprocess.PIPE, \ - stderr=subprocess.STDOUT) -fuzz_out, fuzz_err = fuzz_proc.communicate() -if fuzz_proc.returncode != 0: - raise ValueError('failed to run fuzz {}: {}'.format(fuzz_command, fuzz_err)) -if args.verbose: - print(fuzz_out) - -# If user specified an output directory, proceed to step 2; otherwise, exit. -if args.out is None: - print("No output directory specified; exiting after populating " - "corpus directory.") - sys.exit(0) - -# Keep track of which diffs pass / fail. -passes = 0 -fails = 0 -list_of_failures = [] -# Iterate through the corpus body. -# For each file, generate a .s and a .o file. Then, use objdump to -# generate a .objdump file, which we will compare to the .s file. -# Diff the .s with the corresponding .objdump file. -for filename in os.listdir(args.corpus): - filename_prefix = args.out + "/" + filename +def main(): + parser = argparse.ArgumentParser() + parse_arguments(parser) + args = parser.parse_args() + + # Step 1: Invoke fuzzer to generate a corpus. + call_fuzzer(args) + + # If user did not specify output dir, skip steps 2-8; exit. + if args.out is None: + print("No output directory specified; exiting after populating " + "corpus directory.") + sys.exit(0) + + # List of corpus files that both LLVM AS and GNU AS fail to assemble. + list_of_llvm_as_and_gnu_as_fails = [] + # List of corpus files that only LLVM AS fails to assemble. + list_of_llvm_as_fails = [] + # List of corpus files that only GNU AS fails to assemble. + list_of_gnu_as_fails = [] + # List of corpus files that both LLVM AS and GNU AS can assemble. 
+ list_of_llvm_as_and_gnu_as_passes = [] + # Keep track of which diffs pass / fail. + passes = 0 + fails = 0 + list_of_diffs = [] + + # Iterate through the corpus body. + + # For each file, re-run the fuzzer to generate an object file with the fuzz + # target. Also run the golden assembler (gnu as) to generate a reference + # object file. If both assemblers behave the same way, proceed to generate + # a .s file (using proto-to-asm tool), disassemble the object file + # generated by the fuzz target (using objdump) and then compare the + # resulting .objdump with your .s file. + for filename in os.listdir(args.corpus): + filename_prefix = args.out + "/" + filename + # Step 2: Run fuzzer with filetype=obj; check for error in fuzz target. + target_error_occurred, out = call_fuzzer_on_corpus_file(args, filename) + + # Step 3: Run proto-to-asm on corpus file to generate .s file. + call_proto_to_asm(args, filename) + + # Step 4: Run golden assembler (gnu) and check for error. + golden_error_occurred, golden_out = call_golden_assembler(args, + filename) + + # Step 5: Compare behavior of fuzz target and golden assembler. 
+ #################################################################### + #-- Initial, tentative interpretation of results, based on the --# + #-- status (pass/fail) of tools (LLVM AS, GNU AS, GNU OBJDUMP) --# + #------------------------------------------------------------------# + # LLVM AS | GNU AS | GNU OBJDUMP | Conclusion # + #------------------------------------------------------------------# + # 0 | 0 | x | invalid/unimplemented instr # + # 0 | 1 | x | LLVM MC bug/unimplemented instr # + # 1 | 0 | x | LLVM MC bug # + # 1 | 1 | 0 | LLVM MC bug and GCC bug # + # 1 | 1 | 1 | success # + #################################################################### + if target_error_occurred and golden_error_occurred: + if args.verbose: + print("Both assemblers failed to assemble file: " + filename) + list_of_llvm_as_and_gnu_as_fails.append(filename) + continue + elif target_error_occurred: + if args.verbose: + print("Only target AS failed to assemble file: " + filename) + list_of_llvm_as_fails.append(filename) + out = out.split("\n") + for line in out: + if (line.startswith("error:")): + list_of_llvm_as_fails.append(line) + nextline = out[out.index(line) + 1] + list_of_llvm_as_fails.append(nextline) + continue + elif golden_error_occurred: + if args.verbose: + print("Only golden AS failed to assemble file: " + filename) + list_of_gnu_as_fails.append(filename) + error_msg = golden_out.split("Error:", golden_out.count("\n")) + error_msg = error_msg[1] + list_of_gnu_as_fails.append(error_msg) + continue + else: + if args.verbose: + print("Both assemblers assembled file: " + filename) + list_of_llvm_as_and_gnu_as_passes.append(filename) + + # Step 6: Call objdump on each .o file (generated by fuzz target) in + # the output directory, to generate corresponding .objdump files. + objdump_file = call_objdump(args, filename) + + # Step 7: Process files generated by objdump so that the files only + # contain instructions. 
+ process_objdump_file(objdump_file, args, filename) + + # Step 8: Diff the .s file generated by proto-to-asm with .objdump file + # generated by objdump. + passes, fails, list_of_diffs = print_file_status(args, filename, + passes, fails, + list_of_diffs) - # Step 2: Run proto-to-asm on corpus file to generate .s file. - proto_to_asm_command = ["llvm-mc-assemble-proto-to-asm", \ - args.corpus + "/" + filename, "-riscv-no-aliases=" + - str(args.NoAliases)] + print_result(passes, fails, list_of_llvm_as_and_gnu_as_fails, + list_of_llvm_as_fails, list_of_gnu_as_fails, + list_of_llvm_as_and_gnu_as_passes, list_of_diffs) + + sys.exit(0 if fails == 0 else 1) + + +def parse_arguments(parser): + # Flags for the directory names, corpus and outputdir. + parser.add_argument("--corpus", type=str, help="corpus directory name", + required=True) + parser.add_argument("--out", type=str, + help="output directory name for obj files", + default=None) + + # Flags for llvm-mc-assemble-proto-fuzzer: triple, mattr, filetype. + parser.add_argument("--triple", type=str, help="specify the triple", + default="riscv32") + parser.add_argument("--mattr", type=str, help="specify mattr", + default="") + parser.add_argument("--march", type=str, help="specify march", + default="rv32i") + parser.add_argument("--filetype", type=str, help="asm or obj", + default="obj") + + # These args are passed in after the -fuzzer-args flag. + parser.add_argument("--runs", type=int, help="number of runs", default=100) + parser.add_argument("--max-len", type=int, + help="limit instruction size for fuzzing", default=40) + + # Flag specifies the name of objdump executable. + parser.add_argument("--objdump", type=str, help="specify the path to" + " objdump", default="/prj/llvm-arm/home/common/" + "build_tools/riscv32-gnu-7.2.0/bin/" + "riscv32-unknown-linux-gnu-objdump") + + # Flag specifies the name of golden assembler executable. 
+ parser.add_argument("--assemble", type=str, help="specify the path to" + " golden assembler", default="/prj/llvm-arm/home/" + "common/build_tools/riscv32-gnu-7.2.0/bin/" + "riscv32-unknown-linux-gnu-as") + + # Flag specifies that we should print out everything + parser.add_argument("--verbose", dest="verbose", action="store_true") + parser.set_defaults(verbose=False) + + +# This function calls the llvm-mc-assemble-proto-fuzzer with a given extension +def call_fuzzer(args): + cmd = 'llvm-mc-assemble-proto-fuzzer-rv32 {corpus} \ + -triple={triple} -mattr={mattr} -filetype={filetype} -fuzzer-args ' \ + + '-runs={runs} -max_len={max_len}' + cmd = cmd.format(corpus=args.corpus, triple=args.triple, + mattr=args.mattr, filetype=args.filetype, + runs=args.runs, max_len=args.max_len) + fuzz_command = shlex.split(cmd) + + fuzz_proc = subprocess.Popen(fuzz_command, stdout=subprocess.PIPE, + stderr=subprocess.STDOUT) + fuzz_out, fuzz_err = fuzz_proc.communicate() + if fuzz_proc.returncode != 0: + raise ValueError('failed to run fuzz {}: {}'.format(fuzz_command, + fuzz_err)) + if args.verbose: + print(fuzz_out) + + +def call_fuzzer_on_corpus_file(args, filename): + target_error_occurred = False + cmd = 'llvm-mc-assemble-proto-fuzzer-rv32 {corpus}/{file} ' \ + + '-triple={triple} -mattr={mattr} -out={out} ' \ + + '-filetype=obj -fuzzer-args -runs=1' + cmd = cmd.format(corpus=args.corpus, file=filename, + triple=args.triple, mattr=args.mattr, out=args.out) + target_as_command = shlex.split(cmd) + target_as_proc = subprocess.Popen(target_as_command, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT) + target_as_out, target_as_err = target_as_proc.communicate() + if target_as_proc.returncode != 0: + raise ValueError('failed to run fuzzer {}: {}'.format( + target_as_command, target_as_err)) + if target_as_out: + if "error" in target_as_out: + target_error_occurred = True + if args.verbose: + print(target_as_out) + return target_error_occurred, target_as_out + + +def 
call_proto_to_asm(args, filename): + filename_prefix = args.out + "/" + filename + cmd = 'llvm-mc-assemble-proto-to-asm-rv32 {corpus}/{file}' + cmd = cmd.format(corpus=args.corpus, file=filename) + proto_to_asm_command = shlex.split(cmd) asm_file = open(filename_prefix + ".s", "w+r") asm_proc = subprocess.Popen(proto_to_asm_command, stdout=asm_file, - stderr=subprocess.STDOUT) + stderr=subprocess.STDOUT) asm_out, asm_err = asm_proc.communicate() if asm_proc.returncode != 0: raise ValueError('failed to run {}: {}'.format(proto_to_asm_command, - asm_err)) - - # Step 3: Generate .o files in the outputdir. - cmd = 'llvm-mc-assemble-proto-fuzzer {corpus}/{file} -triple={triple}' \ - + ' -mattr={mattr} -out={out} -filetype=obj -fuzzer-args -runs=1' - cmd = cmd.format(corpus=args.corpus, file=filename, triple=args.triple, \ - mattr=args.mattr, out=args.out) - obj_files_command = shlex.split(cmd) - obj_proc = subprocess.Popen(obj_files_command, stdout=subprocess.PIPE, - stderr=subprocess.STDOUT) - obj_out, obj_err = obj_proc.communicate() - if obj_proc.returncode != 0: - raise ValueError('failed to run fuzzer {}: {}'.format( \ - obj_files_command, obj_err)) - if args.verbose: - print(obj_out) + asm_err)) + elif asm_out: + if args.verbose: + print("Asm_out: " + asm_out) + asm_file.close() + - # Step 4: Call objdump on each .o file in the output directory, - # to generate corresponding .objdump files. 
+def call_golden_assembler(args, filename): + golden_error_occurred = False + cmd = '{assemble} {dirname}/{filename}.s -march={march} ' \ + + '-o {dirname}/{filename}.out' + cmd = cmd.format(assemble=args.assemble, march=args.march, + dirname=args.out, filename=filename) + golden_as_command = shlex.split(cmd) + golden_as_proc = subprocess.Popen(golden_as_command, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT) + golden_as_out, golden_as_err = golden_as_proc.communicate() + if golden_as_proc.returncode != 0: + if "Error:" in golden_as_out: + if args.verbose: + print("golden_as_out: " + golden_as_out) + golden_error_occurred = True + elif args.verbose: + print(golden_as_out) + return golden_error_occurred, golden_as_out + + +def call_objdump(args, filename): + filename_prefix = args.out + "/" + filename objdump_file = open(filename_prefix + ".objdump", "w+r") cmd = '{objdump} -dr -M numeric {filename}.o' cmd = cmd.format(objdump=args.objdump, filename=filename_prefix) objdump_command = shlex.split(cmd) objdump_proc = subprocess.Popen(objdump_command, - stdout=objdump_file, stderr=subprocess.STDOUT) + stdout=objdump_file, + stderr=subprocess.STDOUT) objdump_out, objdump_err = objdump_proc.communicate() if objdump_proc.returncode != 0: - raise ValueError('failed to run objdump {}: ' \ - + '{}'.format(objdump_command, objdump_err)) + raise ValueError('failed to run objdump {}: {}'.format( + objdump_command, objdump_err)) + return objdump_file - # Step 5: Process asm files so that files only contain asm instructions. - asm_file.seek(0) - lines = asm_file.readlines() - asm_file.close() - write_asm_file = open(filename_prefix + ".parsed_s", "w") - # Remove the first line of .s file, which contains the filename. 
For example: - # // corpus/dff0318decde43ce5065a4209412aa2c68d01318 - for line in lines: - if "\t" in line: - write_asm_file.write(line) - write_asm_file.close() - # Step 6: Process files generated by objdump so that the files only contain - # instructions. +def process_objdump_file(objdump_file, args, filename): + filename_prefix = args.out + "/" + filename objdump_file.seek(0) temp_objdump_file = tempfile.TemporaryFile("w+r") - # Only the lines containing asm instructions have tabs in them; remove all - # other lines (for example, header lines and whitespace). + # Only the lines containing asm instructions have tabs in them; remove + # all other lines (for example, header lines and whitespace). for line in objdump_file: if "\t" in line: temp_objdump_file.write(line) @@ -152,28 +280,72 @@ write_objdump_file.write("\n") temp_objdump_file.close() - # Step 7: Diff the .s file generated by proto-to-asm and the .objdump file - # generated by objdump. - prefix = "Checking " + filename + "..." - file1 = open(filename_prefix + ".parsed_s", "r") + +def print_file_status(args, filename, passes, fails, list_of_diffs): + filename_prefix = args.out + "/" + filename + if args.verbose: + prefix = "Checking " + filename + "..." 
+ file1 = open(filename_prefix + ".s", "r") file2 = open(filename_prefix + ".parsed_objdump", "r") diff = difflib.ndiff(file1.readlines(), file2.readlines()) delta = ''.join(x[0:] for x in diff if x.startswith(('- ', '+ '))) if delta: - print(prefix + "FAILURE!") + if args.verbose: + print(prefix + "FAILURE!") fails = fails + 1 - list_of_failures.append(filename) - list_of_failures.append(delta) + list_of_diffs.append(filename) + list_of_diffs.append(delta) else: - print(prefix + "SUCCESS!") + if args.verbose: + print(prefix + "SUCCESS!") passes = passes + 1 + file1.close() + file2.close() + return passes, fails, list_of_diffs + + +def print_result(passes, fails, list1, list2, list3, list4, list5): + # Both llvm-mc-assembler and golden (gnu) assembler fail + count1 = 0 + for item in list1: + count1 = count1 + 1 + + count2 = 0 + # Target assembler llvm-mc fails + for item in list2: + count2 = count2 + 1 + + if count2 != 0: + print("Target LLVM AS failed to assemble these inputs...") + for item in list2: + print(item) + + count3 = 0 + # Golden (gnu) assembler fails + for item in list3: + count3 = count3 + 1 + if count3 != 0: + print("GNU AS failed to assemble these inputs...") + for item in list3: + print(item) -print("Succeeded: " + str(passes) + "\t\tFailed: " + str(fails) \ - + "\t\t[Total: " + str(passes + fails) + "]") + count4 = 0 + # Both assemblers pass + for item in list4: + count4 = count4 + 1 -if (fails != 0): - print("The following files failed...") - for item in list_of_failures: - print(item) + if fails != 0: + print("Target LLVM AS and golden GNU AS both assembled " + "these inputs, but input ASM string differed from" + "GNU disassembler-generated ASM string...") + for item in list5: + print(item) + print("Both llvm AS and gnu AS fail: {}".format(count1)) + print("Only llvm AS fails: {}".format(count2)) + print("Only gnu AS fails: {}".format(count3)) + print("Both llvm AS and gnu AS pass: {}".format(count4)) + print("[Succeeded: {}/{}\tFailed: 
{}/{}]".format(passes, + count4, fails, count4)) -sys.exit(0 if fails == 0 else 1) +if __name__ == "__main__": + main() Index: tools/llvm-mc-assemble-proto-fuzzer/proto-files/example_asm_proto.proto =================================================================== --- /dev/null +++ tools/llvm-mc-assemble-proto-fuzzer/proto-files/example_asm_proto.proto @@ -0,0 +1,63 @@ +//===-- example_asm_proto.proto - Protobuf description of ASM -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file describes a subset of ASM as a protobuf. It is used by the +/// example fuzzer to generate basic inputs to fuzz the llvm mc layer. +/// +//===----------------------------------------------------------------------===// + +syntax = "proto2"; + +message Register { + enum RegName { + X0 = 1; X1 = 2; X2 = 3; X3 = 4; X4 = 5; + X5 = 6; X6 = 7; X7 = 8; X8 = 9; X9 = 10; + X10 = 11; X11 = 12; X12 = 13; X13 = 14; X14 = 15; + X15 = 16; X16 = 17; X17 = 18; X18 = 19; X19 = 20; + X20 = 21; X21 = 22; X22 = 23; X23 = 24; X24 = 25; + X25 = 26; X26 = 27; X27 = 28; X28 = 29; X29 = 30; + X30 = 31; X31 = 32; + }; + required RegName name = 1; +} + +message RTypeOpcode { + enum Op { + ADD = 1; SUB = 2; + }; + required Op op = 1; +} + +message RTypeOperands { + required Register operand1 = 1; + required Register operand2 = 2; + required Register operand3 = 3; +} + +message RTypeStatement { + required RTypeOpcode opcode = 1; + required RTypeOperands operands = 2; +} + +message AsmStatement { + oneof asmstatement_oneof { + RTypeStatement statement = 2; + } +} + +message AsmStatementSeq { + repeated AsmStatement statements = 1; +} + +message Assembly { + required AsmStatementSeq asmStatements = 1; +} + +package mc_proto_fuzzer; Index: 
tools/llvm-mc-assemble-proto-fuzzer/proto-files/rv32.proto =================================================================== --- /dev/null +++ tools/llvm-mc-assemble-proto-fuzzer/proto-files/rv32.proto @@ -0,0 +1,220 @@ +//===-- rv32.proto - Protobuf description of ASM --------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file serves as the base for the protobuf representation of the RV32 +/// ISA of RISC-V ASM. It is used to more easily find interesting inputs +/// for fuzzing the llvm mc layer. +/// +//===----------------------------------------------------------------------===// + +syntax = "proto2"; + +import "rv32_operands.proto"; +import "rv32a.proto"; +import "rv32c.proto"; +import "rv32d.proto"; +import "rv32f.proto"; +import "rv32i.proto"; +import "rv32m.proto"; + +// Note: The order of the fields does not matter for the grammar since +// the structure of the fuzzed ASM statements is handled in proto_to_asm. 
+message RegRegRegRegStmt { + required Register operand1 = 1; + required Register operand2 = 2; + required Register operand3 = 3; + required Register operand4 = 4; + optional RoundingMode operand5 = 5; + oneof opcode_oneof { + D_R4FormatOpcode opcode1 = 6; + F_R4FormatOpcode opcode2 = 7; + } +} + +message RegRegRegStmt1 { + required Register operand1 = 1; + required Register operand2 = 2; + required Register operand3 = 3; + optional RoundingMode operand4 = 4; + oneof opcode_oneof { + D_RFormat1Opcode opcode1 = 5; + D_RegRegReg_Pseudo opcode2 = 6; + F_RFormat1Opcode opcode3 = 7; + F_RegRegReg_Pseudo opcode4 = 8; + I_RFormatOpcode opcode5 = 9; + I_RegRegReg_Pseudo opcode6 = 10; + M_RFormatOpcode opcode7 = 11; + } +} + +message RegRegRegStmt2 { + required Register operand1 = 1; + required Register operand2 = 2; + required Register operand3 = 3; + oneof opcode_oneof { + A_RFormat1Opcode opcode1 = 4; + } +} + +message RegRegImmStmt1 { + required Register operand1 = 1; + required Register operand2 = 2; + required Immediate operand3 = 3; + oneof opcode_oneof { + I_BFormatOpcode opcode1 = 4; + I_IFormatOpcode opcode2 = 5; + I_RegRegImm_Pseudo opcode3 = 6; + } +} + +message RegRegImmStmt2 { + required Register operand1 = 1; + required Register operand2 = 2; + required Immediate operand3 = 3; + oneof opcode_oneof { + C_CLFormatOpcode opcode1 = 4; + C_CSFormat1Opcode opcode2 = 5; + D_IFormatLoadOpcode opcode3 = 6; + D_SFormatOpcode opcode4 = 7; + F_IFormatLoadOpcode opcode5 = 8; + F_SFormatOpcode opcode6 = 9; + I_IFormatLoadOpcode opcode7 = 10; + I_SFormatOpcode opcode8 = 11; + } +} + +message RegRegStmt1 { + required Register operand1 = 1; + required Register operand2 = 2; + optional RoundingMode operand3 = 3; + oneof opcode_oneof { + C_CRFormat2Opcode opcode1 = 4; + C_CSFormat2Opcode opcode2 = 5; + D_RFormat2Opcode opcode3 = 6; + D_RegReg_Pseudo opcode4 = 7; + F_RFormat2Opcode opcode5 = 8; + F_RegReg_Pseudo opcode6 = 9; + I_RegReg_Pseudo opcode7 = 10; + } +} + +message 
RegRegStmt2 { + required Register operand1 = 1; + required Register operand2 = 2; + oneof opcode_oneof { + A_RFormat2Opcode opcode1 = 3; + } +} + +message RegImmStmt { + required Register operand1 = 1; + required Immediate operand2 = 2; + oneof opcode_oneof { + C_CBFormatOpcode opcode1 = 3; + C_CIFormatOpcode opcode2 = 4; + C_CIWFormatOpcode opcode3 = 5; + C_CSSFormatOpcode opcode4 = 6; + F_RegImm_Pseudo opcode5 = 7; + I_UFormatOpcode opcode6 = 8; + I_JFormatOpcode opcode7 = 9; + I_RegImm_Pseudo opcode8 = 10; + } +} + +message RegImmRegStmt { + required Register operand1 = 1; + required Immediate operand2 = 2; + required Register operand3 = 3; + required I_CSR_IFormat1Opcode opcode1 = 4; +} + +message ImmImmStmt { + required Immediate operand1 = 1; + required Immediate operand2 = 2; + oneof opcode_oneof { + I_ImmImm_Pseudo opcode1 = 3; + } +} + +message ImmRegStmt { + required Immediate operand1 = 1; + required Register operand2 = 2; + required I_ImmReg_Pseudo opcode1 = 3; +} + +message RegImmImmStmt { + required Register operand1 = 1; + required Immediate operand2 = 2; + required Immediate operand3 = 3; + required I_CSR_IFormat2Opcode opcode1 = 4; +} + +message RegStmt { + required Register operand1 = 1; + oneof opcode_oneof { + C_CRFormat1Opcode opcode1 = 2; + F_Reg_Pseudo opcode2 = 3; + I_Reg_Pseudo opcode3 = 4; + } +} + +message ImmStmt { + required Immediate operand1 = 1; + oneof opcode_oneof { + C_CJFormatOpcode opcode1 = 2; + I_Imm_Pseudo opcode2 = 3; + F_Imm_Pseudo opcode3 = 4; + } +} + +message FenceStmt { + required IORWString operand1 = 1; + required IORWString operand2 = 2; + required I_Other_IFormat1Opcode opcode1 = 3; +} + +message NoOperandsStmt { + oneof opcode_oneof { + C_NoOperands_FormatOpcode opcode1 = 1; + I_NoOpnd_Pseudo opcode2 = 2; + I_Other_IFormat2Opcode opcode3 = 3; + } +} + +message AsmStmt { + oneof asmstmt_oneof { + FenceStmt stmt1 = 1; + ImmStmt stmt2 = 2; + ImmImmStmt stmt3 = 3; + ImmRegStmt stmt4 = 4; + NoOperandsStmt stmt5 = 5; + 
RegImmStmt stmt6 = 6; + RegImmRegStmt stmt7 = 7; + RegImmImmStmt stmt8 = 8; + RegStmt stmt9 = 9; + RegRegImmStmt1 stmt10 = 10; + RegRegImmStmt2 stmt11 = 11; + RegRegStmt1 stmt12 = 12; + RegRegStmt2 stmt13 = 13; + RegRegRegStmt1 stmt14 = 14; + RegRegRegStmt2 stmt15 = 15; + RegRegRegRegStmt stmt16 = 16; + } +} + +message AsmStmtSeq { + repeated AsmStmt stmts = 1; +} + +message Assembly { + required AsmStmtSeq asmstmts = 1; +} + +package mc_proto_fuzzer; Index: tools/llvm-mc-assemble-proto-fuzzer/proto-files/rv32_operands.proto =================================================================== --- /dev/null +++ tools/llvm-mc-assemble-proto-fuzzer/proto-files/rv32_operands.proto @@ -0,0 +1,115 @@ +//===-- rv32_operands.proto - Protobuf description of ASM -----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file is the Protobuf representation of the operands in the RISC-V ISA.
+/// +//===----------------------------------------------------------------------===// + +syntax = "proto2"; + +message IORW { + enum ValueRange { + I = 0; + O = 1; + R = 2; + W = 3; + }; + required ValueRange value = 1; +} + +message IORWString { + repeated IORW iorwstring = 1; +} + +message Immediate { + oneof imm_oneof { + sint32 s_imm = 1; + uint32 u_imm = 2; + } +} + +message GenPurposeRegister { + enum ValueRange { + X0 = 0; X1 = 1; X2 = 2; X3 = 3; X4 = 4; + X5 = 5; X6 = 6; X7 = 7; X8 = 8; X9 = 9; + X10 = 10; X11 = 11; X12 = 12; X13 = 13; X14 = 14; + X15 = 15; X16 = 16; X17 = 17; X18 = 18; X19 = 19; + X20 = 20; X21 = 21; X22 = 22; X23 = 23; X24 = 24; + X25 = 25; X26 = 26; X27 = 27; X28 = 28; X29 = 29; + X30 = 30; X31 = 31; + + ZERO = 32; RA = 33; SP = 34; GP = 35; TP = 36; + T0 = 37; T1 = 38; T2 = 39; S0 = 40; S1 = 41; + A0 = 42; A1 = 43; A2 = 44; A3 = 45; A4 = 46; + A5 = 47; A6 = 48; A7 = 49; S2 = 50; S3 = 51; + S4 = 52; S5 = 53; S6 = 54; S7 = 55; S8 = 56; + S9 = 57; S10 = 58; S11 = 59; T3 = 60; T4 = 61; + T5 = 62; T6 = 63; + + }; + required ValueRange value = 1; +} + +message FloatingPointRegister { + enum ValueRange { + F0 = 0; F1 = 1; F2 = 2; F3 = 3; F4 = 4; + F5 = 5; F6 = 6; F7 = 7; F8 = 8; F9 = 9; + F10 = 10; F11 = 11; F12 = 12; F13 = 13; F14 = 14; + F15 = 15; F16 = 16; F17 = 17; F18 = 18; F19 = 19; + F20 = 20; F21 = 21; F22 = 22; F23 = 23; F24 = 24; + F25 = 25; F26 = 26; F27 = 27; F28 = 28; F29 = 29; + F30 = 30; F31 = 31; + + FT0 = 32; FT1 = 33; FT2 = 34; FT3 = 35; FT4 = 36; + FT5 = 37; FT6 = 38; FT7 = 39; FS0 = 40; FS1 = 41; + FA0 = 42; FA1 = 43; FA2 = 44; FA3 = 45; FA4 = 46; + FA5 = 47; FA6 = 48; FA7 = 49; FS2 = 50; FS3 = 51; + FS4 = 52; FS5 = 53; FS6 = 54; FS7 = 55; FS8 = 56; + FS9 = 57; FS10 = 58; FS11 = 59; FT8 = 60; FT9 = 61; + FT10 = 62; FT11 = 63; + }; + required ValueRange value = 1; +} + +message Register { + oneof reg_oneof { + GenPurposeRegister reg1 = 1; + FloatingPointRegister reg2 = 2; + } +} + +message RoundingMode { + enum ValueRange { 
+ DYN = 0; + RDN = 1; + RMM = 2; + RNE = 3; + RTZ = 4; + RUP = 5; + }; + required ValueRange value = 1; +} + +message ImmRegPair { + required Immediate imm = 1; + required Register reg = 2; +} + +message Operand { + oneof operand_oneof { + IORWString str = 1; + Immediate imm = 2; + ImmRegPair pair = 3; + Register reg = 4; + RoundingMode mode = 5; + } +} + +package mc_proto_fuzzer; Index: tools/llvm-mc-assemble-proto-fuzzer/proto-files/rv32a.proto =================================================================== --- /dev/null +++ tools/llvm-mc-assemble-proto-fuzzer/proto-files/rv32a.proto @@ -0,0 +1,41 @@ +//===-- rv32a.proto - Protobuf description of ASM -------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file describes the instruction opcodes in the A-extension for the +/// RV32 ISA, in Protobuf form. 
+/// +//===----------------------------------------------------------------------===// + +syntax = "proto2"; + +message A_RFormat1Opcode { + enum ValueRange { + AMOADD_W = 0; + AMOAND_W = 1; + AMOMAX_W = 2; + AMOMAXU_W = 3; + AMOMIN_W = 4; + AMOMINU_W = 5; + AMOOR_W = 6; + AMOSWAP_W = 7; + AMOXOR_W = 8; + SC_W = 9; + }; + required ValueRange value = 1; +} + +message A_RFormat2Opcode { + enum ValueRange { + LR_W = 0; + }; + required ValueRange value = 1; +} + +package mc_proto_fuzzer; Index: tools/llvm-mc-assemble-proto-fuzzer/proto-files/rv32c.proto =================================================================== --- /dev/null +++ tools/llvm-mc-assemble-proto-fuzzer/proto-files/rv32c.proto @@ -0,0 +1,119 @@ +//===-- rv32c.proto - Protobuf description of ASM -------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file describes the instruction opcodes in the C-extension for the +/// RV32 ISA, in Protobuf form. 
+/// +//===----------------------------------------------------------------------===// + +syntax = "proto2"; + +message C_CIFormatOpcode { + enum ValueRange { + C_ADDI = 0; + C_ADDI16SP = 1; + C_FLDSP = 2; + C_FLWSP = 3; + C_LI = 4; + C_LUI = 5; + C_LWSP = 6; + C_SLLI = 7; + } + required ValueRange value = 1; +} + +message C_CSSFormatOpcode { + enum ValueRange { + C_SWSP = 0; + C_FSWSP = 1; + C_FSDSP = 2; + } + required ValueRange value = 1; +} + +message C_CLFormatOpcode { + enum ValueRange { + C_LW = 0; + C_FLW = 1; + C_FLD = 2; + } + required ValueRange value = 1; +} + +message C_CSFormat1Opcode { + enum ValueRange { + C_SW = 0; + C_FSW = 1; + C_FSD = 2; + } + required ValueRange value = 1; +} + +message C_CSFormat2Opcode { + enum ValueRange { + C_AND = 0; + C_OR = 1; + C_SUB = 2; + C_XOR = 3; + } + required ValueRange value = 1; +} + +message C_CJFormatOpcode { + enum ValueRange { + C_J = 0; + C_JAL = 1; + } + required ValueRange value = 1; +} + +message C_CRFormat1Opcode { + enum ValueRange { + C_JR = 0; + C_JALR = 1; + } + required ValueRange value = 1; +} + +message C_CRFormat2Opcode { + enum ValueRange { + C_ADD = 0; + C_MV = 1; + } + required ValueRange value = 1; +} + +message C_CBFormatOpcode { + enum ValueRange { + C_ANDI = 0; + C_BEQZ = 1; + C_BNEZ = 2; + C_SRAI = 3; + C_SRLI = 4; + } + required ValueRange value = 1; +} + +message C_CIWFormatOpcode { + enum ValueRange { + C_ADDI4SPN = 0; + } + required ValueRange value = 1; +} + +message C_NoOperands_FormatOpcode { + enum ValueRange { + C_NOP = 0; + C_EBREAK = 1; + } + required ValueRange value = 1; +} + +package mc_proto_fuzzer; Index: tools/llvm-mc-assemble-proto-fuzzer/proto-files/rv32d.proto =================================================================== --- /dev/null +++ tools/llvm-mc-assemble-proto-fuzzer/proto-files/rv32d.proto @@ -0,0 +1,93 @@ +//===-- rv32d.proto - Protobuf description of ASM -------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is 
distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file describes the instruction opcodes in the D-extension for the +/// RV32 ISA, in Protobuf form. +/// +//===----------------------------------------------------------------------===// + +syntax = "proto2"; + +message D_RFormat1Opcode { + enum ValueRange { + FADD_D = 0; + FDIV_D = 1; + FEQ_D = 2; + FLT_D = 3; + FLE_D = 4; + FMAX_D = 5; + FMIN_D = 6; + FMUL_D = 7; + FSGNJN_D = 8; + FSGNJ_D = 9; + FSGNJX_D = 10; + FSUB_D = 11; + }; + required ValueRange value = 1; +} + +message D_RFormat2Opcode { + enum ValueRange { + FCLASS_D = 0; + FCVT_D_S = 1; + FCVT_D_W = 2; + FCVT_D_WU = 3; + FCVT_S_D = 4; + FCVT_W_D = 5; + FCVT_WU_D = 6; + FMV_D_X = 7; + FMV_X_D = 8; + FSQRT_D = 9; + }; + required ValueRange value = 1; +} + +message D_R4FormatOpcode { + enum ValueRange { + FMADD_D = 0; + FMSUB_D = 1; + FNMADD_D = 2; + FNMSUB_D = 3; + }; + required ValueRange value = 1; +} + +message D_IFormatLoadOpcode { + enum ValueRange { + FLD = 0; + }; + required ValueRange value = 1; +} + +message D_SFormatOpcode { + enum ValueRange { + FSD = 0; + }; + required ValueRange value = 1; +} + +message D_RegReg_Pseudo { + enum ValueRange { + FABS_D = 0; + FMV_D = 1; + FNEG_D = 2; + }; + required ValueRange value = 1; +} + +message D_RegRegReg_Pseudo { + enum ValueRange { + FGT_D = 0; + FGE_D = 1; + }; + required ValueRange value = 1; +} + +package mc_proto_fuzzer; Index: tools/llvm-mc-assemble-proto-fuzzer/proto-files/rv32f.proto =================================================================== --- /dev/null +++ tools/llvm-mc-assemble-proto-fuzzer/proto-files/rv32f.proto @@ -0,0 +1,123 @@ +//===-- rv32f.proto - Protobuf description of ASM -------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open 
Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file describes the instruction opcodes in the F-extension for the +/// RV32 ISA, in Protobuf form. +/// +//===----------------------------------------------------------------------===// + +syntax = "proto2"; + +message F_RFormat1Opcode { + enum ValueRange { + FADD_S = 0; + FDIV_S = 1; + FEQ_S = 2; + FLT_S = 3; + FLE_S = 4; + FMAX_S = 5; + FMIN_S = 6; + FMUL_S = 7; + FSGNJN_S = 8; + FSGNJ_S = 9; + FSGNJX_S = 10; + FSUB_S = 11; + }; + required ValueRange value = 1; +} + +message F_RFormat2Opcode { + enum ValueRange { + FCLASS_S = 0; + FCVT_S_W = 1; + FCVT_S_WU = 2; + FCVT_W_S = 3; + FCVT_WU_S = 4; + FMV_X_W = 5; + FMV_W_X = 6; + FSQRT_S = 7; + }; + required ValueRange value = 1; +} + +message F_R4FormatOpcode { + enum ValueRange { + FMADD_S = 0; + FMSUB_S = 1; + FNMADD_S = 2; + FNMSUB_S = 3; + }; + required ValueRange value = 1; +} + +message F_IFormatLoadOpcode { + enum ValueRange { + FLW = 0; + }; + required ValueRange value = 1; +} + +message F_SFormatOpcode { + enum ValueRange { + FSW = 0; + }; + required ValueRange value = 1; +} + +message F_Imm_Pseudo { + enum ValueRange { + FSFLAGSI = 0; + FSRMI = 1; + }; + required ValueRange value = 1; +} + +message F_Reg_Pseudo { + enum ValueRange { + FRCSR = 0; + FRFLAGS = 1; + FRRM = 2; + FSFLAGS = 3; + FSRM = 4; + }; + required ValueRange value = 1; +} + +message F_RegImm_Pseudo { + enum ValueRange { + FSFLAGSI = 0; + FSRMI = 1; + }; + required ValueRange value = 1; +} + +message F_RegReg_Pseudo { + enum ValueRange { + FABS_S = 0; + FMV_S = 1; + FMV_S_X = 2; + FMV_X_S = 3; + FNEG_S = 4; + FSFLAGS = 5; + FSRM = 6; + FSCSR = 7; + }; + required ValueRange value = 1; +} + +message F_RegRegReg_Pseudo { + enum ValueRange { + FGT_S = 0; + FGE_S = 1; + }; + required ValueRange value = 1; +} + +package mc_proto_fuzzer; Index: 
tools/llvm-mc-assemble-proto-fuzzer/proto-files/rv32i.proto =================================================================== --- /dev/null +++ tools/llvm-mc-assemble-proto-fuzzer/proto-files/rv32i.proto @@ -0,0 +1,250 @@ +//===-- rv32i.proto - Protobuf description of ASM -------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file describes the instruction opcodes in the I-extension for the +/// RV32 ISA, in Protobuf form. +/// +//===----------------------------------------------------------------------===// + +syntax = "proto2"; + +message I_RFormatOpcode { + enum ValueRange { + ADD = 0; + AND = 1; + OR = 2; + SLL = 3; + SLT = 4; + SLTU = 5; + SRA = 6; + SRL = 7; + SUB = 8; + XOR = 9; + }; + required ValueRange value = 1; +} + +message I_IFormatOpcode { + enum ValueRange { + ADDI = 0; + ANDI = 1; + JALR = 2; + ORI = 3; + SLLI = 4; + SLTI = 5; + SLTIU = 6; + SRAI = 7; + SRLI = 8; + XORI = 9; + }; + required ValueRange value = 1; +} + +message I_IFormatLoadOpcode { + enum ValueRange { + LB = 0; + LBU = 1; + LH = 2; + LHU = 3; + LW = 4; + }; + required ValueRange value = 1; +} + +message I_SFormatOpcode { + enum ValueRange { + SB = 0; + SH = 1; + SW = 2; + }; + required ValueRange value = 1; +} + +message I_BFormatOpcode { + enum ValueRange { + BEQ = 0; + BGE = 1; + BGEU = 2; + BLT = 3; + BLTU = 4; + BNE = 5; + }; + required ValueRange value = 1; +} + +message I_UFormatOpcode { + enum ValueRange { + AUIPC = 0; + LUI = 1; + }; + required ValueRange value = 1; +} + +message I_JFormatOpcode { + enum ValueRange { + JAL = 0; + }; + required ValueRange value = 1; +} + +message I_CSR_IFormat1Opcode { + enum ValueRange { + CSRRC = 0; + CSRRS = 1; + CSRRW = 2; + }; + required ValueRange value = 1; +} + +message 
I_CSR_IFormat2Opcode { + enum ValueRange { + CSRRCI = 0; + CSRRSI = 1; + CSRRWI = 2; + }; + required ValueRange value = 1; +} + +message I_Other_IFormat1Opcode { + enum ValueRange { + FENCE = 0; + }; + required ValueRange value = 1; +} + +message I_Other_IFormat2Opcode { + enum ValueRange { + EBREAK = 0; + ECALL = 1; + FENCE_I = 2; + }; + required ValueRange value = 1; +} + +message I_ImmReg_Pseudo { + // reg, imm + enum ValueRange { + CSRC = 0; + CSRR = 1; + CSRS = 2; + CSRW = 3; + }; + required ValueRange value = 1; +} + +message I_RegRegReg_Pseudo { + enum ValueRange { + SGT = 0; + SGTU = 1; + }; + required ValueRange value = 1; +} + +message I_RegImm_Pseudo { + enum ValueRange { + // reg, imm + BEQZ = 0; + BNEZ = 1; + BLEZ = 2; + BGEZ = 3; + BLTZ = 4; + BGTZ = 5; + LA = 6; + LB = 7; + LH = 8; + LI = 9; + LW = 10; + MV = 11; + MOVE = 12; + }; + required ValueRange value = 1; +} + +message I_RegReg_Pseudo { + // reg, reg + enum ValueRange { + MV = 0; + MOVE = 1; + NEG = 2; + NOT = 3; + SEQZ = 4; + SEXT_W = 5; + SGTZ = 6; + SLTZ = 7; + SNEZ = 8; + }; + required ValueRange value = 1; +} + +message I_ImmImm_Pseudo { + // imm, imm + enum ValueRange { + CSRCI = 0; + CSRSI = 1; + CSRWI = 2; + }; + required ValueRange value = 1; +} + +message I_RegRegImm_Pseudo { + // reg,reg,imm + enum ValueRange { + ADD = 0; + AND = 1; + BGT = 2; + BGTU = 3; + BLE = 4; + BLEU = 5; + OR = 6; + SLL = 7; + SLT = 8; + SLTU = 9; + SRA = 10; + SRL = 11; + XOR = 12; + }; + required ValueRange value = 1; +} + +message I_Reg_Pseudo { + enum ValueRange { + JALR = 0; + JR = 1; + RDCYCLE = 2; + RDTIME = 3; + RDINSTRET = 4; + }; + required ValueRange value = 1; +} + +message I_Imm_Pseudo { + enum ValueRange { + CALL = 0; + J = 1; + JAL = 2; + SFENCE_VMA = 3; + TAIL = 4; + }; + required ValueRange value = 1; +} + +message I_NoOpnd_Pseudo { + enum ValueRange { + FENCE = 0; + NOP = 1; + RET = 2; + SBREAK = 3; + SCALL = 4; + SFENCE_VMA = 5; + }; + required ValueRange value = 1; +} + +package 
mc_proto_fuzzer; Index: tools/llvm-mc-assemble-proto-fuzzer/proto-files/rv32m.proto =================================================================== --- /dev/null +++ tools/llvm-mc-assemble-proto-fuzzer/proto-files/rv32m.proto @@ -0,0 +1,32 @@ +//===-- rv32m.proto - Protobuf description of ASM -------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file describes the instruction opcodes in the M-extension for the +/// RV32 ISA, in Protobuf form. +/// +//===----------------------------------------------------------------------===// + +syntax = "proto2"; + +message M_RFormatOpcode { + enum ValueRange { + DIV = 0; + DIVU = 1; + MUL = 2; + MULH = 3; + MULHSU = 4; + MULHU = 5; + REM = 6; + REMU = 7; + }; + required ValueRange value = 1; +} + +package mc_proto_fuzzer; Index: tools/llvm-mc-assemble-proto-fuzzer/proto-to-asm/CMakeLists.txt =================================================================== --- tools/llvm-mc-assemble-proto-fuzzer/proto-to-asm/CMakeLists.txt +++ tools/llvm-mc-assemble-proto-fuzzer/proto-to-asm/CMakeLists.txt @@ -2,13 +2,23 @@ set(CMAKE_CXX_FLAGS ${CXX_FLAGS_NOFUZZ}) # Needed by LLVM's CMake checks because this file defines multiple targets. 
-set(LLVM_OPTIONAL_SOURCES proto_to_asm.cpp proto_to_asm_main.cpp) +set(LLVM_OPTIONAL_SOURCES example_proto_to_asm.cpp proto_to_asm_main.cpp + proto_to_asm_rv32.cpp) -add_clang_library(mcProtoToASM proto_to_asm.cpp +add_clang_library(mcProtoToASM example_proto_to_asm.cpp DEPENDS mcASMProto LINK_LIBS mcASMProto ${PROTOBUF_LIBRARIES} ) +add_clang_library(mcRv32ProtoToASM proto_to_asm_rv32.cpp + DEPENDS mcRv32ASMProto + LINK_LIBS mcRv32ASMProto ${PROTOBUF_LIBRARIES} + ) + add_clang_executable(llvm-mc-assemble-proto-to-asm proto_to_asm_main.cpp) +add_clang_executable(llvm-mc-assemble-proto-to-asm-rv32 + proto_to_asm_main.cpp) target_link_libraries(llvm-mc-assemble-proto-to-asm PRIVATE mcProtoToASM) +target_link_libraries(llvm-mc-assemble-proto-to-asm-rv32 + PRIVATE mcRv32ProtoToASM) Index: tools/llvm-mc-assemble-proto-fuzzer/proto-to-asm/example_proto_to_asm.cpp =================================================================== --- /dev/null +++ tools/llvm-mc-assemble-proto-fuzzer/proto-to-asm/example_proto_to_asm.cpp @@ -0,0 +1,85 @@ +//==-- example_proto_to_asm.cpp - Protobuf-ASM conversion ------------------==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Implements functions for converting between protobufs for an example +// assembly language grammar and the assembly language instructions. 
+// +//===----------------------------------------------------------------------===// +#include "proto_to_asm.h" +#include "example_asm_proto.pb.h" + +#include +#include +#include +#include +#include + +using namespace google::protobuf; + +#define EMIT_ASM(X,Y) \ + { \ + const EnumDescriptor * ED = X; \ + if (ED) \ + return OS << EmitAsm(ED, Y); \ + assert(!"Could not find descriptor."); \ + } + +static std::string EmitAsm(const EnumDescriptor * Enum, + int Num) { + const EnumValueDescriptor * D = Enum->FindValueByNumber(Num); + std::string Msg = D->name(); + std::transform(Msg.begin(), Msg.end(), Msg.begin(), ::tolower); + return Msg; +} + +namespace mc_proto_fuzzer { +std::ostream &operator<<(std::ostream &OS, const Register &X) { + EMIT_ASM((Register_RegName_descriptor()), (X.name())); +} +std::ostream &operator<<(std::ostream &OS, const RTypeOpcode &X) { + EMIT_ASM((RTypeOpcode_Op_descriptor()), (X.op())); +} +std::ostream &operator<<(std::ostream &OS, const RTypeOperands &X) { + OS << X.operand1(); + OS << "," << X.operand2(); + OS << "," << X.operand3(); + return OS; +} +std::ostream &operator<<(std::ostream &OS, const RTypeStatement &X) { + OS << "\t" << X.opcode() << "\t"; + OS << X.operands() << "\n"; + return OS; +} +std::ostream &operator<<(std::ostream &OS, const AsmStatement &X) { + return OS << X.statement(); +} +std::ostream &operator<<(std::ostream &OS, const AsmStatementSeq &X) { + for (auto &ST : X.statements()) + OS << ST; + return OS; +} +std::ostream &operator<<(std::ostream &OS, const Assembly &X) { + return OS << X.asmstatements(); +} + +// --------------------------------- + +std::string FunctionToString(const Assembly &Input) { + std::ostringstream OS; + OS << Input; + return OS.str(); +} +std::string ProtoToASM(const uint8_t *Data, size_t Size) { + Assembly Message; + if (!Message.ParsePartialFromArray(Data, Size)) + return "#error invalid proto\n"; + return FunctionToString(Message); +} + +} // namespace mc_proto_fuzzer Index: 
tools/llvm-mc-assemble-proto-fuzzer/proto-to-asm/proto_to_asm.h =================================================================== --- tools/llvm-mc-assemble-proto-fuzzer/proto-to-asm/proto_to_asm.h +++ tools/llvm-mc-assemble-proto-fuzzer/proto-to-asm/proto_to_asm.h @@ -11,13 +11,11 @@ // //===----------------------------------------------------------------------===// -#include -#include #include namespace mc_proto_fuzzer { class Assembly; std::string FunctionToString(const Assembly &input); -std::string ProtoToASM(const uint8_t *data, size_t size, bool flag_enabled); +std::string ProtoToASM(const uint8_t *data, size_t size); } Index: tools/llvm-mc-assemble-proto-fuzzer/proto-to-asm/proto_to_asm.cpp =================================================================== --- tools/llvm-mc-assemble-proto-fuzzer/proto-to-asm/proto_to_asm.cpp +++ /dev/null @@ -1,128 +0,0 @@ -//==-- proto_to_asm.cpp - Protobuf-ASM conversion --------------------------==// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// Implements functions for converting between protobufs for an example -// assembly language grammar and the assembly language instructions. 
-// -//===----------------------------------------------------------------------===// -#include "proto_to_asm.h" -#include "asm_proto.pb.h" - -#include -#include - -namespace mc_proto_fuzzer { - -static bool NoAliases = false; -std::ostream &operator<<(std::ostream &OS, const Register &X) { - switch (X.name()) { - case Register::X0: OS << "x0"; break; - case Register::X1: OS << "x1"; break; - case Register::X2: OS << "x2"; break; - case Register::X3: OS << "x3"; break; - case Register::X4: OS << "x4"; break; - case Register::X5: OS << "x5"; break; - case Register::X6: OS << "x6"; break; - case Register::X7: OS << "x7"; break; - case Register::X8: OS << "x8"; break; - case Register::X9: OS << "x9"; break; - case Register::X10: OS << "x10"; break; - case Register::X11: OS << "x11"; break; - case Register::X12: OS << "x12"; break; - case Register::X13: OS << "x13"; break; - case Register::X14: OS << "x14"; break; - case Register::X15: OS << "x15"; break; - case Register::X16: OS << "x16"; break; - case Register::X17: OS << "x17"; break; - case Register::X18: OS << "x18"; break; - case Register::X19: OS << "x19"; break; - case Register::X20: OS << "x20"; break; - case Register::X21: OS << "x21"; break; - case Register::X22: OS << "x22"; break; - case Register::X23: OS << "x23"; break; - case Register::X24: OS << "x24"; break; - case Register::X25: OS << "x25"; break; - case Register::X26: OS << "x26"; break; - case Register::X27: OS << "x27"; break; - case Register::X28: OS << "x28"; break; - case Register::X29: OS << "x29"; break; - case Register::X30: OS << "x30"; break; - case Register::X31: OS << "x31"; break; - } - return OS; -} -std::ostream &operator<<(std::ostream &OS, const ITypeOpcode &X) { - switch (X.op()) { - case ITypeOpcode_Op_ADD: OS << "add"; break; - case ITypeOpcode_Op_SUB: OS << "sub"; break; - } - return OS; -} -std::ostream &operator<<(std::ostream &OS, const ITypeOperands &X) { - OS << X.operand1(); - OS << "," << X.operand2(); - OS << "," << 
X.operand3(); - return OS; -} -std::ostream &operator<<(std::ostream &OS, const ITypeStatement &X) { - OS << "\t" << X.opcode() << "\t"; - OS << X.operands() << "\n"; - return OS; -} -std::ostream &operator<<(std::ostream &OS, const AsmStatement &X) { - if (X.has_statement()) { - ITypeOperands ThreeOperands = X.statement().operands(); - Register Oper1 = ThreeOperands.operand1(); - Register Oper2 = ThreeOperands.operand2(); - Register Oper3 = ThreeOperands.operand3(); - if (NoAliases) { - OS << X.statement(); - return OS; - } - if (X.statement().opcode().op() == ITypeOpcode_Op_ADD) { - OS << X.statement(); - return OS; - } - else if (X.statement().opcode().op() == - ITypeOpcode_Op_SUB) { - if (Oper2.name() == Register_RegName_X0) { - OS << "\tneg\t" << Oper1 << "," << Oper3 << "\n"; - return OS; - } else { - OS << X.statement(); - return OS; - } - } - } - return OS; -} -std::ostream &operator<<(std::ostream &OS, const AsmStatementSeq &X) { - for (auto &ST : X.statements()) OS << ST; - return OS; -} -std::ostream &operator<<(std::ostream &OS, const Assembly &X) { - return OS << X.asmstatements(); -} - -// --------------------------------- - -std::string FunctionToString(const Assembly &Input) { - std::ostringstream OS; - OS << Input; - return OS.str(); -} -std::string ProtoToASM(const uint8_t *Data, size_t Size, bool Flag) { - Assembly Message; - NoAliases = Flag; - if (!Message.ParsePartialFromArray(Data, Size)) - return "#error invalid proto\n"; - return FunctionToString(Message); -} - -} // namespace mc_proto_fuzzer Index: tools/llvm-mc-assemble-proto-fuzzer/proto-to-asm/proto_to_asm_main.cpp =================================================================== --- tools/llvm-mc-assemble-proto-fuzzer/proto-to-asm/proto_to_asm_main.cpp +++ tools/llvm-mc-assemble-proto-fuzzer/proto-to-asm/proto_to_asm_main.cpp @@ -12,41 +12,18 @@ //===----------------------------------------------------------------------===// #include "proto_to_asm.h" -#include 
"llvm/Support/CommandLine.h" - #include #include #include #include -using namespace llvm; -static cl::opt NoAliases("riscv-no-aliases", - cl::desc("Set to false to match printed asm" - " of objdump"), - cl::value_desc("boolean"), - cl::init(false)); - int main(int argc, char **argv) { - static std::vector ModifiedArgv; - ModifiedArgv.push_back(argv[0]); - static std::vector NewArgv; - NewArgv.push_back(argv[0]); for (int i = 1; i < argc; i++) { - std::string SearchString(argv[i]); - if (SearchString.find("riscv-no-aliases") != llvm::StringLiteral::npos) - ModifiedArgv.push_back(argv[i]); - else - NewArgv.push_back(argv[i]); - } - cl::ParseCommandLineOptions((int) ModifiedArgv.size(), &ModifiedArgv[0]); - for (int i = 1; i < (int) NewArgv.size(); i++) { - std::fstream in(NewArgv[i]); + std::fstream in(argv[i]); std::string str((std::istreambuf_iterator(in)), std::istreambuf_iterator()); - std::cout << "// " << NewArgv[i] << std::endl; std::cout << mc_proto_fuzzer::ProtoToASM( - reinterpret_cast(str.data()), str.size(), - (bool) NoAliases); + reinterpret_cast(str.data()), str.size()); } return 0; } Index: tools/llvm-mc-assemble-proto-fuzzer/proto-to-asm/proto_to_asm_rv32.cpp =================================================================== --- /dev/null +++ tools/llvm-mc-assemble-proto-fuzzer/proto-to-asm/proto_to_asm_rv32.cpp @@ -0,0 +1,386 @@ +//==-- proto_to_asm_rv32.cpp - Protobuf-ASM conversion ---------------------==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Implements functions for converting between protobufs for the assembly +// language grammar for RV32I instruction set and assembly language +// instructions. 
+//
+//===----------------------------------------------------------------------===//
+#include "proto_to_asm.h"
+#include "rv32.pb.h"
+#include "rv32_operands.pb.h"
+#include "rv32a.pb.h"
+#include "rv32c.pb.h"
+#include "rv32d.pb.h"
+#include "rv32f.pb.h"
+#include "rv32i.pb.h"
+#include "rv32m.pb.h"
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+using namespace google::protobuf;
+// Spell enum value Num of Enum as asm text: lower-cased name, '_' -> '.'.
+static std::string EmitAsm(const EnumDescriptor * Enum,
+                           int Num) {
+  const EnumValueDescriptor * D = Enum->FindValueByNumber(Num);
+  std::string Msg = D ? D->name() : std::string(); // null D: Num not in Enum
+  std::transform(Msg.begin(), Msg.end(), Msg.begin(), ::tolower);
+  std::replace(Msg.begin(), Msg.end(), '_', '.');
+  return Msg;
+}
+
+namespace mc_proto_fuzzer {
+template <typename T>
+void Emit(std::ostream &OS, const T &X) { // print X.value() by enum name
+  const EnumDescriptor *ED = X.ValueRange_descriptor();
+  OS << EmitAsm(ED, X.value());
+}
+std::ostream &operator<<(std::ostream &OS, const IORWString &X) {
+  for (auto &IORWChar : X.iorwstring()) // elements are written back to back
+    Emit(OS, IORWChar);
+  return OS;
+}
+std::ostream &operator<<(std::ostream &OS, const Immediate &X) {
+  if (X.has_s_imm())
+    return OS << X.s_imm();
+  if (X.has_u_imm())
+    return OS << X.u_imm();
+  return OS << "0"; // neither s_imm nor u_imm is present
+}
+std::ostream &operator<<(std::ostream &OS, const GenPurposeRegister &X) {
+  Emit(OS, X);
+  return OS;
+}
+std::ostream &operator<<(std::ostream &OS, const FloatingPointRegister &X) {
+  Emit(OS, X);
+  return OS;
+}
+std::ostream &operator<<(std::ostream &OS, const Register &X) {
+  if (X.has_reg1())
+    return OS << X.reg1();
+  if (X.has_reg2())
+    return OS << X.reg2();
+  return OS << "x0"; // neither reg1 nor reg2 is present
+}
+std::ostream &operator<<(std::ostream &OS, const RoundingMode &X) {
+  Emit(OS, X);
+  return OS;
+}
+std::ostream &operator<<(std::ostream &OS, const RegRegRegRegStmt &X) {
+  OS << "\t";
+  if (X.has_opcode1())
+    Emit(OS, X.opcode1());
+  else if (X.has_opcode2())
+    Emit(OS, X.opcode2());
+  else
+    OS << "fmadd.s"; // fallback mnemonic when no opcode is present
+  OS << "\t";
+  OS << X.operand1() << ",";
+  OS << X.operand2() << ",";
+  OS <<
X.operand3() << ",";
+  OS << X.operand4();
+  if (X.has_operand5()) // fifth operand is printed only when present
+    OS << "," << X.operand5();
+  return OS << "\n";
+}
+std::ostream &operator<<(std::ostream &OS, const RegRegRegStmt1 &X) {
+  OS << "\t";
+  if (X.has_opcode1()) // first opcode field that is present wins
+    Emit(OS, X.opcode1());
+  else if (X.has_opcode2())
+    Emit(OS, X.opcode2());
+  else if (X.has_opcode3())
+    Emit(OS, X.opcode3());
+  else if (X.has_opcode4())
+    Emit(OS, X.opcode4());
+  else if (X.has_opcode5())
+    Emit(OS, X.opcode5());
+  else if (X.has_opcode6())
+    Emit(OS, X.opcode6());
+  else if (X.has_opcode7())
+    Emit(OS, X.opcode7());
+  else
+    OS << "add"; // fallback mnemonic when no opcode is present
+  OS << "\t";
+  OS << X.operand1() << ",";
+  OS << X.operand2() << ",";
+  OS << X.operand3();
+  if (X.has_operand4()) // fourth operand is printed only when present
+    OS << "," << X.operand4();
+  return OS << "\n";
+}
+std::ostream &operator<<(std::ostream &OS, const RegRegRegStmt2 &X) {
+  OS << "\t";
+  if (X.has_opcode1())
+    Emit(OS, X.opcode1());
+  else
+    OS << "amoadd.w"; // fallback mnemonic when no opcode is present
+  OS << "\t";
+  OS << X.operand1() << ",";
+  OS << X.operand2() << ",("; // third operand is wrapped in parentheses
+  OS << X.operand3() << ")\n";
+  return OS;
+}
+std::ostream &operator<<(std::ostream &OS, const RegRegImmStmt1 &X) {
+  OS << "\t";
+  if (X.has_opcode1())
+    Emit(OS, X.opcode1());
+  else if (X.has_opcode2())
+    Emit(OS, X.opcode2());
+  else
+    OS << "addi"; // fallback mnemonic when no opcode is present
+  OS << "\t";
+  OS << X.operand1() << ",";
+  OS << X.operand2() << ",";
+  OS << X.operand3() << "\n";
+  return OS;
+}
+std::ostream &operator<<(std::ostream &OS, const RegRegImmStmt2 &X) {
+  OS << "\t";
+  if (X.has_opcode1()) // first opcode field that is present wins
+    Emit(OS, X.opcode1());
+  else if (X.has_opcode2())
+    Emit(OS, X.opcode2());
+  else if (X.has_opcode3())
+    Emit(OS, X.opcode3());
+  else if (X.has_opcode4())
+    Emit(OS, X.opcode4());
+  else if (X.has_opcode5())
+    Emit(OS, X.opcode5());
+  else if (X.has_opcode6())
+    Emit(OS, X.opcode6());
+  else if (X.has_opcode7())
+    Emit(OS, X.opcode7());
+  else if (X.has_opcode8())
+    Emit(OS, X.opcode8());
+  else
+    OS << "lw"; // fallback mnemonic when no opcode is present
+  OS << "\t";
+  OS << X.operand1() << ",";
+  OS << X.operand3() << "("; // operand3 precedes operand2, which is parenthesised
+  OS << X.operand2() << ")\n";
+  return OS;
+}
+std::ostream &operator<<(std::ostream &OS, const RegRegStmt1 &X) {
+  OS << "\t";
+  if (X.has_opcode1()) // first opcode field that is present wins
+    Emit(OS, X.opcode1());
+  else if (X.has_opcode2())
+    Emit(OS, X.opcode2());
+  else if (X.has_opcode3())
+    Emit(OS, X.opcode3());
+  else if (X.has_opcode4())
+    Emit(OS, X.opcode4());
+  else if (X.has_opcode5())
+    Emit(OS, X.opcode5());
+  else if (X.has_opcode6())
+    Emit(OS, X.opcode6());
+  else if (X.has_opcode7())
+    Emit(OS, X.opcode7());
+  else
+    OS << "mv"; // fallback mnemonic when no opcode is present
+  OS << "\t";
+  OS << X.operand1() << ",";
+  OS << X.operand2();
+  if (X.has_operand3()) // third operand is printed only when present
+    OS << "," << X.operand3();
+  return OS << "\n";
+}
+std::ostream &operator<<(std::ostream &OS, const RegRegStmt2 &X) {
+  OS << "\t";
+  if (X.has_opcode1())
+    Emit(OS, X.opcode1());
+  else
+    OS << "lr.w"; // fallback mnemonic when no opcode is present
+  OS << "\t";
+  OS << X.operand1() << ",(" << X.operand2() << ")\n"; // operand2 in parentheses
+  return OS;
+}
+std::ostream &operator<<(std::ostream &OS, const RegImmStmt &X) {
+  OS << "\t";
+  if (X.has_opcode1()) // first opcode field that is present wins
+    Emit(OS, X.opcode1());
+  else if (X.has_opcode2())
+    Emit(OS, X.opcode2());
+  else if (X.has_opcode3())
+    Emit(OS, X.opcode3());
+  else if (X.has_opcode4())
+    Emit(OS, X.opcode4());
+  else if (X.has_opcode5())
+    Emit(OS, X.opcode5());
+  else if (X.has_opcode6())
+    Emit(OS, X.opcode6());
+  else if (X.has_opcode7())
+    Emit(OS, X.opcode7());
+  else if (X.has_opcode8())
+    Emit(OS, X.opcode8());
+  else
+    OS << "c.fldsp"; // fallback mnemonic when no opcode is present
+  OS << "\t";
+  OS << X.operand1() << "," << X.operand2() << "\n";
+  return OS;
+}
+std::ostream &operator<<(std::ostream &OS, const RegImmImmStmt &X) {
+  OS << "\t";
+  if (X.has_opcode1())
+    Emit(OS, X.opcode1());
+  else
+    OS << "csrrwi"; // fallback mnemonic when no opcode is present
+  OS << "\t";
+  OS << X.operand1() << ",";
+  OS << X.operand2() << ",";
+  OS << X.operand3() << "\n";
+  return OS;
+}
+std::ostream &operator<<(std::ostream &OS, const RegImmRegStmt &X) {
+  OS << "\t";
+  if (X.has_opcode1())
+    Emit(OS, X.opcode1());
+  else
+    OS << "csrrw"; // fallback mnemonic when no opcode is present
+  OS << "\t";
+  OS << X.operand1() << ",";
+  OS << X.operand2() << ",";
+  OS << X.operand3()
<< "\n";
+  return OS;
+}
+std::ostream &operator<<(std::ostream &OS, const RegStmt &X) {
+  OS << "\t";
+  if (X.has_opcode1())
+    Emit(OS, X.opcode1());
+  else if (X.has_opcode2())
+    Emit(OS, X.opcode2());
+  else if (X.has_opcode3())
+    Emit(OS, X.opcode3());
+  else
+    OS << "c.jr"; // fallback mnemonic when no opcode is present
+  OS << "\t";
+  OS << X.operand1() << "\n";
+  return OS;
+}
+std::ostream &operator<<(std::ostream &OS, const ImmStmt &X) {
+  OS << "\t";
+  if (X.has_opcode1()) // exactly one mnemonic is written per statement
+    Emit(OS, X.opcode1());
+  else if (X.has_opcode2()) // chained so the fallback cannot follow opcode1
+    Emit(OS, X.opcode2());
+  else if (X.has_opcode3())
+    Emit(OS, X.opcode3());
+  else
+    OS << "c.jal"; // fallback mnemonic when no opcode is present
+  OS << "\t";
+  OS << X.operand1() << "\n";
+  return OS;
+}
+std::ostream &operator<<(std::ostream &OS, const ImmImmStmt &X) {
+  OS << "\t";
+  if (X.has_opcode1())
+    Emit(OS, X.opcode1());
+  else
+    OS << "csrci"; // fallback mnemonic when no opcode is present
+  OS << "\t";
+  OS << X.operand1() << ",";
+  OS << X.operand2() << "\n";
+  return OS;
+}
+std::ostream &operator<<(std::ostream &OS, const ImmRegStmt &X) {
+  OS << "\t";
+  if (X.has_opcode1())
+    Emit(OS, X.opcode1());
+  else
+    OS << "csrc"; // fallback mnemonic when no opcode is present
+  OS << "\t";
+  OS << X.operand1() << ",";
+  OS << X.operand2() << "\n";
+  return OS;
+}
+std::ostream &operator<<(std::ostream &OS, const NoOperandsStmt &X) {
+  if (X.has_opcode1()) {
+    OS << "\t";
+    Emit(OS, X.opcode1());
+    return OS << "\n";
+  }
+  if (X.has_opcode2()) {
+    OS << "\t";
+    Emit(OS, X.opcode2()); // print the field that was tested, not opcode1
+    return OS << "\n";
+  }
+  return OS << "\tnop\n"; // fallback statement when no opcode is present
+}
+std::ostream &operator<<(std::ostream &OS, const FenceStmt &X) {
+  OS << "\t";
+  if (X.has_opcode1())
+    Emit(OS, X.opcode1());
+  else
+    OS << "fence"; // fallback mnemonic when no opcode is present
+  OS << "\t";
+  OS << X.operand1() << ",";
+  OS << X.operand2() << "\n";
+  return OS;
+}
+std::ostream &operator<<(std::ostream &OS, const AsmStmt &X) {
+  if (X.has_stmt1()) // first statement field that is present is printed
+    return OS << X.stmt1();
+  if (X.has_stmt2())
+    return OS << X.stmt2();
+  if (X.has_stmt3())
+    return OS << X.stmt3();
+  if (X.has_stmt4())
+    return OS << X.stmt4();
+  if (X.has_stmt5())
+    return OS << X.stmt5();
+  if (X.has_stmt6())
+    return OS << X.stmt6();
+  if
(X.has_stmt7())
+    return OS << X.stmt7();
+  if (X.has_stmt8())
+    return OS << X.stmt8();
+  if (X.has_stmt9())
+    return OS << X.stmt9();
+  if (X.has_stmt10())
+    return OS << X.stmt10();
+  if (X.has_stmt11())
+    return OS << X.stmt11();
+  if (X.has_stmt12())
+    return OS << X.stmt12();
+  if (X.has_stmt13())
+    return OS << X.stmt13();
+  if (X.has_stmt14())
+    return OS << X.stmt14();
+  return OS << "\tnop\n"; // fallback statement when no stmt field is present
+}
+std::ostream &operator<<(std::ostream &OS, const AsmStmtSeq &X) {
+  for (auto &ST : X.stmts()) // statements are printed in stored order
+    OS << ST;
+  return OS;
+}
+std::ostream &operator<<(std::ostream &OS, const Assembly &X) {
+  return OS << X.asmstmts();
+}
+
+// ---------------------------------
+
+std::string FunctionToString(const Assembly &Input) { // Input as asm text
+  std::ostringstream OS;
+  OS << Input;
+  return OS.str();
+}
+std::string ProtoToASM(const uint8_t *Data, size_t Size) { // bytes -> asm text
+  Assembly Message;
+  if (!Message.ParsePartialFromArray(Data, Size))
+    return "#error invalid proto\n"; // returned in place of asm on parse failure
+  return FunctionToString(Message);
+}
+
+} // namespace mc_proto_fuzzer