Index: tools/llvm-mc-assemble-proto-fuzzer/CMakeLists.txt =================================================================== --- tools/llvm-mc-assemble-proto-fuzzer/CMakeLists.txt +++ tools/llvm-mc-assemble-proto-fuzzer/CMakeLists.txt @@ -12,8 +12,8 @@ # Needed by LLVM's CMake checks because this file defines multiple targets. set(LLVM_OPTIONAL_SOURCES DummyMCFuzzer.cpp - MCFuzzer.cpp ExampleMCProtoFuzzer.cpp + RV32MCProtoFuzzer.cpp ) if(CLANG_ENABLE_ASSEMBLE_PROTO_FUZZER) @@ -23,11 +23,27 @@ add_definitions(-DGOOGLE_PROTOBUF_NO_RTTI) include_directories(${PROTOBUF_INCLUDE_DIRS}) include_directories(${CMAKE_CURRENT_BINARY_DIR}) - protobuf_generate_cpp(PROTO_SRCS PROTO_HDRS asm_proto.proto) + protobuf_generate_cpp(EXAMPLE_PROTO_SRCS EXAMPLE_PROTO_HDRS + proto-files/example_asm_proto.proto) + protobuf_generate_cpp(RV32_PROTO_SRCS RV32_PROTO_HDRS + proto-files/rv32.proto + proto-files/rv32c.proto + proto-files/rv32i.proto + proto-files/rv32m.proto) + set(LLVM_OPTIONAL_SOURCES ${LLVM_OPTIONAL_SOURCES} ${PROTO_SRCS}) + add_clang_library(mcASMProto - ${PROTO_SRCS} - ${PROTO_HDRS} + ${EXAMPLE_PROTO_SRCS} + ${EXAMPLE_PROTO_HDRS} + + LINK_LIBS + ${PROTOBUF_LIBRARIES} + ) + + add_clang_library(mcRv32ASMProto + ${RV32_PROTO_SRCS} + ${RV32_PROTO_HDRS} LINK_LIBS ${PROTOBUF_LIBRARIES} @@ -37,6 +53,9 @@ include(ProtobufMutatorMC) include_directories(${ProtobufMutator_INCLUDE_DIRS}) + # Build the .proto files. + add_clang_subdirectory(proto-files) + # Build the protobuf->C++ translation library and driver. 
add_clang_subdirectory(proto-to-asm) @@ -48,6 +67,11 @@ ExampleMCProtoFuzzer.cpp ) + add_clang_executable(llvm-mc-assemble-proto-fuzzer-rv32 + ${DUMMY_MAIN} + RV32MCProtoFuzzer.cpp + ) + set(COMMON_PROTO_FUZZ_LIBRARIES ${ProtobufMutator_LIBRARIES} ${PROTOBUF_LIBRARIES} @@ -62,6 +86,13 @@ mcProtoToASM ) + target_link_libraries(llvm-mc-assemble-proto-fuzzer-rv32 + PRIVATE + ${COMMON_PROTO_FUZZ_LIBRARIES} + mcRv32ASMProto + mcRv32ProtoToASM + ) + endif() add_clang_subdirectory(handle-asm) Index: tools/llvm-mc-assemble-proto-fuzzer/ExampleMCProtoFuzzer.cpp =================================================================== --- tools/llvm-mc-assemble-proto-fuzzer/ExampleMCProtoFuzzer.cpp +++ tools/llvm-mc-assemble-proto-fuzzer/ExampleMCProtoFuzzer.cpp @@ -14,7 +14,7 @@ /// //===----------------------------------------------------------------------===// -#include "asm_proto.pb.h" +#include "example_asm_proto.pb.h" #include "handle-asm/handle_asm.h" #include "proto-to-asm/proto_to_asm.h" #include "src/libfuzzer/libfuzzer_macro.h" Index: tools/llvm-mc-assemble-proto-fuzzer/MCFuzzer.cpp =================================================================== --- tools/llvm-mc-assemble-proto-fuzzer/MCFuzzer.cpp +++ /dev/null @@ -1,29 +0,0 @@ -//===-- MCFuzzer.cpp - Fuzz MC Layer --------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// This file implements a function that runs assembler on a single -/// input. This function is then linked into the Fuzzer library. 
-/// -//===----------------------------------------------------------------------===// - -#include "asm_proto.pb.h" -#include "handle-asm/handle_asm.h" -#include "proto-to-asm/proto_to_asm.h" -#include "src/libfuzzer/libfuzzer_macro.h" - -using namespace mc_proto_fuzzer; - -extern "C" int LLVMFuzzerInitialize(int *argc, char ***argv) { return 0; } - -extern "C" int LLVMFuzzerTestOneInput(uint8_t *data, size_t size) { - std::string s((const char *)data, size); - HandleASM(s); - return 0; -} Index: tools/llvm-mc-assemble-proto-fuzzer/README.txt =================================================================== --- tools/llvm-mc-assemble-proto-fuzzer/README.txt +++ tools/llvm-mc-assemble-proto-fuzzer/README.txt @@ -10,7 +10,9 @@ -DCMAKE_PREFIX_PATH=/path/to/install \ -DPBM_FUZZ_PATH=/full/path/to/protobuf/install \ -DPBM_REPO=file:///full/path/to/libprotobuf-mutator ../../llvm -$ ninja -v llvm-mc-assemble-proto-fuzzer llvm-mc-assemble-proto-to-asm +$ ninja -v llvm-mc-assemble-proto-fuzzer llvm-mc-assemble-proto-to-asm \ + llvm-mc-assemble-proto-fuzzer-rv32 \ + llvm-mc-assemble-proto-to-asm-rv32 \ 2>&1 | tee log ------------------------------------------------------------------------------- @@ -29,21 +31,14 @@ fuzzer with, including the corpus directory and the full path to objdump. For example: -$ python mcfuzz.py --corpus corpus --max-len 32 --runs 10 --triple riscv32 - --out outputdir --objdump /full/path/to/riscv32-unknown-linux-gnu-objdump - --mattr +c +$ python mcfuzz.py --corpus corpus --max-len 32 --runs 10 --triple riscv32 \ + --out outputdir --objdump /full/path/to/riscv32-unknown-linux-gnu-objdump \ + --mattr +c,+m --march rv32imc \ + --assemble /full/path/to/riscv32-unknown-linux-gnu-as Run the script with the --verbose flag if you would like to see the output of the fuzzer as it runs. 
-Use the --riscv-no-aliases flag if you want the generated .s files (from -proto-to-asm) to be printed in their original form (ie to print - `sub x3,x0,x3` to the file instead of - `neg x3,x4`). Note that using this flag (--riscv-no-aliases) may cause - some test cases to fail, as the default behavior of the compiler is to replace - `sub ,x0,` with a `neg` instruction. Aliases are on by default - (NoAliases=False). - NOTE: If you run the script without specifying an ouptut directory [--out], the script will terminate after fuzzing the corpus a specified number [--runs] of times. @@ -52,14 +47,32 @@ contain the generated corpus files. Next, we populate the output directory by iterating through all the files -currently in the corpus. We use the llvm-mc-assemble-proto-to-asm tool to -generate the .s files. We use the llvm-mc-assemble-proto-fuzzer tool, invoked -on each file in the corpus, with -filetype=obj, and -runs=1, to generate a +currently in the corpus. First, we run the golden AS (GNU) and check its +behavior (successfully assembled, or failed to assemble) against the behavior of +our fuzz target (LLVM-MC AS). If and only if both assemblers assemble the input, +we proceed. + +We use the llvm-mc-assemble-proto-to-asm tool to generate the .s files. +We use the llvm-mc-assemble-proto-fuzzer tool, invoked on each file +in the corpus, with -filetype=obj, and -runs=1, to generate a corresponding .o file. Finally, we call objdump on the .o file to produce a .objdump file, which can be compared (after some processing) to the .s file. Thus, the output directory should contain .o, .s, and .objdump files when the script concludes. -The last part of the script runs a diff of all the .s and .objdump files, and -prints out a summary of results (how many diffs passed and how many failed). 
+The last part of the script prints out a summary of results: on which files the +two assemblers (fuzz target LLVM AS and golden assembler GNU AS) both failed, on +which files only one assembler failed, and on which files both assemblers +successfully assembled the input ASM. + +The printed output will show the ASM statements that caused the fuzz target LLVM +AS to fail (but not the golden AS). It will also print out the ASM +statements that both assemblers assembled if there were any differences +between the golden disassembler's output and our original input ASM. + +------------------------------------------------------------------------------- +Notes: +------------------------------------------------------------------------------- +Some instructions are given aliases by the golden disassembler, and therefore +the diff sometimes fails even when both assemblers can assemble the input ASM. Index: tools/llvm-mc-assemble-proto-fuzzer/RV32MCProtoFuzzer.cpp =================================================================== --- /dev/null +++ tools/llvm-mc-assemble-proto-fuzzer/RV32MCProtoFuzzer.cpp @@ -0,0 +1,27 @@ +//===-- RV32MCProtoFuzzer.cpp - Fuzz Assembler ----------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file implements a function that runs llvm mc assemble on a single +/// input and uses libprotobuf-mutator to find new inputs. This function is +/// then linked into the Fuzzer library. 
+/// +//===----------------------------------------------------------------------===// + +#include "handle-asm/handle_asm.h" +#include "proto-to-asm/proto_to_asm.h" +#include "rv32.pb.h" +#include "src/libfuzzer/libfuzzer_macro.h" + +using namespace mc_proto_fuzzer; + +DEFINE_BINARY_PROTO_FUZZER(const Assembly& input) { + auto S = FunctionToString(input); + HandleASM(S); +} Index: tools/llvm-mc-assemble-proto-fuzzer/asm_proto.proto =================================================================== --- tools/llvm-mc-assemble-proto-fuzzer/asm_proto.proto +++ /dev/null @@ -1,61 +0,0 @@ -//===-- asm_proto.proto - Protobuf description of ASM ---------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// This file describes a subset of ASM as a protobuf. It is used to -/// more easily find interesting inputs for fuzzing llvm mc layer. 
-/// -//===----------------------------------------------------------------------===// - -syntax = "proto2"; - -message Register { - enum RegName { - X0 = 1; X1 = 2; X2 = 3; X3 = 4; X4 = 5; X5 = 6; X6 = 7; X7 = 8; X8 = 9; - X9 = 10; X10 = 11; X11 = 12; X12 = 13; X13 = 14; X14 = 15; X15 = 16; - X16 = 17; X17 = 18; X18 = 19; X19 = 20; X20 = 21; X21 = 22; X22 = 23; - X23 = 24; X24 = 25; X25 = 26; X26 = 27; X27 = 28; X28 = 29; X29 = 30; - X30 = 31; X31 = 32; - }; - required RegName name = 1; -} - -message ITypeOpcode { - enum Op { - ADD = 1; SUB = 2; - }; - required Op op = 1; -} - -message ITypeOperands { - required Register operand1 = 1; - required Register operand2 = 2; - required Register operand3 = 3; -} - -message ITypeStatement { - required ITypeOpcode opcode = 1; - required ITypeOperands operands = 2; -} - -message AsmStatement { - oneof asmstatement_oneof { - ITypeStatement statement = 2; - } -} - -message AsmStatementSeq { - repeated AsmStatement statements = 1; -} - -message Assembly { - required AsmStatementSeq asmStatements = 1; -} - -package mc_proto_fuzzer; Index: tools/llvm-mc-assemble-proto-fuzzer/example_mcfuzz.py =================================================================== --- /dev/null +++ tools/llvm-mc-assemble-proto-fuzzer/example_mcfuzz.py @@ -0,0 +1,174 @@ +#!/usr/bin/env python + +import argparse +import difflib +import os +import shlex +import subprocess +import sys +import tempfile + +parser = argparse.ArgumentParser() + +# Flags for the directory names, corpus and outputdir. +parser.add_argument("--corpus", type=str, help="corpus directory name", + required=True) +parser.add_argument("--out", type=str, + help="output directory name for obj files", default=None) + +# Flags for llvm-mc-assemble-proto-fuzzer: triple, mattr, filetype. 
+parser.add_argument("--triple", type=str, help="specify the triple", + default="riscv32") +parser.add_argument("--mattr", type=str, help="specify mattr", + default="") +parser.add_argument("--filetype", type=str, help="asm or obj", + default="obj") + +# These args are passed in after the -fuzzer-args flag. +parser.add_argument("--runs", type=int, help="number of runs", default=100) +parser.add_argument("--max-len", type=int, + help="limit instruction size for fuzzing", default=40) + +# Flag specifies the name of objdump executable. +parser.add_argument("--objdump", type=str, help="specify the path to" + " objdump", + default="/prj/llvm-arm/home/common/build_tools/" + "riscv32-gnu-7.2.0/bin/" + "riscv32-unknown-linux-gnu-objdump") + +# Flag specifies that we should print out everything +parser.add_argument("--verbose", dest="verbose", action="store_true") +parser.set_defaults(verbose=False) + +parser.add_argument("--riscv-no-aliases", dest="NoAliases", + action="store_true") +parser.set_defaults(NoAliases=False) + +args = parser.parse_args() + +# Step 1: Invoke fuzzer to generate a corpus. +cmd = 'llvm-mc-assemble-proto-fuzzer {corpus} -triple={triple} ' \ + + '-mattr={mattr} -filetype={filetype} -fuzzer-args ' \ + + '-runs={runs} -max_len={max_len}' +cmd = cmd.format(corpus=args.corpus, triple=args.triple, mattr=args.mattr, + filetype=args.filetype, runs=args.runs, max_len=args.max_len) +fuzz_command = shlex.split(cmd) + +fuzz_proc = subprocess.Popen(fuzz_command, stdout=subprocess.PIPE, + stderr=subprocess.STDOUT) +fuzz_out, fuzz_err = fuzz_proc.communicate() +if fuzz_proc.returncode != 0: + raise ValueError('failed to run fuzz {}: {}'.format(fuzz_command, + fuzz_err)) +if args.verbose: + print(fuzz_out) + +# If user specified an output directory, proceed to step 2; otherwise, exit. +if args.out is None: + print("No output directory specified; exiting after populating " + "corpus directory.") + sys.exit(0) + +# Keep track of which diffs pass / fail. 
+passes = 0 +fails = 0 +list_of_failures = [] +# Iterate through the corpus body. +# For each file, generate a .s and a .o file. Then, use objdump to +# generate a .objdump file, which we will compare to the .s file. +# Diff the .s with the corresponding .objdump file. +for filename in os.listdir(args.corpus): + + filename_prefix = args.out + "/" + filename + + # Step 2: Run proto-to-asm on corpus file to generate .s file. + proto_to_asm_command = ["llvm-mc-assemble-proto-to-asm", + args.corpus + "/" + filename, + "-riscv-no-aliases=" + + str(args.NoAliases)] + asm_file = open(filename_prefix + ".s", "w+r") + asm_proc = subprocess.Popen(proto_to_asm_command, stdout=asm_file, + stderr=subprocess.STDOUT) + asm_out, asm_err = asm_proc.communicate() + if asm_proc.returncode != 0: + raise ValueError('failed to run {}: {}'.format(proto_to_asm_command, + asm_err)) + asm_file.close() + + # Step 3: Generate .o files in the outputdir. + cmd = 'llvm-mc-assemble-proto-fuzzer {corpus}/{file} -triple={triple}' \ + + ' -mattr={mattr} -out={out} -filetype=obj -fuzzer-args -runs=1' + cmd = cmd.format(corpus=args.corpus, file=filename, triple=args.triple, + mattr=args.mattr, out=args.out) + obj_files_command = shlex.split(cmd) + obj_proc = subprocess.Popen(obj_files_command, stdout=subprocess.PIPE, + stderr=subprocess.STDOUT) + obj_out, obj_err = obj_proc.communicate() + if obj_proc.returncode != 0: + raise ValueError('failed to run fuzzer {}: {}'.format( + obj_files_command, obj_err)) + if args.verbose: + print(obj_out) + + # Step 4: Call objdump on each .o file in the output directory, + # to generate corresponding .objdump files. 
+ objdump_file = open(filename_prefix + ".objdump", "w+r") + cmd = '{objdump} -dr -M numeric {filename}.o' + cmd = cmd.format(objdump=args.objdump, filename=filename_prefix) + objdump_command = shlex.split(cmd) + objdump_proc = subprocess.Popen(objdump_command, + stdout=objdump_file, + stderr=subprocess.STDOUT) + objdump_out, objdump_err = objdump_proc.communicate() + if objdump_proc.returncode != 0: + raise ValueError('failed to run objdump {}: ' + + '{}'.format(objdump_command, objdump_err)) + + # Step 5: Process files generated by objdump so that the files only contain + # instructions. + objdump_file.seek(0) + temp_objdump_file = tempfile.TemporaryFile("w+r") + # Only the lines containing asm instructions have tabs in them; remove all + # other lines (for example, header lines and whitespace). + for line in objdump_file: + if "\t" in line: + temp_objdump_file.write(line) + objdump_file.close() + with open(filename_prefix + ".parsed_objdump", "w") as write_objdump_file: + temp_objdump_file.seek(0) + # Each line of the objdump output looks something like this: + # 0: 00318033 add x0,x3,x3 + # We remove the first two columns, leaving only the asm instruction. + for line in temp_objdump_file: + parts = line.split()[2:] + for part in parts: + write_objdump_file.write("\t") + write_objdump_file.write(part) + write_objdump_file.write("\n") + temp_objdump_file.close() + + # Step 6: Diff the .s file generated by proto-to-asm and the .objdump file + # generated by objdump. + prefix = "Checking " + filename + "..." 
+ file1 = open(filename_prefix + ".parsed_s", "r") + file2 = open(filename_prefix + ".parsed_objdump", "r") + diff = difflib.ndiff(file1.readlines(), file2.readlines()) + delta = ''.join(x[0:] for x in diff if x.startswith(('- ', '+ '))) + if delta: + print(prefix + "FAILURE!") + fails = fails + 1 + list_of_failures.append(filename) + list_of_failures.append(delta) + else: + print(prefix + "SUCCESS!") + passes = passes + 1 + +print("Succeeded: " + str(passes) + "\t\tFailed: " + str(fails) + + "\t\t[Total: " + str(passes + fails) + "]") + +if (fails != 0): + print("The following files failed...") + for item in list_of_failures: + print(item) + +sys.exit(0 if fails == 0 else 1) Index: tools/llvm-mc-assemble-proto-fuzzer/mcfuzz.py =================================================================== --- tools/llvm-mc-assemble-proto-fuzzer/mcfuzz.py +++ tools/llvm-mc-assemble-proto-fuzzer/mcfuzz.py @@ -8,133 +8,259 @@ import sys import tempfile -parser = argparse.ArgumentParser() - -# Flags for the directory names, corpus and outputdir. -parser.add_argument("--corpus", type=str, help="corpus directory name", \ - required=True) -parser.add_argument("--out", type=str, \ - help="output directory name for obj files", default=None) - -# Flags for llvm-mc-assemble-proto-fuzzer: triple, mattr, filetype. -parser.add_argument("--triple", type=str, help="specify the triple", \ - default="riscv32") -parser.add_argument("--mattr", type=str, help="specify mattr", \ - default="") -parser.add_argument("--filetype", type=str, help="asm or obj", \ - default="obj") - -# These args are passed in after the -fuzzer-args flag. -parser.add_argument("--runs", type=int, help="number of runs", default=100) -parser.add_argument("--max-len", type=int, \ - help="limit instruction size for fuzzing", default=40) - -# Flag specifies the name of objdump executable. 
-parser.add_argument("--objdump", type=str, help="specify the path to" \ - " objdump", default="/prj/llvm-arm/home/common/build_tools/" \ - "riscv32-gnu-7.2.0/bin/riscv32-unknown-linux-gnu-objdump") - -# Flag specifies that we should print out everything -parser.add_argument("--verbose", dest="verbose", action="store_true") -parser.set_defaults(verbose=False) - -parser.add_argument("--riscv-no-aliases", dest="NoAliases", - action="store_true") -parser.set_defaults(NoAliases=False) - -args = parser.parse_args() - -# Step 1: Invoke fuzzer to generate a corpus. -cmd = 'llvm-mc-assemble-proto-fuzzer {corpus} -triple={triple} -mattr={mattr}' \ - + ' -filetype={filetype} -fuzzer-args -runs={runs} -max_len={max_len}' -cmd = cmd.format(corpus=args.corpus, triple=args.triple, mattr=args.mattr, \ - filetype=args.filetype, runs=args.runs, max_len=args.max_len) -fuzz_command = shlex.split(cmd) - -fuzz_proc = subprocess.Popen(fuzz_command, stdout=subprocess.PIPE, \ - stderr=subprocess.STDOUT) -fuzz_out, fuzz_err = fuzz_proc.communicate() -if fuzz_proc.returncode != 0: - raise ValueError('failed to run fuzz {}: {}'.format(fuzz_command, fuzz_err)) -if args.verbose: - print(fuzz_out) - -# If user specified an output directory, proceed to step 2; otherwise, exit. -if args.out is None: - print("No output directory specified; exiting after populating " - "corpus directory.") - sys.exit(0) - -# Keep track of which diffs pass / fail. -passes = 0 -fails = 0 -list_of_failures = [] -# Iterate through the corpus body. -# For each file, generate a .s and a .o file. Then, use objdump to -# generate a .objdump file, which we will compare to the .s file. -# Diff the .s with the corresponding .objdump file. -for filename in os.listdir(args.corpus): - filename_prefix = args.out + "/" + filename +def main(): + parser = argparse.ArgumentParser() + parse_arguments(parser) + args = parser.parse_args() + + # Step 1: Invoke fuzzer to generate a corpus. 
+ call_fuzzer(args) + + # If user did not specify output dir, skip steps 2-8; exit. + if args.out is None: + print("No output directory specified; exiting after populating " + "corpus directory.") + sys.exit(0) + + # List of corpus files that both LLVM AS and GNU AS fail to assemble. + list_of_llvm_as_and_gnu_as_fails = [] + # List of corpus files that only LLVM AS fails to assemble. + list_of_llvm_as_fails = [] + # List of corpus files that only GNU AS fails to assemble. + list_of_gnu_as_fails = [] + # List of corpus files that both LLVM AS and GNU AS can assemble. + list_of_llvm_as_and_gnu_as_passes = [] + # Keep track of which diffs pass / fail. + passes = 0 + fails = 0 + list_of_diffs = [] + + # Iterate through the corpus body. + + # For each file, re-run the fuzzer to generate an object file with the fuzz + # target. Also run the golden assembler (gnu as) to generate a reference + # object file. If both assemblers behave the same way, proceed to generate + # a .s file (using proto-to-asm tool), disassemble the object file + # generated by the fuzz target (using objdump) and then compare the + # resulting .objdump with your .s file. + for filename in os.listdir(args.corpus): + filename_prefix = args.out + "/" + filename + # Step 2: Run fuzzer with filetype=obj; check for error in fuzz target. + target_error_occurred, out = call_fuzzer_on_corpus_file(args, filename) + + # Step 3: Run proto-to-asm on corpus file to generate .s file. + call_proto_to_asm(args, filename) + + # Step 4: Run golden assembler (gnu) and check for error. + golden_error_occurred = call_golden_assembler(args, filename) + + # Step 5: Compare behavior of fuzz target and golden assembler. 
+ #################################################################### + #-- Initial, tentative interpretation of results, based on the --# + #-- status (pass/fail) of tools (LLVM AS, GNU AS, GNU OBJDUMP) --# + #------------------------------------------------------------------# + # LLVM AS | GNU AS | GNU OBJDUMP | Conclusion # + #------------------------------------------------------------------# + # 0 | 0 | x | invalid/unimplemented instr # + # 0 | 1 | x | LLVM MC bug/unimplemented instr # + # 1 | 0 | x | LLVM MC bug # + # 1 | 1 | 0 | LLVM MC bug and GCC bug # + # 1 | 1 | 1 | success # + #################################################################### + if target_error_occurred and golden_error_occurred: + print("Both assemblers failed to assemble file: " + filename) + list_of_llvm_as_and_gnu_as_fails.append(filename) + continue + elif target_error_occurred: + print("Only the target AS failed to assemble file: " + filename) + list_of_llvm_as_fails.append(filename) + out = out.split("\n") + for line in out: + if (line.startswith("error:")): + list_of_llvm_as_fails.append(line) + nextline = out[out.index(line) + 1] + list_of_llvm_as_fails.append(nextline) + continue + continue + elif golden_error_occurred: + print("Only the golden AS failed to assemble file: " + filename) + list_of_gnu_as_fails.append(filename) + continue + else: + print("Both assemblers assembled file: " + filename) + list_of_llvm_as_and_gnu_as_passes.append(filename) + + # Step 6: Call objdump on each .o file (generated by fuzz target) in + # the output directory, to generate corresponding .objdump files. + objdump_file = call_objdump(args, filename) + + # Step 7: Process files generated by objdump so that the files only + # contain instructions. + process_objdump_file(objdump_file, args, filename) + + # Step 8: Diff the .s file generated by proto-to-asm with .objdump file + # generated by objdump. 
+ passes, fails, list_of_diffs = print_file_status(args, filename, + passes, fails, + list_of_diffs) + + print_result(passes, fails, list_of_llvm_as_and_gnu_as_fails, + list_of_llvm_as_fails, list_of_gnu_as_fails, + list_of_llvm_as_and_gnu_as_passes, list_of_diffs) + + sys.exit(0 if fails == 0 else 1) + + +def parse_arguments(parser): + # Flags for the directory names, corpus and outputdir. + parser.add_argument("--corpus", type=str, help="corpus directory name", + required=True) + parser.add_argument("--out", type=str, + help="output directory name for obj files", + default=None) + + # Flags for llvm-mc-assemble-proto-fuzzer: triple, mattr, filetype. + parser.add_argument("--triple", type=str, help="specify the triple", + default="riscv32") + parser.add_argument("--mattr", type=str, help="specify mattr", + default="") + parser.add_argument("--march", type=str, help="specify march", + default="rv32i") + parser.add_argument("--filetype", type=str, help="asm or obj", + default="obj") + + # These args are passed in after the -fuzzer-args flag. + parser.add_argument("--runs", type=int, help="number of runs", default=100) + parser.add_argument("--max-len", type=int, + help="limit instruction size for fuzzing", default=40) + + # Flag specifies the name of objdump executable. + parser.add_argument("--objdump", type=str, help="specify the path to" + " objdump", default="/prj/llvm-arm/home/common/" + "build_tools/riscv32-gnu-7.2.0/bin/" + "riscv32-unknown-linux-gnu-objdump") + + # Flag specifies the name of golden assembler executable. 
+ parser.add_argument("--assemble", type=str, help="specify the path to" + " golden assembler", default="/prj/llvm-arm/home/" + "common/build_tools/riscv32-gnu-7.2.0/bin/" + "riscv32-unknown-linux-gnu-as") + + # Flag specifies that we should print out everything + parser.add_argument("--verbose", dest="verbose", action="store_true") + parser.set_defaults(verbose=False) + + parser.add_argument("--riscv-no-aliases", dest="NoAliases", + action="store_true") + parser.set_defaults(NoAliases=False) + + +# This function calls the llvm-mc-assemble-proto-fuzzer with a given extension +def call_fuzzer(args): + cmd = 'llvm-mc-assemble-proto-fuzzer-{march} {corpus} \ + -triple={triple} -mattr={mattr} -filetype={filetype} -fuzzer-args ' \ + + '-runs={runs} -max_len={max_len}' + cmd = cmd.format(march=args.march, corpus=args.corpus, triple=args.triple, + mattr=args.mattr, filetype=args.filetype, + runs=args.runs, max_len=args.max_len) + fuzz_command = shlex.split(cmd) - # Step 2: Run proto-to-asm on corpus file to generate .s file. 
- proto_to_asm_command = ["llvm-mc-assemble-proto-to-asm", \ - args.corpus + "/" + filename, "-riscv-no-aliases=" + - str(args.NoAliases)] + fuzz_proc = subprocess.Popen(fuzz_command, stdout=subprocess.PIPE, + stderr=subprocess.STDOUT) + fuzz_out, fuzz_err = fuzz_proc.communicate() + if fuzz_proc.returncode != 0: + raise ValueError('failed to run fuzz {}: {}'.format(fuzz_command, + fuzz_err)) + if args.verbose: + print(fuzz_out) + + +def call_fuzzer_on_corpus_file(args, filename): + target_error_occurred = False + cmd = 'llvm-mc-assemble-proto-fuzzer-{march} {corpus}/{file} ' \ + + '-triple={triple} -mattr={mattr} -out={out} ' \ + + '-filetype=obj -fuzzer-args -runs=1' + cmd = cmd.format(march=args.march, corpus=args.corpus, file=filename, + triple=args.triple, mattr=args.mattr, out=args.out) + target_as_command = shlex.split(cmd) + target_as_proc = subprocess.Popen(target_as_command, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT) + target_as_out, target_as_err = target_as_proc.communicate() + if target_as_proc.returncode != 0: + raise ValueError('failed to run fuzzer {}: {}'.format( + target_as_command, target_as_err)) + if target_as_out: + if "error" in target_as_out: + print("Fuzz Target failed to assemble this input: " + filename) + target_error_occurred = True + if args.verbose: + print(target_as_out) + return target_error_occurred, target_as_out + + +def call_proto_to_asm(args, filename): + filename_prefix = args.out + "/" + filename + cmd = 'llvm-mc-assemble-proto-to-asm-{march} {corpus}/{file}' \ + + ' -riscv-no-aliases={NoAliases}' + cmd = cmd.format(march=args.march, corpus=args.corpus, file=filename, + NoAliases=args.NoAliases) + proto_to_asm_command = shlex.split(cmd) asm_file = open(filename_prefix + ".s", "w+r") asm_proc = subprocess.Popen(proto_to_asm_command, stdout=asm_file, - stderr=subprocess.STDOUT) + stderr=subprocess.STDOUT) asm_out, asm_err = asm_proc.communicate() if asm_proc.returncode != 0: raise ValueError('failed to run {}: 
{}'.format(proto_to_asm_command, - asm_err)) - - # Step 3: Generate .o files in the outputdir. - cmd = 'llvm-mc-assemble-proto-fuzzer {corpus}/{file} -triple={triple}' \ - + ' -mattr={mattr} -out={out} -filetype=obj -fuzzer-args -runs=1' - cmd = cmd.format(corpus=args.corpus, file=filename, triple=args.triple, \ - mattr=args.mattr, out=args.out) - obj_files_command = shlex.split(cmd) - obj_proc = subprocess.Popen(obj_files_command, stdout=subprocess.PIPE, - stderr=subprocess.STDOUT) - obj_out, obj_err = obj_proc.communicate() - if obj_proc.returncode != 0: - raise ValueError('failed to run fuzzer {}: {}'.format( \ - obj_files_command, obj_err)) - if args.verbose: - print(obj_out) + asm_err)) + elif asm_out: + print("Asm_out: " + asm_out) + asm_file.close() + + +def call_golden_assembler(args, filename): + golden_error_occurred = False + cmd = '{assemble} {dirname}/{filename}.s -march={march} ' \ + + '-o {dirname}/{filename}.out' + cmd = cmd.format(assemble=args.assemble, march=args.march, + dirname=args.out, filename=filename) + golden_as_command = shlex.split(cmd) + golden_as_proc = subprocess.Popen(golden_as_command, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT) + golden_as_out, golden_as_err = golden_as_proc.communicate() + if golden_as_proc.returncode != 0: + if (golden_as_out): + print("golden_as_out: " + golden_as_out) + golden_error_occurred = True + elif args.verbose: + print(golden_as_out) + return golden_error_occurred + - # Step 4: Call objdump on each .o file in the output directory, - # to generate corresponding .objdump files. 
+def call_objdump(args, filename): + filename_prefix = args.out + "/" + filename objdump_file = open(filename_prefix + ".objdump", "w+r") cmd = '{objdump} -dr -M numeric {filename}.o' cmd = cmd.format(objdump=args.objdump, filename=filename_prefix) objdump_command = shlex.split(cmd) objdump_proc = subprocess.Popen(objdump_command, - stdout=objdump_file, stderr=subprocess.STDOUT) + stdout=objdump_file, + stderr=subprocess.STDOUT) objdump_out, objdump_err = objdump_proc.communicate() if objdump_proc.returncode != 0: - raise ValueError('failed to run objdump {}: ' \ - + '{}'.format(objdump_command, objdump_err)) + raise ValueError('failed to run objdump {}: ' + + '{}'.format(objdump_command, objdump_err)) + return objdump_file - # Step 5: Process asm files so that files only contain asm instructions. - asm_file.seek(0) - lines = asm_file.readlines() - asm_file.close() - write_asm_file = open(filename_prefix + ".parsed_s", "w") - # Remove the first line of .s file, which contains the filename. For example: - # // corpus/dff0318decde43ce5065a4209412aa2c68d01318 - for line in lines: - if "\t" in line: - write_asm_file.write(line) - write_asm_file.close() - # Step 6: Process files generated by objdump so that the files only contain - # instructions. +def process_objdump_file(objdump_file, args, filename): + filename_prefix = args.out + "/" + filename objdump_file.seek(0) temp_objdump_file = tempfile.TemporaryFile("w+r") - # Only the lines containing asm instructions have tabs in them; remove all - # other lines (for example, header lines and whitespace). + # Only the lines containing asm instructions have tabs in them; remove + # all other lines (for example, header lines and whitespace). for line in objdump_file: if "\t" in line: temp_objdump_file.write(line) @@ -152,28 +278,65 @@ write_objdump_file.write("\n") temp_objdump_file.close() - # Step 7: Diff the .s file generated by proto-to-asm and the .objdump file - # generated by objdump. 
+ +def print_file_status(args, filename, passes, fails, list_of_diffs): + filename_prefix = args.out + "/" + filename prefix = "Checking " + filename + "..." - file1 = open(filename_prefix + ".parsed_s", "r") + file1 = open(filename_prefix + ".s", "r") file2 = open(filename_prefix + ".parsed_objdump", "r") diff = difflib.ndiff(file1.readlines(), file2.readlines()) delta = ''.join(x[0:] for x in diff if x.startswith(('- ', '+ '))) if delta: print(prefix + "FAILURE!") fails = fails + 1 - list_of_failures.append(filename) - list_of_failures.append(delta) + list_of_diffs.append(filename) + list_of_diffs.append(delta) else: print(prefix + "SUCCESS!") passes = passes + 1 + file1.close() + file2.close() + return passes, fails, list_of_diffs + + +def print_result(passes, fails, list1, list2, list3, list4, list5): + # Both llvm-mc-assembler and golden (gnu) assembler fail + count1 = 0 + for item in list1: + count1 = count1 + 1 + + count2 = 0 + # Target assembler llvm-mc fails + for item in list2: + count2 = count2 + 1 + + if (count2 != 0): + print("Target LLVM AS failed to assemble these inputs...") + for item in list2: + print(item) + + count3 = 0 + # Golden (gnu) assembler fails + for item in list3: + count3 = count3 + 1 -print("Succeeded: " + str(passes) + "\t\tFailed: " + str(fails) \ - + "\t\t[Total: " + str(passes + fails) + "]") + count4 = 0 + # Both assemblers pass + for item in list4: + count4 = count4 + 1 -if (fails != 0): - print("The following files failed...") - for item in list_of_failures: - print(item) + if (fails != 0): + print("Target LLVM AS and golden GNU AS both assembled " + "these inputs, but input ASM string differed from" + "GNU disassembler-generated ASM string...") + for item in list5: + print(item) + print("Both llvm as and gnu assembler fail: {}".format(count1)) + print("Only llvm as fails: {}".format(count2)) + print("Only gnu as fails: {}".format(count3)) + print("Both llvm as and gnu assembler pass: {}".format(count4)) + print("[Succeeded: 
{}/{}\tFailed: {}/{}]".format(passes, + count4, fails, count4)) -sys.exit(0 if fails == 0 else 1) +if __name__ == "__main__": + main() Index: tools/llvm-mc-assemble-proto-fuzzer/proto-files/example_asm_proto.proto =================================================================== --- /dev/null +++ tools/llvm-mc-assemble-proto-fuzzer/proto-files/example_asm_proto.proto @@ -0,0 +1,63 @@ +//===-- example_asm_proto.proto - Protobuf description of ASM -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file describes a subset of ASM as a protobuf. It is used by the +/// example fuzzer to generate basic inputs to fuzz the llvm mc layer. +/// +//===----------------------------------------------------------------------===// + +syntax = "proto2"; + +message Register { + enum RegName { + X0 = 1; X1 = 2; X2 = 3; X3 = 4; X4 = 5; + X5 = 6; X6 = 7; X7 = 8; X8 = 9; X9 = 10; + X10 = 11; X11 = 12; X12 = 13; X13 = 14; X14 = 15; + X15 = 16; X16 = 17; X17 = 18; X18 = 19; X19 = 20; + X20 = 21; X21 = 22; X22 = 23; X23 = 24; X24 = 25; + X25 = 26; X26 = 27; X27 = 28; X28 = 29; X29 = 30; + X30 = 31; X31 = 32; + }; + required RegName name = 1; +} + +message RTypeOpcode { + enum Op { + ADD = 1; SUB = 2; + }; + required Op op = 1; +} + +message RTypeOperands { + required Register operand1 = 1; + required Register operand2 = 2; + required Register operand3 = 3; +} + +message RTypeStatement { + required RTypeOpcode opcode = 1; + required RTypeOperands operands = 2; +} + +message AsmStatement { + oneof asmstatement_oneof { + RTypeStatement statement = 2; + } +} + +message AsmStatementSeq { + repeated AsmStatement statements = 1; +} + +message Assembly { + required AsmStatementSeq asmStatements = 1; +} + +package mc_proto_fuzzer; Index: 
tools/llvm-mc-assemble-proto-fuzzer/proto-files/rv32.proto =================================================================== --- /dev/null +++ tools/llvm-mc-assemble-proto-fuzzer/proto-files/rv32.proto @@ -0,0 +1,129 @@ +//===-- rv32.proto - Protobuf description of ASM --------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file serves as the base for the protobuf representation of the RV32 +/// ISA of RISC-V ASM. It is used to more easily find interesting inputs +/// for fuzzing the llvm mc layer. +/// +//===----------------------------------------------------------------------===// + +syntax = "proto2"; + +import "rv32c.proto"; +import "rv32i.proto"; +import "rv32m.proto"; + +message Immediate { + oneof imm_oneof { + sint32 s_imm = 1; + uint32 u_imm = 2; + } +} + +message Register { + enum RegName { + X0 = 0; X1 = 1; X2 = 2; X3 = 3; X4 = 4; + X5 = 5; X6 = 6; X7 = 7; X8 = 8; X9 = 9; + X10 = 10; X11 = 11; X12 = 12; X13 = 13; X14 = 14; + X15 = 15; X16 = 16; X17 = 17; X18 = 18; X19 = 19; + X20 = 20; X21 = 21; X22 = 22; X23 = 23; X24 = 24; + X25 = 25; X26 = 26; X27 = 27; X28 = 28; X29 = 29; + X30 = 30; X31 = 31; + }; + required RegName regname = 1; +} + +// Note: The order of the fields does not matter for the grammar since +// the structure of the fuzzed ASM statements is handled in proto_to_asm. 
+message RegRegRegStmt { + oneof opcode_oneof { + I_RFormatOpcode opcode1 = 4; + M_RFormatOpcode opcode2 = 5; + } + required Register operand1 = 1; + required Register operand2 = 2; + required Register operand3 = 3; +} + +message RegRegImmStmt1 { + oneof opcode_oneof { + I_BFormatOpcode opcode1 = 4; + I_IFormatOpcode opcode2 = 5; + } + required Register operand1 = 1; + required Register operand2 = 2; + required Immediate operand3 = 3; +} + +message RegRegImmStmt2 { + oneof opcode_oneof { + C_CLFormatOpcode opcode1 = 4; + C_CSFormat1Opcode opcode2 = 5; + I_IFormatLoadOpcode opcode3 = 6; + I_SFormatOpcode opcode4 = 7; + } + required Register operand1 = 1; + required Register operand2 = 2; + required Immediate operand3 = 3; +} + +message RegRegStmt { + oneof opcode_oneof { + C_CRFormat2Opcode opcode1 = 3; + C_CSFormat2Opcode opcode2 = 4; + } + required Register operand1 = 1; + required Register operand2 = 2; +} + +message RegImmStmt { + oneof opcode_oneof { + C_CBFormatOpcode opcode1 = 3; + C_CIFormatOpcode opcode2 = 4; + C_CIWFormatOpcode opcode3 = 5; + C_CSSFormatOpcode opcode4 = 6; + I_UFormatOpcode opcode5 = 7; + I_JFormatOpcode opcode6 = 8; + } + required Register operand1 = 1; + required Immediate operand2 = 2; +} + +message RegStmt { + required C_CRFormat1Opcode opcode1 = 2; + required Register operand1 = 1; +} + +message ImmStmt { + required C_CJFormatOpcode opcode1 = 2; + required Immediate operand1 = 1; +} + +message AsmStmt { + oneof asmstmt_oneof { + RegRegRegStmt stmt1 = 1; + RegRegImmStmt1 stmt2 = 2; + RegRegImmStmt2 stmt3 = 3; + RegRegStmt stmt4 = 4; + RegImmStmt stmt5 = 5; + RegStmt stmt6 = 6; + ImmStmt stmt7 = 7; + } +} + +message AsmStmtSeq { + repeated AsmStmt stmts = 1; +} + +message Assembly { + required AsmStmtSeq asmstmts = 1; +} + +package mc_proto_fuzzer; Index: tools/llvm-mc-assemble-proto-fuzzer/proto-files/rv32c.proto =================================================================== --- /dev/null +++ 
tools/llvm-mc-assemble-proto-fuzzer/proto-files/rv32c.proto @@ -0,0 +1,103 @@ +//===-- rv32c.proto - Protobuf description of ASM -------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file describes the instruction opcodes in the C-extension for the +/// RV32 ISA, in Protobuf form. +/// +//===----------------------------------------------------------------------===// + +syntax = "proto2"; + +message C_CIFormatOpcode { + enum Op { + CADDI = 0; + CADDI16SP = 1; + CLI = 2; + CLUI = 3; + CLWSP = 4; + CSLLI = 5; + } + required Op op = 1; +} + +message C_CSSFormatOpcode { + enum Op { + CSWSP = 0; + } + required Op op = 1; +} + +message C_CLFormatOpcode { + enum Op { + CLW = 0; + } + required Op op = 1; +} + +message C_CSFormat1Opcode { + enum Op { + CSW = 0; + } + required Op op = 1; +} + +message C_CSFormat2Opcode { + enum Op { + CAND = 0; + COR = 1; + CSUB = 2; + CXOR = 3; + } + required Op op = 1; +} + +message C_CJFormatOpcode { + enum Op { + CJ = 0; + CJAL = 1; + } + required Op op = 1; +} + +message C_CRFormat1Opcode { + enum Op { + CJR = 0; + CJALR = 1; + } + required Op op = 1; +} + +message C_CRFormat2Opcode { + enum Op { + CADD = 0; + CMV = 1; + } + required Op op = 1; +} + +message C_CBFormatOpcode { + enum Op { + CANDI = 0; + CBEQZ = 1; + CBNEZ = 2; + CSRAI = 3; + CSRLI = 4; + } + required Op op = 1; +} + +message C_CIWFormatOpcode { + enum Op { + CADDI4SPN = 0; + } + required Op op = 1; +} + +package mc_proto_fuzzer; Index: tools/llvm-mc-assemble-proto-fuzzer/proto-files/rv32i.proto =================================================================== --- /dev/null +++ tools/llvm-mc-assemble-proto-fuzzer/proto-files/rv32i.proto @@ -0,0 +1,100 @@ +//===-- rv32i.proto - Protobuf description of ASM 
-------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file describes the instruction opcodes in the I-extension for the +/// RV32 ISA, in Protobuf form. +/// The grammar currently supports all the instructions in the RV32I ISA, +/// except for the following instructions: FENCE, FENCE.I, ECALL, EBREAK, +/// CSRRW, CSRRS, CSRRC, CSRRWI, CSRRSI, and CSRRCI. +/// +//===----------------------------------------------------------------------===// + +syntax = "proto2"; + +message I_RFormatOpcode { + enum Op { + ADD = 0; + AND = 1; + OR = 2; + SLL = 3; + SLT = 4; + SLTU = 5; + SRA = 6; + SRL = 7; + SUB = 8; + XOR = 9; + }; + required Op op = 1; +} + +message I_IFormatOpcode { + enum Op { + ADDI = 0; + ANDI = 1; + JALR = 2; + ORI = 3; + SLLI = 4; + SLTI = 5; + SLTIU = 6; + SRAI = 7; + SRLI = 8; + XORI = 9; + }; + required Op op = 1; +} + +message I_IFormatLoadOpcode { + enum Op { + LB = 0; + LBU = 1; + LH = 2; + LHU = 3; + LW = 4; + }; + required Op op = 1; +} + +message I_SFormatOpcode { + enum Op { + SB = 0; + SH = 1; + SW = 2; + }; + required Op op = 1; +} + +message I_BFormatOpcode { + enum Op { + BEQ = 0; + BGE = 1; + BGEU = 2; + BLT = 3; + BLTU = 4; + BNE = 5; + }; + required Op op = 1; +} + +message I_UFormatOpcode { + enum Op { + AUIPC = 0; + LUI = 1; + }; + required Op op = 1; +} + +message I_JFormatOpcode { + enum Op { + JAL = 0; + }; + required Op op = 1; +} + +package mc_proto_fuzzer; Index: tools/llvm-mc-assemble-proto-fuzzer/proto-files/rv32m.proto =================================================================== --- /dev/null +++ tools/llvm-mc-assemble-proto-fuzzer/proto-files/rv32m.proto @@ -0,0 +1,32 @@ +//===-- rv32m.proto - Protobuf description of ASM -------------------------===// +// +// The LLVM Compiler 
Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file describes the instruction opcodes in the M-extension for the +/// RV32 ISA, in Protobuf form. +/// +//===----------------------------------------------------------------------===// + +syntax = "proto2"; + +message M_RFormatOpcode { + enum Op { + DIV = 0; + DIVU = 1; + MUL = 2; + MULH = 3; + MULHSU = 4; + MULHU = 5; + REM = 6; + REMU = 7; + }; + required Op op = 1; +} + +package mc_proto_fuzzer; Index: tools/llvm-mc-assemble-proto-fuzzer/proto-to-asm/CMakeLists.txt =================================================================== --- tools/llvm-mc-assemble-proto-fuzzer/proto-to-asm/CMakeLists.txt +++ tools/llvm-mc-assemble-proto-fuzzer/proto-to-asm/CMakeLists.txt @@ -2,13 +2,23 @@ set(CMAKE_CXX_FLAGS ${CXX_FLAGS_NOFUZZ}) # Needed by LLVM's CMake checks because this file defines multiple targets. 
-set(LLVM_OPTIONAL_SOURCES proto_to_asm.cpp proto_to_asm_main.cpp) +set(LLVM_OPTIONAL_SOURCES example_proto_to_asm.cpp proto_to_asm_main.cpp + proto_to_asm_rv32.cpp) -add_clang_library(mcProtoToASM proto_to_asm.cpp +add_clang_library(mcProtoToASM example_proto_to_asm.cpp DEPENDS mcASMProto LINK_LIBS mcASMProto ${PROTOBUF_LIBRARIES} ) +add_clang_library(mcRv32ProtoToASM proto_to_asm_rv32.cpp + DEPENDS mcRv32ASMProto + LINK_LIBS mcRv32ASMProto ${PROTOBUF_LIBRARIES} + ) + add_clang_executable(llvm-mc-assemble-proto-to-asm proto_to_asm_main.cpp) +add_clang_executable(llvm-mc-assemble-proto-to-asm-rv32 + proto_to_asm_main.cpp) target_link_libraries(llvm-mc-assemble-proto-to-asm PRIVATE mcProtoToASM) +target_link_libraries(llvm-mc-assemble-proto-to-asm-rv32 + PRIVATE mcRv32ProtoToASM) Index: tools/llvm-mc-assemble-proto-fuzzer/proto-to-asm/example_proto_to_asm.cpp =================================================================== --- /dev/null +++ tools/llvm-mc-assemble-proto-fuzzer/proto-to-asm/example_proto_to_asm.cpp @@ -0,0 +1,85 @@ +//==-- example_proto_to_asm.cpp - Protobuf-ASM conversion ------------------==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Implements functions for converting between protobufs for an example +// assembly language grammar and the assembly language instructions. 
+// +//===----------------------------------------------------------------------===// +#include "proto_to_asm.h" +#include "example_asm_proto.pb.h" + +#include +#include +#include +#include +#include + +using namespace google::protobuf; + +#define EMIT_ASM(X,Y) \ + { \ + const EnumDescriptor * ED = X; \ + if (ED) \ + return OS << EmitAsm(ED, Y); \ + assert(!"Could not find descriptor."); \ + } + +static std::string EmitAsm(const EnumDescriptor * Enum, + int Num) { + const EnumValueDescriptor * D = Enum->FindValueByNumber(Num); + std::string Msg = D->name(); + std::transform(Msg.begin(), Msg.end(), Msg.begin(), ::tolower); + return Msg; +} + +namespace mc_proto_fuzzer { +std::ostream &operator<<(std::ostream &OS, const Register &X) { + EMIT_ASM((Register_RegName_descriptor()), (X.name())); +} +std::ostream &operator<<(std::ostream &OS, const RTypeOpcode &X) { + EMIT_ASM((RTypeOpcode_Op_descriptor()), (X.op())); +} +std::ostream &operator<<(std::ostream &OS, const RTypeOperands &X) { + OS << X.operand1(); + OS << "," << X.operand2(); + OS << "," << X.operand3(); + return OS; +} +std::ostream &operator<<(std::ostream &OS, const RTypeStatement &X) { + OS << "\t" << X.opcode() << "\t"; + OS << X.operands() << "\n"; + return OS; +} +std::ostream &operator<<(std::ostream &OS, const AsmStatement &X) { + return OS << X.statement(); +} +std::ostream &operator<<(std::ostream &OS, const AsmStatementSeq &X) { + for (auto &ST : X.statements()) + OS << ST; + return OS; +} +std::ostream &operator<<(std::ostream &OS, const Assembly &X) { + return OS << X.asmstatements(); +} + +// --------------------------------- + +std::string FunctionToString(const Assembly &Input) { + std::ostringstream OS; + OS << Input; + return OS.str(); +} +std::string ProtoToASM(const uint8_t *Data, size_t Size) { + Assembly Message; + if (!Message.ParsePartialFromArray(Data, Size)) + return "#error invalid proto\n"; + return FunctionToString(Message); +} + +} // namespace mc_proto_fuzzer Index: 
tools/llvm-mc-assemble-proto-fuzzer/proto-to-asm/proto_to_asm.h =================================================================== --- tools/llvm-mc-assemble-proto-fuzzer/proto-to-asm/proto_to_asm.h +++ tools/llvm-mc-assemble-proto-fuzzer/proto-to-asm/proto_to_asm.h @@ -11,13 +11,11 @@ // //===----------------------------------------------------------------------===// -#include -#include #include namespace mc_proto_fuzzer { class Assembly; std::string FunctionToString(const Assembly &input); -std::string ProtoToASM(const uint8_t *data, size_t size, bool flag_enabled); +std::string ProtoToASM(const uint8_t *data, size_t size); } Index: tools/llvm-mc-assemble-proto-fuzzer/proto-to-asm/proto_to_asm.cpp =================================================================== --- tools/llvm-mc-assemble-proto-fuzzer/proto-to-asm/proto_to_asm.cpp +++ /dev/null @@ -1,128 +0,0 @@ -//==-- proto_to_asm.cpp - Protobuf-ASM conversion --------------------------==// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// Implements functions for converting between protobufs for an example -// assembly language grammar and the assembly language instructions. 
-// -//===----------------------------------------------------------------------===// -#include "proto_to_asm.h" -#include "asm_proto.pb.h" - -#include -#include - -namespace mc_proto_fuzzer { - -static bool NoAliases = false; -std::ostream &operator<<(std::ostream &OS, const Register &X) { - switch (X.name()) { - case Register::X0: OS << "x0"; break; - case Register::X1: OS << "x1"; break; - case Register::X2: OS << "x2"; break; - case Register::X3: OS << "x3"; break; - case Register::X4: OS << "x4"; break; - case Register::X5: OS << "x5"; break; - case Register::X6: OS << "x6"; break; - case Register::X7: OS << "x7"; break; - case Register::X8: OS << "x8"; break; - case Register::X9: OS << "x9"; break; - case Register::X10: OS << "x10"; break; - case Register::X11: OS << "x11"; break; - case Register::X12: OS << "x12"; break; - case Register::X13: OS << "x13"; break; - case Register::X14: OS << "x14"; break; - case Register::X15: OS << "x15"; break; - case Register::X16: OS << "x16"; break; - case Register::X17: OS << "x17"; break; - case Register::X18: OS << "x18"; break; - case Register::X19: OS << "x19"; break; - case Register::X20: OS << "x20"; break; - case Register::X21: OS << "x21"; break; - case Register::X22: OS << "x22"; break; - case Register::X23: OS << "x23"; break; - case Register::X24: OS << "x24"; break; - case Register::X25: OS << "x25"; break; - case Register::X26: OS << "x26"; break; - case Register::X27: OS << "x27"; break; - case Register::X28: OS << "x28"; break; - case Register::X29: OS << "x29"; break; - case Register::X30: OS << "x30"; break; - case Register::X31: OS << "x31"; break; - } - return OS; -} -std::ostream &operator<<(std::ostream &OS, const ITypeOpcode &X) { - switch (X.op()) { - case ITypeOpcode_Op_ADD: OS << "add"; break; - case ITypeOpcode_Op_SUB: OS << "sub"; break; - } - return OS; -} -std::ostream &operator<<(std::ostream &OS, const ITypeOperands &X) { - OS << X.operand1(); - OS << "," << X.operand2(); - OS << "," << 
X.operand3(); - return OS; -} -std::ostream &operator<<(std::ostream &OS, const ITypeStatement &X) { - OS << "\t" << X.opcode() << "\t"; - OS << X.operands() << "\n"; - return OS; -} -std::ostream &operator<<(std::ostream &OS, const AsmStatement &X) { - if (X.has_statement()) { - ITypeOperands ThreeOperands = X.statement().operands(); - Register Oper1 = ThreeOperands.operand1(); - Register Oper2 = ThreeOperands.operand2(); - Register Oper3 = ThreeOperands.operand3(); - if (NoAliases) { - OS << X.statement(); - return OS; - } - if (X.statement().opcode().op() == ITypeOpcode_Op_ADD) { - OS << X.statement(); - return OS; - } - else if (X.statement().opcode().op() == - ITypeOpcode_Op_SUB) { - if (Oper2.name() == Register_RegName_X0) { - OS << "\tneg\t" << Oper1 << "," << Oper3 << "\n"; - return OS; - } else { - OS << X.statement(); - return OS; - } - } - } - return OS; -} -std::ostream &operator<<(std::ostream &OS, const AsmStatementSeq &X) { - for (auto &ST : X.statements()) OS << ST; - return OS; -} -std::ostream &operator<<(std::ostream &OS, const Assembly &X) { - return OS << X.asmstatements(); -} - -// --------------------------------- - -std::string FunctionToString(const Assembly &Input) { - std::ostringstream OS; - OS << Input; - return OS.str(); -} -std::string ProtoToASM(const uint8_t *Data, size_t Size, bool Flag) { - Assembly Message; - NoAliases = Flag; - if (!Message.ParsePartialFromArray(Data, Size)) - return "#error invalid proto\n"; - return FunctionToString(Message); -} - -} // namespace mc_proto_fuzzer Index: tools/llvm-mc-assemble-proto-fuzzer/proto-to-asm/proto_to_asm_main.cpp =================================================================== --- tools/llvm-mc-assemble-proto-fuzzer/proto-to-asm/proto_to_asm_main.cpp +++ tools/llvm-mc-assemble-proto-fuzzer/proto-to-asm/proto_to_asm_main.cpp @@ -12,41 +12,18 @@ //===----------------------------------------------------------------------===// #include "proto_to_asm.h" -#include 
"llvm/Support/CommandLine.h" - #include #include #include #include -using namespace llvm; -static cl::opt NoAliases("riscv-no-aliases", - cl::desc("Set to false to match printed asm" - " of objdump"), - cl::value_desc("boolean"), - cl::init(false)); - int main(int argc, char **argv) { - static std::vector ModifiedArgv; - ModifiedArgv.push_back(argv[0]); - static std::vector NewArgv; - NewArgv.push_back(argv[0]); for (int i = 1; i < argc; i++) { - std::string SearchString(argv[i]); - if (SearchString.find("riscv-no-aliases") != llvm::StringLiteral::npos) - ModifiedArgv.push_back(argv[i]); - else - NewArgv.push_back(argv[i]); - } - cl::ParseCommandLineOptions((int) ModifiedArgv.size(), &ModifiedArgv[0]); - for (int i = 1; i < (int) NewArgv.size(); i++) { - std::fstream in(NewArgv[i]); + std::fstream in(argv[i]); std::string str((std::istreambuf_iterator(in)), std::istreambuf_iterator()); - std::cout << "// " << NewArgv[i] << std::endl; std::cout << mc_proto_fuzzer::ProtoToASM( - reinterpret_cast(str.data()), str.size(), - (bool) NoAliases); + reinterpret_cast(str.data()), str.size()); } return 0; } Index: tools/llvm-mc-assemble-proto-fuzzer/proto-to-asm/proto_to_asm_rv32.cpp =================================================================== --- /dev/null +++ tools/llvm-mc-assemble-proto-fuzzer/proto-to-asm/proto_to_asm_rv32.cpp @@ -0,0 +1,233 @@ +//==-- proto_to_asm_rv32.cpp - Protobuf-ASM conversion ---------------------==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Implements functions for converting between protobufs for the assembly +// language grammar for RV32I instruction set and assembly language +// instructions. 
+// +//===----------------------------------------------------------------------===// +#include "proto_to_asm.h" +#include "rv32.pb.h" +#include "rv32c.pb.h" +#include "rv32i.pb.h" +#include "rv32m.pb.h" + +#include +#include +#include +#include +#include + +using namespace google::protobuf; + +#define EMIT_ASM(X,Y,Z) \ + { \ + const EnumDescriptor * ED = X; \ + if (ED) \ + return OS << EmitAsm(ED, Y, Z); \ + assert(!"Could not find descriptor."); \ + } + +static std::string EmitAsm(const EnumDescriptor * Enum, + int Num, bool Compressed) { + const EnumValueDescriptor * D = Enum->FindValueByNumber(Num); + std::string Msg = D->name(); + std::transform(Msg.begin(), Msg.end(), Msg.begin(), ::tolower); + if (Compressed) + Msg.insert(1, 1, '.'); + return Msg; +} + +namespace mc_proto_fuzzer { +std::ostream &operator<<(std::ostream &OS, const Immediate &X) { + if (X.has_s_imm()) + return OS << X.s_imm(); + if (X.has_u_imm()) + return OS << X.u_imm(); +} +std::ostream &operator<<(std::ostream &OS, const Register &X) { + EMIT_ASM(Register_RegName_descriptor(), X.regname(), false); +} +std::ostream &operator<<(std::ostream &OS, const C_CIFormatOpcode &X) { + EMIT_ASM(C_CIFormatOpcode_Op_descriptor(), X.op(), true); +} +std::ostream &operator<<(std::ostream &OS, const C_CSSFormatOpcode &X) { + EMIT_ASM(C_CSSFormatOpcode_Op_descriptor(), X.op(), true); +} +std::ostream &operator<<(std::ostream &OS, const C_CLFormatOpcode &X) { + EMIT_ASM(C_CLFormatOpcode_Op_descriptor(), X.op(), true); +} +std::ostream &operator<<(std::ostream &OS, const C_CSFormat1Opcode &X) { + EMIT_ASM(C_CSFormat1Opcode_Op_descriptor(), X.op(), true); +} +std::ostream &operator<<(std::ostream &OS, const C_CSFormat2Opcode &X) { + EMIT_ASM(C_CSFormat2Opcode_Op_descriptor(), X.op(), true); +} +std::ostream &operator<<(std::ostream &OS, const C_CJFormatOpcode &X) { + EMIT_ASM(C_CJFormatOpcode_Op_descriptor(), X.op(), true); +} +std::ostream &operator<<(std::ostream &OS, const C_CRFormat1Opcode &X) { + 
EMIT_ASM(C_CRFormat1Opcode_Op_descriptor(), X.op(), true); +} +std::ostream &operator<<(std::ostream &OS, const C_CRFormat2Opcode &X) { + EMIT_ASM(C_CRFormat2Opcode_Op_descriptor(), X.op(), true); +} +std::ostream &operator<<(std::ostream &OS, const C_CBFormatOpcode &X) { + EMIT_ASM(C_CBFormatOpcode_Op_descriptor(), X.op(), true); +} +std::ostream &operator<<(std::ostream &OS, const C_CIWFormatOpcode &X) { + EMIT_ASM(C_CIWFormatOpcode_Op_descriptor(), X.op(), true); +} +std::ostream &operator<<(std::ostream &OS, const M_RFormatOpcode &X) { + EMIT_ASM(M_RFormatOpcode_Op_descriptor(), X.op(), false); +} +std::ostream &operator<<(std::ostream &OS, const I_RFormatOpcode &X) { + EMIT_ASM(I_RFormatOpcode_Op_descriptor(), X.op(), false); +} +std::ostream &operator<<(std::ostream &OS, const I_IFormatLoadOpcode &X) { + EMIT_ASM(I_IFormatLoadOpcode_Op_descriptor(), X.op(), false); +} +std::ostream &operator<<(std::ostream &OS, const I_IFormatOpcode &X) { + EMIT_ASM(I_IFormatOpcode_Op_descriptor(), X.op(), false); +} +std::ostream &operator<<(std::ostream &OS, const I_SFormatOpcode &X) { + EMIT_ASM(I_SFormatOpcode_Op_descriptor(), X.op(), false); +} +std::ostream &operator<<(std::ostream &OS, const I_BFormatOpcode &X) { + EMIT_ASM(I_BFormatOpcode_Op_descriptor(), X.op(), false); +} +std::ostream &operator<<(std::ostream &OS, const I_UFormatOpcode &X) { + EMIT_ASM(I_UFormatOpcode_Op_descriptor(), X.op(), false); +} +std::ostream &operator<<(std::ostream &OS, const I_JFormatOpcode &X) { + EMIT_ASM(I_JFormatOpcode_Op_descriptor(), X.op(), false); +} +std::ostream &operator<<(std::ostream &OS, const RegRegRegStmt &X) { + OS << "\t"; + if (X.has_opcode1()) + OS << X.opcode1(); + else + OS << X.opcode2(); + OS << "\t"; + OS << X.operand1() << ","; + OS << X.operand2() << ","; + OS << X.operand3() << "\n"; + return OS; +} +std::ostream &operator<<(std::ostream &OS, const RegRegImmStmt1 &X) { + OS << "\t"; + if (X.has_opcode1()) + OS << X.opcode1(); + else + OS << X.opcode2(); + OS 
<< "\t"; + OS << X.operand1() << ","; + OS << X.operand2() << ","; + OS << X.operand3() << "\n"; + return OS; +} +std::ostream &operator<<(std::ostream &OS, const RegRegImmStmt2 &X) { + OS << "\t"; + if (X.has_opcode1()) + OS << X.opcode1(); + else if (X.has_opcode2()) + OS << X.opcode2(); + else if (X.has_opcode3()) + OS << X.opcode3(); + else + OS << X.opcode4(); + OS << "\t"; + OS << X.operand1() << ","; + OS << X.operand3() << "("; + OS << X.operand2() << ")\n"; + return OS; +} +std::ostream &operator<<(std::ostream &OS, const RegRegStmt &X) { + OS << "\t"; + if (X.has_opcode1()) + OS << X.opcode1(); + else + OS << X.opcode2(); + OS << "\t"; + OS << X.operand1() << "," << X.operand2() << "\n"; + return OS; +} +std::ostream &operator<<(std::ostream &OS, const RegImmStmt &X) { + OS << "\t"; + if (X.has_opcode1()) + OS << X.opcode1(); + else if (X.has_opcode2()) + OS << X.opcode2(); + else if (X.has_opcode3()) + OS << X.opcode3(); + else if (X.has_opcode4()) + OS << X.opcode4(); + else if (X.has_opcode5()) + OS << X.opcode5(); + else + OS << X.opcode6(); + OS << "\t"; + OS << X.operand1() << "," << X.operand2() << "\n"; + return OS; +} +std::ostream &operator<<(std::ostream &OS, const RegStmt &X) { + OS << "\t"; + OS << X.opcode1(); + OS << "\t"; + OS << X.operand1() << "\n"; + return OS; +} +std::ostream &operator<<(std::ostream &OS, const ImmStmt &X) { + OS << "\t"; + OS << X.opcode1(); + OS << "\t"; + OS << X.operand1() << "\n"; + return OS; +} +std::ostream &operator<<(std::ostream &OS, const AsmStmt &X) { + if (X.has_stmt1()) + return OS << X.stmt1(); + if (X.has_stmt2()) + return OS << X.stmt2(); + if (X.has_stmt3()) + return OS << X.stmt3(); + if (X.has_stmt4()) + return OS << X.stmt4(); + if (X.has_stmt5()) + return OS << X.stmt5(); + if (X.has_stmt6()) + return OS << X.stmt6(); + if (X.has_stmt7()) + return OS << X.stmt7(); + return OS; +} +std::ostream &operator<<(std::ostream &OS, const AsmStmtSeq &X) { + for (auto &ST : X.stmts()) + OS << ST; + return 
OS; +} +std::ostream &operator<<(std::ostream &OS, const Assembly &X) { + return OS << X.asmstmts(); +} + +// --------------------------------- + +std::string FunctionToString(const Assembly &Input) { + std::ostringstream OS; + OS << Input; + return OS.str(); +} +std::string ProtoToASM(const uint8_t *Data, size_t Size) { + Assembly Message; + if (!Message.ParsePartialFromArray(Data, Size)) + return "#error invalid proto\n"; + return FunctionToString(Message); +} + +} // namespace mc_proto_fuzzer