Index: tools/llvm-mc-assemble-proto-fuzzer/CMakeLists.txt =================================================================== --- tools/llvm-mc-assemble-proto-fuzzer/CMakeLists.txt +++ tools/llvm-mc-assemble-proto-fuzzer/CMakeLists.txt @@ -23,11 +23,28 @@ add_definitions(-DGOOGLE_PROTOBUF_NO_RTTI) include_directories(${PROTOBUF_INCLUDE_DIRS}) include_directories(${CMAKE_CURRENT_BINARY_DIR}) - protobuf_generate_cpp(PROTO_SRCS PROTO_HDRS asm_proto.proto) + protobuf_generate_cpp(EXAMPLE_PROTO_SRCS EXAMPLE_PROTO_HDRS + proto-files/asm_proto.proto) + protobuf_generate_cpp(RV32_PROTO_SRCS RV32_PROTO_HDRS + proto-files/rv32c.proto + proto-files/rv32i.proto + proto-files/rv32_operands.proto + proto-files/rv32.proto + proto-files/rv32m.proto) + set(LLVM_OPTIONAL_SOURCES ${LLVM_OPTIONAL_SOURCES} ${PROTO_SRCS}) + add_clang_library(mcASMProto - ${PROTO_SRCS} - ${PROTO_HDRS} + ${EXAMPLE_PROTO_SRCS} + ${EXAMPLE_PROTO_HDRS} + + LINK_LIBS + ${PROTOBUF_LIBRARIES} + ) + + add_clang_library(mcRv32ASMProto + ${RV32_PROTO_SRCS} + ${RV32_PROTO_HDRS} LINK_LIBS ${PROTOBUF_LIBRARIES} @@ -37,6 +54,9 @@ include(ProtobufMutatorMC) include_directories(${ProtobufMutator_INCLUDE_DIRS}) + # Build the .proto files. + add_clang_subdirectory(proto-files) + # Build the protobuf->C++ translation library and driver. 
add_clang_subdirectory(proto-to-asm) @@ -48,6 +68,11 @@ ExampleMCProtoFuzzer.cpp ) + add_clang_executable(llvm-mc-assemble-proto-fuzzer-rv32 + ${DUMMY_MAIN} + ExampleMCProtoFuzzer.cpp + ) + set(COMMON_PROTO_FUZZ_LIBRARIES ${ProtobufMutator_LIBRARIES} ${PROTOBUF_LIBRARIES} @@ -62,6 +87,13 @@ mcProtoToASM ) + target_link_libraries(llvm-mc-assemble-proto-fuzzer-rv32 + PRIVATE + ${COMMON_PROTO_FUZZ_LIBRARIES} + mcRv32ASMProto + mcRv32ProtoToASM + ) + endif() add_clang_subdirectory(handle-asm) Index: tools/llvm-mc-assemble-proto-fuzzer/README.txt =================================================================== --- tools/llvm-mc-assemble-proto-fuzzer/README.txt +++ tools/llvm-mc-assemble-proto-fuzzer/README.txt @@ -10,8 +10,10 @@ -DCMAKE_PREFIX_PATH=/path/to/install \ -DPBM_FUZZ_PATH=/full/path/to/protobuf/install \ -DPBM_REPO=file:///full/path/to/libprotobuf-mutator ../../llvm -$ ninja -v llvm-mc-assemble-proto-fuzzer llvm-mc-assemble-proto-to-asm - 2>&1 | tee log +$ ninja -v llvm-mc-assemble-proto-fuzzer llvm-mc-assemble-proto-to-asm \ + llvm-mc-assemble-proto-fuzzer-rv32i \ + llvm-mc-assemble-proto-to-asm-rv32i \ + 2>&1 | tee log ------------------------------------------------------------------------------- Setting Up: @@ -29,9 +31,10 @@ fuzzer with, including the corpus directory and the full path to objdump. For example: -$ python mcfuzz.py --corpus corpus --max-len 32 --runs 10 --triple riscv32 - --out outputdir --objdump /full/path/to/riscv32-unknown-linux-gnu-objdump - --mattr +c +$ python mcfuzz.py --corpus corpus --max-len 32 --runs 10 --triple riscv32 \ + --out outputdir --objdump /full/path/to/riscv32-unknown-linux-gnu-objdump \ + --mattr +c,+m --march rv32imc \ + --assemble /full/path/to/riscv32-unknown-linux-gnu-as Run the script with the --verbose flag if you would like to see the output of the fuzzer as it runs. 
@@ -63,3 +66,10 @@ The last part of the script runs a diff of all the .s and .objdump files, and prints out a summary of results (how many diffs passed and how many failed). + +------------------------------------------------------------------------------- +Notes: +------------------------------------------------------------------------------- +Currently, running the Python script reports some failures because the +golden disassembler prints certain instructions using their aliases, so +the diff against the generated .s file fails. Index: tools/llvm-mc-assemble-proto-fuzzer/asm_proto.proto =================================================================== --- tools/llvm-mc-assemble-proto-fuzzer/asm_proto.proto +++ /dev/null @@ -1,61 +0,0 @@ -//===-- asm_proto.proto - Protobuf description of ASM ---------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// This file describes a subset of ASM as a protobuf. It is used to -/// more easily find interesting inputs for fuzzing llvm mc layer. 
-/// -//===----------------------------------------------------------------------===// - -syntax = "proto2"; - -message Register { - enum RegName { - X0 = 1; X1 = 2; X2 = 3; X3 = 4; X4 = 5; X5 = 6; X6 = 7; X7 = 8; X8 = 9; - X9 = 10; X10 = 11; X11 = 12; X12 = 13; X13 = 14; X14 = 15; X15 = 16; - X16 = 17; X17 = 18; X18 = 19; X19 = 20; X20 = 21; X21 = 22; X22 = 23; - X23 = 24; X24 = 25; X25 = 26; X26 = 27; X27 = 28; X28 = 29; X29 = 30; - X30 = 31; X31 = 32; - }; - required RegName name = 1; -} - -message ITypeOpcode { - enum Op { - ADD = 1; SUB = 2; - }; - required Op op = 1; -} - -message ITypeOperands { - required Register operand1 = 1; - required Register operand2 = 2; - required Register operand3 = 3; -} - -message ITypeStatement { - required ITypeOpcode opcode = 1; - required ITypeOperands operands = 2; -} - -message AsmStatement { - oneof asmstatement_oneof { - ITypeStatement statement = 2; - } -} - -message AsmStatementSeq { - repeated AsmStatement statements = 1; -} - -message Assembly { - required AsmStatementSeq asmStatements = 1; -} - -package mc_proto_fuzzer; Index: tools/llvm-mc-assemble-proto-fuzzer/example_mcfuzz.py =================================================================== --- /dev/null +++ tools/llvm-mc-assemble-proto-fuzzer/example_mcfuzz.py @@ -0,0 +1,174 @@ +#!/usr/bin/env python + +import argparse +import difflib +import os +import shlex +import subprocess +import sys +import tempfile + +parser = argparse.ArgumentParser() + +# Flags for the directory names, corpus and outputdir. +parser.add_argument("--corpus", type=str, help="corpus directory name", + required=True) +parser.add_argument("--out", type=str, + help="output directory name for obj files", default=None) + +# Flags for llvm-mc-assemble-proto-fuzzer: triple, mattr, filetype. 
+parser.add_argument("--triple", type=str, help="specify the triple", + default="riscv32") +parser.add_argument("--mattr", type=str, help="specify mattr", + default="") +parser.add_argument("--filetype", type=str, help="asm or obj", + default="obj") + +# These args are passed in after the -fuzzer-args flag. +parser.add_argument("--runs", type=int, help="number of runs", default=100) +parser.add_argument("--max-len", type=int, + help="limit instruction size for fuzzing", default=40) + +# Flag specifies the name of objdump executable. +parser.add_argument("--objdump", type=str, help="specify the path to" + " objdump", + default="/prj/llvm-arm/home/common/build_tools/" + "riscv32-gnu-7.2.0/bin/" + "riscv32-unknown-linux-gnu-objdump") + +# Flag specifies that we should print out everything +parser.add_argument("--verbose", dest="verbose", action="store_true") +parser.set_defaults(verbose=False) + +parser.add_argument("--riscv-no-aliases", dest="NoAliases", + action="store_true") +parser.set_defaults(NoAliases=False) + +args = parser.parse_args() + +# Step 1: Invoke fuzzer to generate a corpus. +cmd = 'llvm-mc-assemble-proto-fuzzer {corpus} -triple={triple} ' \ + + '-mattr={mattr} -filetype={filetype} -fuzzer-args ' \ + + '-runs={runs} -max_len={max_len}' +cmd = cmd.format(corpus=args.corpus, triple=args.triple, mattr=args.mattr, + filetype=args.filetype, runs=args.runs, max_len=args.max_len) +fuzz_command = shlex.split(cmd) + +fuzz_proc = subprocess.Popen(fuzz_command, stdout=subprocess.PIPE, + stderr=subprocess.STDOUT) +fuzz_out, fuzz_err = fuzz_proc.communicate() +if fuzz_proc.returncode != 0: + raise ValueError('failed to run fuzz {}: {}'.format(fuzz_command, + fuzz_err)) +if args.verbose: + print(fuzz_out) + +# If user specified an output directory, proceed to step 2; otherwise, exit. +if args.out is None: + print("No output directory specified; exiting after populating " + "corpus directory.") + sys.exit(0) + +# Keep track of which diffs pass / fail. 
+passes = 0 +fails = 0 +list_of_failures = [] +# Iterate through the corpus body. +# For each file, generate a .s and a .o file. Then, use objdump to +# generate a .objdump file, which we will compare to the .s file. +# Diff the .s with the corresponding .objdump file. +for filename in os.listdir(args.corpus): + + filename_prefix = args.out + "/" + filename + + # Step 2: Run proto-to-asm on corpus file to generate .s file. + proto_to_asm_command = ["llvm-mc-assemble-proto-to-asm", + args.corpus + "/" + filename, + "-riscv-no-aliases=" + + str(args.NoAliases)] + asm_file = open(filename_prefix + ".s", "w+r") + asm_proc = subprocess.Popen(proto_to_asm_command, stdout=asm_file, + stderr=subprocess.STDOUT) + asm_out, asm_err = asm_proc.communicate() + if asm_proc.returncode != 0: + raise ValueError('failed to run {}: {}'.format(proto_to_asm_command, + asm_err)) + asm_file.close() + + # Step 3: Generate .o files in the outputdir. + cmd = 'llvm-mc-assemble-proto-fuzzer {corpus}/{file} -triple={triple}' \ + + ' -mattr={mattr} -out={out} -filetype=obj -fuzzer-args -runs=1' + cmd = cmd.format(corpus=args.corpus, file=filename, triple=args.triple, + mattr=args.mattr, out=args.out) + obj_files_command = shlex.split(cmd) + obj_proc = subprocess.Popen(obj_files_command, stdout=subprocess.PIPE, + stderr=subprocess.STDOUT) + obj_out, obj_err = obj_proc.communicate() + if obj_proc.returncode != 0: + raise ValueError('failed to run fuzzer {}: {}'.format( + obj_files_command, obj_err)) + if args.verbose: + print(obj_out) + + # Step 4: Call objdump on each .o file in the output directory, + # to generate corresponding .objdump files. 
+ objdump_file = open(filename_prefix + ".objdump", "w+r") + cmd = '{objdump} -dr -M numeric {filename}.o' + cmd = cmd.format(objdump=args.objdump, filename=filename_prefix) + objdump_command = shlex.split(cmd) + objdump_proc = subprocess.Popen(objdump_command, + stdout=objdump_file, + stderr=subprocess.STDOUT) + objdump_out, objdump_err = objdump_proc.communicate() + if objdump_proc.returncode != 0: + raise ValueError('failed to run objdump {}: {}'.format( + objdump_command, objdump_err)) + + # Step 5: Process files generated by objdump so that the files only contain + # instructions. + objdump_file.seek(0) + temp_objdump_file = tempfile.TemporaryFile("w+r") + # Only the lines containing asm instructions have tabs in them; remove all + # other lines (for example, header lines and whitespace). + for line in objdump_file: + if "\t" in line: + temp_objdump_file.write(line) + objdump_file.close() + with open(filename_prefix + ".parsed_objdump", "w") as write_objdump_file: + temp_objdump_file.seek(0) + # Each line of the objdump output looks something like this: + # 0: 00318033 add x0,x3,x3 + # We remove the first two columns, leaving only the asm instruction. + for line in temp_objdump_file: + parts = line.split()[2:] + for part in parts: + write_objdump_file.write("\t") + write_objdump_file.write(part) + write_objdump_file.write("\n") + temp_objdump_file.close() + + # Step 6: Diff the .s file generated by proto-to-asm and the .objdump file + # generated by objdump. + prefix = "Checking " + filename + "..." 
+ file1 = open(filename_prefix + ".parsed_s", "r") + file2 = open(filename_prefix + ".parsed_objdump", "r") + diff = difflib.ndiff(file1.readlines(), file2.readlines()) + delta = ''.join(x[0:] for x in diff if x.startswith(('- ', '+ '))) + if delta: + print(prefix + "FAILURE!") + fails = fails + 1 + list_of_failures.append(filename) + list_of_failures.append(delta) + else: + print(prefix + "SUCCESS!") + passes = passes + 1 + +print("Succeeded: " + str(passes) + "\t\tFailed: " + str(fails) + + "\t\t[Total: " + str(passes + fails) + "]") + +if (fails != 0): + print("The following files failed...") + for item in list_of_failures: + print(item) + +sys.exit(0 if fails == 0 else 1) Index: tools/llvm-mc-assemble-proto-fuzzer/mcfuzz.py =================================================================== --- tools/llvm-mc-assemble-proto-fuzzer/mcfuzz.py +++ tools/llvm-mc-assemble-proto-fuzzer/mcfuzz.py @@ -8,133 +8,259 @@ import sys import tempfile -parser = argparse.ArgumentParser() - -# Flags for the directory names, corpus and outputdir. -parser.add_argument("--corpus", type=str, help="corpus directory name", \ - required=True) -parser.add_argument("--out", type=str, \ - help="output directory name for obj files", default=None) - -# Flags for llvm-mc-assemble-proto-fuzzer: triple, mattr, filetype. -parser.add_argument("--triple", type=str, help="specify the triple", \ - default="riscv32") -parser.add_argument("--mattr", type=str, help="specify mattr", \ - default="") -parser.add_argument("--filetype", type=str, help="asm or obj", \ - default="obj") - -# These args are passed in after the -fuzzer-args flag. -parser.add_argument("--runs", type=int, help="number of runs", default=100) -parser.add_argument("--max-len", type=int, \ - help="limit instruction size for fuzzing", default=40) - -# Flag specifies the name of objdump executable. 
-parser.add_argument("--objdump", type=str, help="specify the path to" \ - " objdump", default="/prj/llvm-arm/home/common/build_tools/" \ - "riscv32-gnu-7.2.0/bin/riscv32-unknown-linux-gnu-objdump") - -# Flag specifies that we should print out everything -parser.add_argument("--verbose", dest="verbose", action="store_true") -parser.set_defaults(verbose=False) - -parser.add_argument("--riscv-no-aliases", dest="NoAliases", - action="store_true") -parser.set_defaults(NoAliases=False) - -args = parser.parse_args() - -# Step 1: Invoke fuzzer to generate a corpus. -cmd = 'llvm-mc-assemble-proto-fuzzer {corpus} -triple={triple} -mattr={mattr}' \ - + ' -filetype={filetype} -fuzzer-args -runs={runs} -max_len={max_len}' -cmd = cmd.format(corpus=args.corpus, triple=args.triple, mattr=args.mattr, \ - filetype=args.filetype, runs=args.runs, max_len=args.max_len) -fuzz_command = shlex.split(cmd) - -fuzz_proc = subprocess.Popen(fuzz_command, stdout=subprocess.PIPE, \ - stderr=subprocess.STDOUT) -fuzz_out, fuzz_err = fuzz_proc.communicate() -if fuzz_proc.returncode != 0: - raise ValueError('failed to run fuzz {}: {}'.format(fuzz_command, fuzz_err)) -if args.verbose: - print(fuzz_out) - -# If user specified an output directory, proceed to step 2; otherwise, exit. -if args.out is None: - print("No output directory specified; exiting after populating " - "corpus directory.") - sys.exit(0) - -# Keep track of which diffs pass / fail. -passes = 0 -fails = 0 -list_of_failures = [] -# Iterate through the corpus body. -# For each file, generate a .s and a .o file. Then, use objdump to -# generate a .objdump file, which we will compare to the .s file. -# Diff the .s with the corresponding .objdump file. -for filename in os.listdir(args.corpus): - filename_prefix = args.out + "/" + filename +def main(): + parser = argparse.ArgumentParser() + parse_arguments(parser) + args = parser.parse_args() + + # Step 1: Invoke fuzzer to generate a corpus. 
+ call_fuzzer(args) + + # If user did not specify output dir, skip steps 2-8; exit. + if args.out is None: + print("No output directory specified; exiting after populating " + "corpus directory.") + sys.exit(0) + + # List of corpus files that both LLVM AS and GNU AS fail to assemble. + list_of_llvm_as_and_gnu_as_fails = [] + # List of corpus files that only LLVM AS fails to assemble. + list_of_llvm_as_fails = [] + # List of corpus files that only GNU AS fails to assemble. + list_of_gnu_as_fails = [] + # List of corpus files that both LLVM AS and GNU AS can assemble. + list_of_llvm_as_and_gnu_as_passes = [] + # Keep track of which diffs pass / fail. + passes = 0 + fails = 0 + list_of_diffs = [] + + # Iterate through the corpus body. + + # For each file, re-run the fuzzer to generate an object file with the fuzz + # target. Also run the golden assembler (gnu as) to generate a reference + # object file. If both assemblers behave the same way, proceed to generate + # a .s file (using proto-to-asm tool), disassemble the object file + # generated by the fuzz target (using objdump) and then compare the + # resulting .objdump with your .s file. + for filename in os.listdir(args.corpus): + filename_prefix = args.out + "/" + filename + # Step 2: Run fuzzer with filetype=obj; check for error in fuzz target. + target_error_occurred, out = call_fuzzer_on_corpus_file(args, filename) + + # Step 3: Run proto-to-asm on corpus file to generate .s file. + call_proto_to_asm(args, filename) + + # Step 4: Run golden assembler (gnu) and check for error. + golden_error_occurred = call_golden_assembler(args, filename) + + # Step 5: Compare behavior of fuzz target and golden assembler. 
+ #################################################################### + #-- Initial, tentative interpretation of results, based on the --# + #-- status (pass/fail) of tools (LLVM AS, GNU AS, GNU OBJDUMP) --# + #------------------------------------------------------------------# + # LLVM AS | GNU AS | GNU OBJDUMP | Conclusion # + #------------------------------------------------------------------# + # 0 | 0 | x | invalid/unimplemented instr # + # 0 | 1 | x | LLVM MC bug/unimplemented instr # + # 1 | 0 | x | LLVM MC bug # + # 1 | 1 | 0 | LLVM MC bug and GCC bug # + # 1 | 1 | 1 | success # + #################################################################### + if target_error_occurred and golden_error_occurred: + print("Both assemblers failed to assemble file: " + filename) + list_of_llvm_as_and_gnu_as_fails.append(filename) + continue + elif target_error_occurred: + print("Only the target AS failed to assemble file: " + filename) + list_of_llvm_as_fails.append(filename) + out = out.split("\n") + for line in out: + if (line.startswith("error:")): + list_of_llvm_as_fails.append(line) + nextline = out[out.index(line) + 1] + list_of_llvm_as_fails.append(nextline) + continue + continue + elif golden_error_occurred: + print("Only the golden AS failed to assemble file: " + filename) + list_of_gnu_as_fails.append(filename) + continue + else: + print("Both assemblers assembled file: " + filename) + list_of_llvm_as_and_gnu_as_passes.append(filename) + + # Step 6: Call objdump on each .o file (generated by fuzz target) in + # the output directory, to generate corresponding .objdump files. + objdump_file = call_objdump(args, filename) + + # Step 7: Process files generated by objdump so that the files only + # contain instructions. + process_objdump_file(objdump_file, args, filename) + + # Step 8: Diff the .s file generated by proto-to-asm with .objdump file + # generated by objdump. 
+ passes, fails, list_of_diffs = print_file_status(args, filename, + passes, fails, + list_of_diffs) + + print_result(passes, fails, list_of_llvm_as_and_gnu_as_fails, + list_of_llvm_as_fails, list_of_gnu_as_fails, + list_of_llvm_as_and_gnu_as_passes, list_of_diffs) + + sys.exit(0 if fails == 0 else 1) + + +def parse_arguments(parser): + # Flags for the directory names, corpus and outputdir. + parser.add_argument("--corpus", type=str, help="corpus directory name", + required=True) + parser.add_argument("--out", type=str, + help="output directory name for obj files", + default=None) + + # Flags for llvm-mc-assemble-proto-fuzzer: triple, mattr, filetype. + parser.add_argument("--triple", type=str, help="specify the triple", + default="riscv32") + parser.add_argument("--mattr", type=str, help="specify mattr", + default="") + parser.add_argument("--march", type=str, help="specify march", + default="rv32i") + parser.add_argument("--filetype", type=str, help="asm or obj", + default="obj") + + # These args are passed in after the -fuzzer-args flag. + parser.add_argument("--runs", type=int, help="number of runs", default=100) + parser.add_argument("--max-len", type=int, + help="limit instruction size for fuzzing", default=40) + + # Flag specifies the name of objdump executable. + parser.add_argument("--objdump", type=str, help="specify the path to" + " objdump", default="/prj/llvm-arm/home/common/" + "build_tools/riscv32-gnu-7.2.0/bin/" + "riscv32-unknown-linux-gnu-objdump") + + # Flag specifies the name of golden assembler executable. 
+ parser.add_argument("--assemble", type=str, help="specify the path to" + " golden assembler", default="/prj/llvm-arm/home/" + "common/build_tools/riscv32-gnu-7.2.0/bin/" + "riscv32-unknown-linux-gnu-as") + + # Flag specifies that we should print out everything + parser.add_argument("--verbose", dest="verbose", action="store_true") + parser.set_defaults(verbose=False) + + parser.add_argument("--riscv-no-aliases", dest="NoAliases", + action="store_true") + parser.set_defaults(NoAliases=False) + + +# This function calls the llvm-mc-assemble-proto-fuzzer with a given extension +def call_fuzzer(args): + cmd = 'llvm-mc-assemble-proto-fuzzer-{march} {corpus} \ + -triple={triple} -mattr={mattr} -filetype={filetype} -fuzzer-args ' \ + + '-runs={runs} -max_len={max_len}' + cmd = cmd.format(march=args.march, corpus=args.corpus, triple=args.triple, + mattr=args.mattr, filetype=args.filetype, + runs=args.runs, max_len=args.max_len) + fuzz_command = shlex.split(cmd) - # Step 2: Run proto-to-asm on corpus file to generate .s file. 
- proto_to_asm_command = ["llvm-mc-assemble-proto-to-asm", \ - args.corpus + "/" + filename, "-riscv-no-aliases=" + - str(args.NoAliases)] + fuzz_proc = subprocess.Popen(fuzz_command, stdout=subprocess.PIPE, + stderr=subprocess.STDOUT) + fuzz_out, fuzz_err = fuzz_proc.communicate() + if fuzz_proc.returncode != 0: + raise ValueError('failed to run fuzz {}: {}'.format(fuzz_command, + fuzz_err)) + if args.verbose: + print(fuzz_out) + + +def call_fuzzer_on_corpus_file(args, filename): + target_error_occurred = False + cmd = 'llvm-mc-assemble-proto-fuzzer-{march} {corpus}/{file} ' \ + + '-triple={triple} -mattr={mattr} -out={out} ' \ + + '-filetype=obj -fuzzer-args -runs=1' + cmd = cmd.format(march=args.march, corpus=args.corpus, file=filename, + triple=args.triple, mattr=args.mattr, out=args.out) + target_as_command = shlex.split(cmd) + target_as_proc = subprocess.Popen(target_as_command, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT) + target_as_out, target_as_err = target_as_proc.communicate() + if target_as_proc.returncode != 0: + raise ValueError('failed to run fuzzer {}: {}'.format( + target_as_command, target_as_err)) + if target_as_out: + if "error" in target_as_out: + print("Fuzz Target failed to assemble this input: " + filename) + target_error_occurred = True + if args.verbose: + print(target_as_out) + return target_error_occurred, target_as_out + + +def call_proto_to_asm(args, filename): + filename_prefix = args.out + "/" + filename + cmd = 'llvm-mc-assemble-proto-to-asm-{march} {corpus}/{file}' \ + + ' -riscv-no-aliases={NoAliases}' + cmd = cmd.format(march=args.march, corpus=args.corpus, file=filename, + NoAliases=args.NoAliases) + proto_to_asm_command = shlex.split(cmd) asm_file = open(filename_prefix + ".s", "w+r") asm_proc = subprocess.Popen(proto_to_asm_command, stdout=asm_file, - stderr=subprocess.STDOUT) + stderr=subprocess.STDOUT) asm_out, asm_err = asm_proc.communicate() if asm_proc.returncode != 0: raise ValueError('failed to run {}: 
{}'.format(proto_to_asm_command, - asm_err)) - - # Step 3: Generate .o files in the outputdir. - cmd = 'llvm-mc-assemble-proto-fuzzer {corpus}/{file} -triple={triple}' \ - + ' -mattr={mattr} -out={out} -filetype=obj -fuzzer-args -runs=1' - cmd = cmd.format(corpus=args.corpus, file=filename, triple=args.triple, \ - mattr=args.mattr, out=args.out) - obj_files_command = shlex.split(cmd) - obj_proc = subprocess.Popen(obj_files_command, stdout=subprocess.PIPE, - stderr=subprocess.STDOUT) - obj_out, obj_err = obj_proc.communicate() - if obj_proc.returncode != 0: - raise ValueError('failed to run fuzzer {}: {}'.format( \ - obj_files_command, obj_err)) - if args.verbose: - print(obj_out) + asm_err)) + elif asm_out: + print("Asm_out: " + asm_out) + asm_file.close() + + +def call_golden_assembler(args, filename): + golden_error_occurred = False + cmd = '{assemble} {dirname}/{filename}.s -march={march} ' \ + + '-o {dirname}/{filename}.out' + cmd = cmd.format(assemble=args.assemble, march=args.march, + dirname=args.out, filename=filename) + golden_as_command = shlex.split(cmd) + golden_as_proc = subprocess.Popen(golden_as_command, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT) + golden_as_out, golden_as_err = golden_as_proc.communicate() + if golden_as_proc.returncode != 0: + if (golden_as_out): + print("golden_as_out: " + golden_as_out) + golden_error_occurred = True + elif args.verbose: + print(golden_as_out) + return golden_error_occurred + - # Step 4: Call objdump on each .o file in the output directory, - # to generate corresponding .objdump files. 
+def call_objdump(args, filename): + filename_prefix = args.out + "/" + filename objdump_file = open(filename_prefix + ".objdump", "w+r") cmd = '{objdump} -dr -M numeric {filename}.o' cmd = cmd.format(objdump=args.objdump, filename=filename_prefix) objdump_command = shlex.split(cmd) objdump_proc = subprocess.Popen(objdump_command, - stdout=objdump_file, stderr=subprocess.STDOUT) + stdout=objdump_file, + stderr=subprocess.STDOUT) objdump_out, objdump_err = objdump_proc.communicate() if objdump_proc.returncode != 0: - raise ValueError('failed to run objdump {}: ' \ - + '{}'.format(objdump_command, objdump_err)) + raise ValueError('failed to run objdump {}: {}'.format( + objdump_command, objdump_err)) + return objdump_file - # Step 5: Process asm files so that files only contain asm instructions. - asm_file.seek(0) - lines = asm_file.readlines() - asm_file.close() - write_asm_file = open(filename_prefix + ".parsed_s", "w") - # Remove the first line of .s file, which contains the filename. For example: - # // corpus/dff0318decde43ce5065a4209412aa2c68d01318 - for line in lines: - if "\t" in line: - write_asm_file.write(line) - write_asm_file.close() - # Step 6: Process files generated by objdump so that the files only contain - # instructions. +def process_objdump_file(objdump_file, args, filename): + filename_prefix = args.out + "/" + filename objdump_file.seek(0) temp_objdump_file = tempfile.TemporaryFile("w+r") - # Only the lines containing asm instructions have tabs in them; remove all - # other lines (for example, header lines and whitespace). + # Only the lines containing asm instructions have tabs in them; remove + # all other lines (for example, header lines and whitespace). for line in objdump_file: if "\t" in line: temp_objdump_file.write(line) @@ -152,28 +278,65 @@ write_objdump_file.write("\n") temp_objdump_file.close() - # Step 7: Diff the .s file generated by proto-to-asm and the .objdump file - # generated by objdump. 
+ +def print_file_status(args, filename, passes, fails, list_of_diffs): + filename_prefix = args.out + "/" + filename prefix = "Checking " + filename + "..." - file1 = open(filename_prefix + ".parsed_s", "r") + file1 = open(filename_prefix + ".s", "r") file2 = open(filename_prefix + ".parsed_objdump", "r") diff = difflib.ndiff(file1.readlines(), file2.readlines()) delta = ''.join(x[0:] for x in diff if x.startswith(('- ', '+ '))) if delta: print(prefix + "FAILURE!") fails = fails + 1 - list_of_failures.append(filename) - list_of_failures.append(delta) + list_of_diffs.append(filename) + list_of_diffs.append(delta) else: print(prefix + "SUCCESS!") passes = passes + 1 + file1.close() + file2.close() + return passes, fails, list_of_diffs + + +def print_result(passes, fails, list1, list2, list3, list4, list5): + # Both llvm-mc-assembler and golden (gnu) assembler fail + count1 = 0 + for item in list1: + count1 = count1 + 1 + + count2 = 0 + # Target assembler llvm-mc fails + for item in list2: + count2 = count2 + 1 + + if (count2 != 0): + print("Target LLVM AS failed to assemble these inputs...") + for item in list2: + print(item) + + count3 = 0 + # Golden (gnu) assembler fails + for item in list3: + count3 = count3 + 1 -print("Succeeded: " + str(passes) + "\t\tFailed: " + str(fails) \ - + "\t\t[Total: " + str(passes + fails) + "]") + count4 = 0 + # Both assemblers pass + for item in list4: + count4 = count4 + 1 -if (fails != 0): - print("The following files failed...") - for item in list_of_failures: - print(item) + if (fails != 0): + print("Target LLVM AS and golden GNU AS both assembled " + "these inputs, but input ASM string differed from " + "GNU disassembler-generated ASM string...") + for item in list5: + print(item) + print("Both llvm as and gnu assembler fail: {}".format(count1)) + print("Only llvm as fails: {}".format(count2)) + print("Only gnu as fails: {}".format(count3)) + print("Both llvm as and gnu assembler pass: {}".format(count4)) + print("[Succeeded: 
{}/{}\tFailed: {}/{}]".format(passes, + count4, fails, count4)) -sys.exit(0 if fails == 0 else 1) +if __name__ == "__main__": + main() Index: tools/llvm-mc-assemble-proto-fuzzer/proto-files/asm_proto.proto =================================================================== --- /dev/null +++ tools/llvm-mc-assemble-proto-fuzzer/proto-files/asm_proto.proto @@ -0,0 +1,63 @@ +//===-- asm_proto.proto - Protobuf description of ASM ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file describes a subset of ASM as a protobuf. It is used to +/// more easily find interesting inputs for fuzzing llvm mc layer. +/// +//===----------------------------------------------------------------------===// + +syntax = "proto2"; + +message Register { + enum RegName { + X0 = 1; X1 = 2; X2 = 3; X3 = 4; X4 = 5; + X5 = 6; X6 = 7; X7 = 8; X8 = 9; X9 = 10; + X10 = 11; X11 = 12; X12 = 13; X13 = 14; X14 = 15; + X15 = 16; X16 = 17; X17 = 18; X18 = 19; X19 = 20; + X20 = 21; X21 = 22; X22 = 23; X23 = 24; X24 = 25; + X25 = 26; X26 = 27; X27 = 28; X28 = 29; X29 = 30; + X30 = 31; X31 = 32; + }; + required RegName name = 1; +} + +message RTypeOpcode { + enum Op { + ADD = 1; SUB = 2; + }; + required Op op = 1; +} + +message RTypeOperands { + required Register operand1 = 1; + required Register operand2 = 2; + required Register operand3 = 3; +} + +message RTypeStatement { + required RTypeOpcode opcode = 1; + required RTypeOperands operands = 2; +} + +message AsmStatement { + oneof asmstatement_oneof { + RTypeStatement statement = 2; + } +} + +message AsmStatementSeq { + repeated AsmStatement statements = 1; +} + +message Assembly { + required AsmStatementSeq asmStatements = 1; +} + +package mc_proto_fuzzer; Index: 
tools/llvm-mc-assemble-proto-fuzzer/proto-files/rv32.proto =================================================================== --- /dev/null +++ tools/llvm-mc-assemble-proto-fuzzer/proto-files/rv32.proto @@ -0,0 +1,49 @@ +//===-- rv32.proto - Protobuf description of ASM --------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file serves as the base for the protobuf representation of the RV32I +/// ISA of RISC-V ASM, with R,I,S,B,U,J-type instructions. It is used to more +/// easily find interesting inputs for fuzzing the llvm mc layer. +/// The fuzzer currently supports all the instructions in the RV32I ISA, +/// except for the following instructions: FENCE, FENCE.I, ECALL, EBREAK, +/// CSRRW, CSRRS, CSRRC, CSRRWI, CSRRSI, and CSRRCI. +/// +//===----------------------------------------------------------------------===// + +syntax = "proto2"; + +import "rv32c.proto"; +import "rv32i.proto"; +import "rv32_operands.proto"; +import "rv32m.proto"; + +message AsmStatement { + oneof asmstatement_oneof { + RTypeStatement statement1 = 1; + RTypeMulStatement statement2 = 2; + ITypeStatement statement3 = 3; + ITypeLoadStatement statement4 = 4; + STypeStatement statement5 = 5; + BTypeStatement statement6 = 6; + UTypeStatement statement7 = 7; + JTypeStatement statement8 = 8; + CTypeStatement statement9 = 9; + } +} + +message AsmStatementSeq { + repeated AsmStatement statements = 1; +} + +message Assembly { + required AsmStatementSeq asmStatements = 1; +} + +package mc_proto_fuzzer; Index: tools/llvm-mc-assemble-proto-fuzzer/proto-files/rv32_operands.proto =================================================================== --- /dev/null +++ tools/llvm-mc-assemble-proto-fuzzer/proto-files/rv32_operands.proto @@ -0,0 +1,38 @@ +//===-- 
rv32_operands.proto - Protobuf description of ASM -----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file describes the operands (Registers and Immediates) for the RISC-V +/// ASM, in Protobuf form. +/// +//===----------------------------------------------------------------------===// + +syntax = "proto2"; + +message Immediate { + oneof imm_oneof { + sint32 s_imm = 1; + uint32 u_imm = 2; + } +} + +message Register { + enum RegName { + X0 = 0; X1 = 1; X2 = 2; X3 = 3; X4 = 4; + X5 = 5; X6 = 6; X7 = 7; X8 = 8; X9 = 9; + X10 = 10; X11 = 11; X12 = 12; X13 = 13; X14 = 14; + X15 = 15; X16 = 16; X17 = 17; X18 = 18; X19 = 19; + X20 = 20; X21 = 21; X22 = 22; X23 = 23; X24 = 24; + X25 = 25; X26 = 26; X27 = 27; X28 = 28; X29 = 29; + X30 = 30; X31 = 31; + }; + required RegName name = 1; +} + +package mc_proto_fuzzer; Index: tools/llvm-mc-assemble-proto-fuzzer/proto-files/rv32c.proto =================================================================== --- /dev/null +++ tools/llvm-mc-assemble-proto-fuzzer/proto-files/rv32c.proto @@ -0,0 +1,97 @@ +//===-- rv32c.proto - Protobuf description of ASM -------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file describes a subset of the RISC-V ASM language; it uses Protobuf to +/// represent the compressed instructions of the RV32I ISA (C-extension). 
+/// +//===----------------------------------------------------------------------===// + +syntax = "proto2"; + +import "rv32_operands.proto"; + +message COneRegOpcode { + enum Op { + CJR = 0; CJALR = 1; + } + required Op op = 1; +} + +message COneImmOpcode { + enum Op { + CJ = 0; CJAL = 1; + } + required Op op = 1; +} + +message CTwoRegOpcode { + enum Op { + CADD = 0; CMV = 1; CSUB = 2; CXOR = 3; COR = 4; + CAND = 5; + } + required Op op = 1; +} + +message CTwoOpOpcode { + enum Op { + CADDI = 0; CLI = 1; CADDI4SPN = 2; CSLLI = 3; CSRLI = 4; + CSRAI = 5; CANDI = 6; CLWSP = 7; CSWSP = 8; CBEQZ = 9; + CBNEZ = 10; CADDI16SP = 11; + } + required Op op = 1; +} + +message CThreeOpOpcode { + enum Op { + CLW = 0; CSW = 1; + } + required Op op = 1; +} + +message COneImmStatement { + required COneImmOpcode opcode = 1; + required Immediate operand1 = 2; +} + +message COneRegStatement { + required COneRegOpcode opcode = 1; + required Register operand1 = 2; +} + +message CTwoOpStatement { + required CTwoOpOpcode opcode = 1; + required Register operand1 = 2; + required Immediate operand2 = 3; +} + +message CTwoRegStatement { + required CTwoRegOpcode opcode = 1; + required Register operand1 = 2; + required Register operand2 = 3; +} + +message CThreeOpStatement { + required CThreeOpOpcode opcode = 1; + required Register operand1 = 2; + required Immediate operand2 = 3; + required Register operand3 = 4; +} + +message CTypeStatement { + oneof compressed_oneof { + COneImmStatement statement1 = 1; + COneRegStatement statement2 = 2; + CTwoOpStatement statement3 = 3; + CTwoRegStatement statement4 = 4; + CThreeOpStatement statement5 = 5; + } +} + +package mc_proto_fuzzer; Index: tools/llvm-mc-assemble-proto-fuzzer/proto-files/rv32i.proto =================================================================== --- /dev/null +++ tools/llvm-mc-assemble-proto-fuzzer/proto-files/rv32i.proto @@ -0,0 +1,147 @@ +//===-- rv32i.proto - Protobuf description of ASM -------------------------===// +// +// 
The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file describes a subset of the RISC-V ASM language; it uses Protobuf to +/// represent the base ISA, RV32I. +/// +//===----------------------------------------------------------------------===// + +syntax = "proto2"; + +import "rv32_operands.proto"; + +message RTypeOpcode { + enum Op { + ADD = 0; SUB = 1; SLL = 2; SLT = 3; SLTU = 4; + XOR = 5; SRL = 6; SRA = 7; OR = 8; AND = 9; + }; + required Op op = 1; +} + +message ITypeOpcode { + enum Op { + ADDI = 0; SLTI = 1; ANDI = 2; ORI = 3; XORI = 4; + SLTIU = 5; JALR = 6; SRAI = 7; SRLI = 8; SLLI = 9; + }; + required Op op = 1; +} + +message ITypeLoadOpcode { + enum Op { + LB = 0; LH = 1; LW = 2; LBU = 3; LHU = 4; + }; + required Op op = 1; +} + +message STypeOpcode { + enum Op { + SW = 0; SH = 1; SB = 2; + }; + required Op op = 1; +} + +message BTypeOpcode { + enum Op { + BEQ = 0; BNE = 1; BGE = 2; BLT = 3; BGEU = 4; + BLTU = 5; + }; + required Op op = 1; +} + +message UTypeOpcode { + enum Op { + LUI = 0; AUIPC = 1; + }; + required Op op = 1; +} + +message JTypeOpcode { + enum Op { + JAL = 0; + }; + required Op op = 1; +} + +message RTypeOperands { + required Register operand1 = 1; + required Register operand2 = 2; + required Register operand3 = 3; +} + +message RTypeStatement { + required RTypeOpcode opcode = 1; + required RTypeOperands operands = 2; +} + +message ITypeOperands { + required Register operand1 = 1; + required Register operand2 = 2; + required Immediate operand3 = 3; +} + +message ITypeStatement { + required ITypeOpcode opcode = 1; + required ITypeOperands operands = 2; +} + +message ITypeLoadOperands { + required Register operand1 = 1; + required Register operand2 = 2; + required Immediate operand3 = 3; +} + +message ITypeLoadStatement { + 
required ITypeLoadOpcode opcode = 1; + required ITypeLoadOperands operands = 2; +} + +message STypeOperands { + required Register operand1 = 1; + required Register operand2 = 2; + required Immediate operand3 = 3; +} + +message STypeStatement { + required STypeOpcode opcode = 1; + required STypeOperands operands = 2; +} + +message BTypeOperands { + required Register operand1 = 1; + required Register operand2 = 2; + required Immediate operand3 = 3; +} + +message BTypeStatement { + required BTypeOpcode opcode = 1; + required BTypeOperands operands = 2; +} + +message UTypeOperands { + required Register operand1 = 1; + required Immediate operand2 = 2; +} + +message UTypeStatement { + required UTypeOpcode opcode = 1; + required UTypeOperands operands = 2; +} + +message JTypeOperands { + required Register operand1 = 1; + required Immediate operand2 = 2; +} + +message JTypeStatement { + required JTypeOpcode opcode = 1; + required JTypeOperands operands = 2; +} + +package mc_proto_fuzzer; Index: tools/llvm-mc-assemble-proto-fuzzer/proto-files/rv32i_inst.proto =================================================================== --- /dev/null +++ tools/llvm-mc-assemble-proto-fuzzer/proto-files/rv32i_inst.proto @@ -0,0 +1,97 @@ +//===-- rv32i_inst.proto - Protobuf description of ASM --------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file describes a subset of the RISC-V ASM language; it uses Protobuf to +/// represent the base ISA, RV32I. These instructions are enumerated in +/// rv32i_opcode.proto. 
+/// +//===----------------------------------------------------------------------===// + +syntax = "proto2"; + +import "rv32i_operands.proto"; +import "rv32i_opcode.proto"; + +message RTypeOperands { + required Register operand1 = 1; + required Register operand2 = 2; + required Register operand3 = 3; +} + +message RTypeStatement { + required RTypeOpcode opcode = 1; + required RTypeOperands operands = 2; +} + +message ITypeOperands { + required Register operand1 = 1; + required Register operand2 = 2; + required Immediate operand3 = 3; +} + +message ITypeStatement { + required ITypeOpcode opcode = 1; + required ITypeOperands operands = 2; +} + +message ITypeLoadOperands { + required Register operand1 = 1; + required Register operand2 = 2; + required Immediate operand3 = 3; +} + +message ITypeLoadStatement { + required ITypeLoadOpcode opcode = 1; + required ITypeLoadOperands operands = 2; +} + +message STypeOperands { + required Register operand1 = 1; + required Register operand2 = 2; + required Immediate operand3 = 3; +} + +message STypeStatement { + required STypeOpcode opcode = 1; + required STypeOperands operands = 2; +} + +message BTypeOperands { + required Register operand1 = 1; + required Register operand2 = 2; + required Immediate operand3 = 3; +} + +message BTypeStatement { + required BTypeOpcode opcode = 1; + required BTypeOperands operands = 2; +} + +message UTypeOperands { + required Register operand1 = 1; + required Immediate operand2 = 2; +} + +message UTypeStatement { + required UTypeOpcode opcode = 1; + required UTypeOperands operands = 2; +} + +message JTypeOperands { + required Register operand1 = 1; + required Immediate operand2 = 2; +} + +message JTypeStatement { + required JTypeOpcode opcode = 1; + required JTypeOperands operands = 2; +} + +package mc_proto_fuzzer; Index: tools/llvm-mc-assemble-proto-fuzzer/proto-files/rv32i_opcode.proto =================================================================== --- /dev/null +++ 
tools/llvm-mc-assemble-proto-fuzzer/proto-files/rv32i_opcode.proto @@ -0,0 +1,72 @@ +//===-- rv32i_opcode.proto - Protobuf description of ASM ------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file describes a subset of the RISC-V ASM language; it uses Protobuf to +/// enumerate the instructions of the RV32I ISA. +/// +//===----------------------------------------------------------------------===// + +syntax = "proto2"; + +import "rv32i_operands.proto"; + +message RTypeOpcode { + enum Op { + ADD = 0; SUB = 1; SLL = 2; SLT = 3; SLTU = 4; + XOR = 5; SRL = 6; SRA = 7; OR = 8; AND = 9; + }; + required Op op = 1; +} + +message ITypeOpcode { + enum Op { + ADDI = 0; SLTI = 1; ANDI = 2; ORI = 3; XORI = 4; + SLTIU = 5; JALR = 6; SRAI = 7; SRLI = 8; SLLI = 9; + }; + required Op op = 1; +} + +message ITypeLoadOpcode { + enum Op { + LB = 0; LH = 1; LW = 2; LBU = 3; LHU = 4; + }; + required Op op = 1; +} + +message STypeOpcode { + enum Op { + SW = 0; SH = 1; SB = 2; + }; + required Op op = 1; +} + +message BTypeOpcode { + enum Op { + BEQ = 0; BNE = 1; BGE = 2; BLT = 3; BGEU = 4; + BLTU = 5; + }; + required Op op = 1; +} + +message UTypeOpcode { + enum Op { + LUI = 0; AUIPC = 1; + }; + required Op op = 1; +} + +message JTypeOpcode { + enum Op { + JAL = 0; + }; + required Op op = 1; +} + +package mc_proto_fuzzer; Index: tools/llvm-mc-assemble-proto-fuzzer/proto-files/rv32i_operands.proto =================================================================== --- /dev/null +++ tools/llvm-mc-assemble-proto-fuzzer/proto-files/rv32i_operands.proto @@ -0,0 +1,38 @@ +//===-- rv32i_operands.proto - Protobuf description of ASM ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of 
Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file describes the operands (Registers and Immediates) for the RISC-V +/// ASM, in Protobuf form. +/// +//===----------------------------------------------------------------------===// + +syntax = "proto2"; + +message Immediate { + oneof imm_oneof { + sint32 s_imm = 1; + uint32 u_imm = 2; + } +} + +message Register { + enum RegName { + X0 = 0; X1 = 1; X2 = 2; X3 = 3; X4 = 4; + X5 = 5; X6 = 6; X7 = 7; X8 = 8; X9 = 9; + X10 = 10; X11 = 11; X12 = 12; X13 = 13; X14 = 14; + X15 = 15; X16 = 16; X17 = 17; X18 = 18; X19 = 19; + X20 = 20; X21 = 21; X22 = 22; X23 = 23; X24 = 24; + X25 = 25; X26 = 26; X27 = 27; X28 = 28; X29 = 29; + X30 = 30; X31 = 31; + }; + required RegName name = 1; +} + +package mc_proto_fuzzer; Index: tools/llvm-mc-assemble-proto-fuzzer/proto-files/rv32m.proto =================================================================== --- /dev/null +++ tools/llvm-mc-assemble-proto-fuzzer/proto-files/rv32m.proto @@ -0,0 +1,35 @@ +//===-- rv32m.proto - Protobuf description of ASM -------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file describes a subset of the RISC-V ASM language; it uses Protobuf to +/// represent the integer mul/div instructions of the RV32I ISA (M-extension), +/// which are enumerated in RTypeMulOpcode below. 
+/// +//===----------------------------------------------------------------------===// + +syntax = "proto2"; + +import "rv32i.proto"; +import "rv32_operands.proto"; + +message RTypeMulOpcode { + enum Op { + MUL = 10; MULH = 11; MULHSU = 12; MULHU = 13; DIV = 14; + DIVU = 15; REM = 16; REMU = 17; + }; + required Op op = 1; +} + +message RTypeMulStatement { + required RTypeMulOpcode opcode = 1; + required RTypeOperands operands = 2; +} + +package mc_proto_fuzzer; Index: tools/llvm-mc-assemble-proto-fuzzer/proto-to-asm/CMakeLists.txt =================================================================== --- tools/llvm-mc-assemble-proto-fuzzer/proto-to-asm/CMakeLists.txt +++ tools/llvm-mc-assemble-proto-fuzzer/proto-to-asm/CMakeLists.txt @@ -2,13 +2,23 @@ set(CMAKE_CXX_FLAGS ${CXX_FLAGS_NOFUZZ}) # Needed by LLVM's CMake checks because this file defines multiple targets. -set(LLVM_OPTIONAL_SOURCES proto_to_asm.cpp proto_to_asm_main.cpp) +set(LLVM_OPTIONAL_SOURCES proto_to_asm.cpp proto_to_asm_main.cpp + proto_to_asm_rv32.cpp) add_clang_library(mcProtoToASM proto_to_asm.cpp DEPENDS mcASMProto LINK_LIBS mcASMProto ${PROTOBUF_LIBRARIES} ) +add_clang_library(mcRv32ProtoToASM proto_to_asm_rv32.cpp + DEPENDS mcRv32ASMProto + LINK_LIBS mcRv32ASMProto ${PROTOBUF_LIBRARIES} + ) + add_clang_executable(llvm-mc-assemble-proto-to-asm proto_to_asm_main.cpp) +add_clang_executable(llvm-mc-assemble-proto-to-asm-rv32 + proto_to_asm_main.cpp) target_link_libraries(llvm-mc-assemble-proto-to-asm PRIVATE mcProtoToASM) +target_link_libraries(llvm-mc-assemble-proto-to-asm-rv32 + PRIVATE mcRv32ProtoToASM) Index: tools/llvm-mc-assemble-proto-fuzzer/proto-to-asm/proto_to_asm.h =================================================================== --- tools/llvm-mc-assemble-proto-fuzzer/proto-to-asm/proto_to_asm.h +++ tools/llvm-mc-assemble-proto-fuzzer/proto-to-asm/proto_to_asm.h @@ -11,8 +11,6 @@ // //===----------------------------------------------------------------------===// -#include -#include 
#include namespace mc_proto_fuzzer { Index: tools/llvm-mc-assemble-proto-fuzzer/proto-to-asm/proto_to_asm.cpp =================================================================== --- tools/llvm-mc-assemble-proto-fuzzer/proto-to-asm/proto_to_asm.cpp +++ tools/llvm-mc-assemble-proto-fuzzer/proto-to-asm/proto_to_asm.cpp @@ -20,18 +20,19 @@ namespace mc_proto_fuzzer { static bool NoAliases = false; + std::ostream &operator<<(std::ostream &OS, const Register &X) { switch (X.name()) { - case Register::X0: OS << "x0"; break; - case Register::X1: OS << "x1"; break; - case Register::X2: OS << "x2"; break; - case Register::X3: OS << "x3"; break; - case Register::X4: OS << "x4"; break; - case Register::X5: OS << "x5"; break; - case Register::X6: OS << "x6"; break; - case Register::X7: OS << "x7"; break; - case Register::X8: OS << "x8"; break; - case Register::X9: OS << "x9"; break; + case Register::X0: OS << "x0"; break; + case Register::X1: OS << "x1"; break; + case Register::X2: OS << "x2"; break; + case Register::X3: OS << "x3"; break; + case Register::X4: OS << "x4"; break; + case Register::X5: OS << "x5"; break; + case Register::X6: OS << "x6"; break; + case Register::X7: OS << "x7"; break; + case Register::X8: OS << "x8"; break; + case Register::X9: OS << "x9"; break; case Register::X10: OS << "x10"; break; case Register::X11: OS << "x11"; break; case Register::X12: OS << "x12"; break; @@ -57,27 +58,27 @@ } return OS; } -std::ostream &operator<<(std::ostream &OS, const ITypeOpcode &X) { +std::ostream &operator<<(std::ostream &OS, const RTypeOpcode &X) { switch (X.op()) { - case ITypeOpcode_Op_ADD: OS << "add"; break; - case ITypeOpcode_Op_SUB: OS << "sub"; break; + case RTypeOpcode_Op_ADD: OS << "add"; break; + case RTypeOpcode_Op_SUB: OS << "sub"; break; } return OS; } -std::ostream &operator<<(std::ostream &OS, const ITypeOperands &X) { +std::ostream &operator<<(std::ostream &OS, const RTypeOperands &X) { OS << X.operand1(); OS << "," << X.operand2(); OS << "," << 
X.operand3(); return OS; } -std::ostream &operator<<(std::ostream &OS, const ITypeStatement &X) { +std::ostream &operator<<(std::ostream &OS, const RTypeStatement &X) { OS << "\t" << X.opcode() << "\t"; OS << X.operands() << "\n"; return OS; } std::ostream &operator<<(std::ostream &OS, const AsmStatement &X) { if (X.has_statement()) { - ITypeOperands ThreeOperands = X.statement().operands(); + RTypeOperands ThreeOperands = X.statement().operands(); Register Oper1 = ThreeOperands.operand1(); Register Oper2 = ThreeOperands.operand2(); Register Oper3 = ThreeOperands.operand3(); @@ -85,12 +86,7 @@ OS << X.statement(); return OS; } - if (X.statement().opcode().op() == ITypeOpcode_Op_ADD) { - OS << X.statement(); - return OS; - } - else if (X.statement().opcode().op() == - ITypeOpcode_Op_SUB) { + if (X.statement().opcode().op() == RTypeOpcode_Op_SUB) { if (Oper2.name() == Register_RegName_X0) { OS << "\tneg\t" << Oper1 << "," << Oper3 << "\n"; return OS; @@ -98,7 +94,8 @@ OS << X.statement(); return OS; } - } + } else + return OS << X.statement(); } return OS; } Index: tools/llvm-mc-assemble-proto-fuzzer/proto-to-asm/proto_to_asm_main.cpp =================================================================== --- tools/llvm-mc-assemble-proto-fuzzer/proto-to-asm/proto_to_asm_main.cpp +++ tools/llvm-mc-assemble-proto-fuzzer/proto-to-asm/proto_to_asm_main.cpp @@ -43,7 +43,6 @@ std::fstream in(NewArgv[i]); std::string str((std::istreambuf_iterator<char>(in)), std::istreambuf_iterator<char>()); - std::cout << "// " << NewArgv[i] << std::endl; std::cout << mc_proto_fuzzer::ProtoToASM( reinterpret_cast<const uint8_t *>(str.data()), str.size(), (bool) NoAliases); Index: tools/llvm-mc-assemble-proto-fuzzer/proto-to-asm/proto_to_asm_rv32.cpp =================================================================== --- /dev/null +++ tools/llvm-mc-assemble-proto-fuzzer/proto-to-asm/proto_to_asm_rv32.cpp @@ -0,0 +1,405 @@ +//==-- proto_to_asm_rv32.cpp - Protobuf-ASM conversion ---------------------==// +// +// The LLVM 
Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Implements functions for converting between protobufs for the assembly +// language grammar for the RV32IMC instruction set and assembly language +// instructions. +// +//===----------------------------------------------------------------------===// +#include "proto_to_asm.h" +#include "rv32c.pb.h" +#include "rv32i.pb.h" +#include "rv32_operands.pb.h" +#include "rv32.pb.h" +#include "rv32m.pb.h" + +#include <ostream> +#include <sstream> + +namespace mc_proto_fuzzer { + +static bool NoAliases = false; + +std::ostream &operator<<(std::ostream &OS, const Immediate &X) { + if (X.has_s_imm()) + return OS << X.s_imm(); + else + return OS << X.u_imm(); +} +std::ostream &operator<<(std::ostream &OS, const Register &X) { + switch (X.name()) { + case Register::X0: OS << "x0"; break; + case Register::X1: OS << "x1"; break; + case Register::X2: OS << "x2"; break; + case Register::X3: OS << "x3"; break; + case Register::X4: OS << "x4"; break; + case Register::X5: OS << "x5"; break; + case Register::X6: OS << "x6"; break; + case Register::X7: OS << "x7"; break; + case Register::X8: OS << "x8"; break; + case Register::X9: OS << "x9"; break; + case Register::X10: OS << "x10"; break; + case Register::X11: OS << "x11"; break; + case Register::X12: OS << "x12"; break; + case Register::X13: OS << "x13"; break; + case Register::X14: OS << "x14"; break; + case Register::X15: OS << "x15"; break; + case Register::X16: OS << "x16"; break; + case Register::X17: OS << "x17"; break; + case Register::X18: OS << "x18"; break; + case Register::X19: OS << "x19"; break; + case Register::X20: OS << "x20"; break; + case Register::X21: OS << "x21"; break; + case Register::X22: OS << "x22"; break; + case Register::X23: OS << "x23"; break; + case Register::X24: OS << "x24"; break; + case 
Register::X25: OS << "x25"; break; + case Register::X26: OS << "x26"; break; + case Register::X27: OS << "x27"; break; + case Register::X28: OS << "x28"; break; + case Register::X29: OS << "x29"; break; + case Register::X30: OS << "x30"; break; + case Register::X31: OS << "x31"; break; + default: assert(!"The default case was reached."); + } + return OS; +} +std::ostream &operator<<(std::ostream &OS, const RTypeMulOpcode &X) { + switch (X.op()) { + case RTypeMulOpcode_Op_MUL: OS << "mul"; break; + case RTypeMulOpcode_Op_MULH: OS << "mulh"; break; + case RTypeMulOpcode_Op_MULHSU: OS << "mulhsu"; break; + case RTypeMulOpcode_Op_MULHU: OS << "mulhu"; break; + case RTypeMulOpcode_Op_DIV: OS << "div"; break; + case RTypeMulOpcode_Op_DIVU: OS << "divu"; break; + case RTypeMulOpcode_Op_REM: OS << "rem"; break; + case RTypeMulOpcode_Op_REMU: OS << "remu"; break; + default: assert(!"The default case was reached."); + } + return OS; +} +std::ostream &operator<<(std::ostream &OS, const RTypeOpcode &X) { + switch (X.op()) { + case RTypeOpcode_Op_ADD: OS << "add"; break; + case RTypeOpcode_Op_SUB: OS << "sub"; break; + case RTypeOpcode_Op_SLL: OS << "sll"; break; + case RTypeOpcode_Op_SLT: OS << "slt"; break; + case RTypeOpcode_Op_SLTU: OS << "sltu"; break; + case RTypeOpcode_Op_XOR: OS << "xor"; break; + case RTypeOpcode_Op_SRL: OS << "srl"; break; + case RTypeOpcode_Op_SRA: OS << "sra"; break; + case RTypeOpcode_Op_OR: OS << "or"; break; + case RTypeOpcode_Op_AND: OS << "and"; break; + default: assert(!"The default case was reached."); + } + return OS; +} +std::ostream &operator<<(std::ostream &OS, const RTypeOperands &X) { + OS << X.operand1(); + OS << "," << X.operand2(); + OS << "," << X.operand3(); + return OS; +} +std::ostream &operator<<(std::ostream &OS, const RTypeMulStatement &X) { + OS << "\t" << X.opcode() << "\t"; + OS << X.operands() << "\n"; + return OS; +} +std::ostream &operator<<(std::ostream &OS, const RTypeStatement &X) { + OS << "\t" << X.opcode() << "\t"; 
+ OS << X.operands() << "\n"; + return OS; +} +std::ostream &operator<<(std::ostream &OS, const ITypeLoadOpcode &X) { + switch (X.op()) { + case ITypeLoadOpcode_Op_LB: OS << "lb"; break; + case ITypeLoadOpcode_Op_LH: OS << "lh"; break; + case ITypeLoadOpcode_Op_LW: OS << "lw"; break; + case ITypeLoadOpcode_Op_LBU: OS << "lbu"; break; + case ITypeLoadOpcode_Op_LHU: OS << "lhu"; break; + default: assert(!"The default case was reached."); + } + return OS; +} +std::ostream &operator<<(std::ostream &OS, const ITypeLoadOperands &X) { + OS << X.operand1(); + OS << "," << X.operand3(); + OS << "(" << X.operand2() << ")"; + return OS; +} +std::ostream &operator<<(std::ostream &OS, const ITypeLoadStatement &X) { + OS << "\t" << X.opcode() << "\t"; + OS << X.operands() << "\n"; + return OS; +} +std::ostream &operator<<(std::ostream &OS, const ITypeOpcode &X) { + switch (X.op()) { + case ITypeOpcode_Op_ADDI: OS << "addi"; break; + case ITypeOpcode_Op_SLTI: OS << "slti"; break; + case ITypeOpcode_Op_ANDI: OS << "andi"; break; + case ITypeOpcode_Op_ORI: OS << "ori"; break; + case ITypeOpcode_Op_XORI: OS << "xori"; break; + case ITypeOpcode_Op_SLTIU: OS << "sltiu"; break; + case ITypeOpcode_Op_JALR: OS << "jalr"; break; + case ITypeOpcode_Op_SRAI: OS << "srai"; break; + case ITypeOpcode_Op_SRLI: OS << "srli"; break; + case ITypeOpcode_Op_SLLI: OS << "slli"; break; + default: assert(!"The default case was reached."); + } + return OS; +} +std::ostream &operator<<(std::ostream &OS, const ITypeOperands &X) { + OS << X.operand1(); + OS << "," << X.operand2(); + OS << "," << X.operand3(); + return OS; +} +std::ostream &operator<<(std::ostream &OS, const ITypeStatement &X) { + OS << "\t" << X.opcode() << "\t"; + OS << X.operands() << "\n"; + return OS; +} +std::ostream &operator<<(std::ostream &OS, const STypeOpcode &X) { + switch (X.op()) { + case STypeOpcode_Op_SW: OS << "sw"; break; + case STypeOpcode_Op_SH: OS << "sh"; break; + case STypeOpcode_Op_SB: OS << "sb"; break; + default: 
assert(!"The default case was reached."); + } + return OS; +} +std::ostream &operator<<(std::ostream &OS, const STypeOperands &X) { + OS << X.operand1(); + OS << "," << X.operand3(); + OS << "(" << X.operand2() << ")"; + return OS; +} +std::ostream &operator<<(std::ostream &OS, const STypeStatement &X) { + OS << "\t" << X.opcode() << "\t"; + OS << X.operands() << "\n"; + return OS; +} +std::ostream &operator<<(std::ostream &OS, const BTypeOpcode &X) { + switch (X.op()) { + case BTypeOpcode_Op_BEQ: OS << "beq"; break; + case BTypeOpcode_Op_BNE: OS << "bne"; break; + case BTypeOpcode_Op_BGE: OS << "bge"; break; + case BTypeOpcode_Op_BLT: OS << "blt"; break; + case BTypeOpcode_Op_BGEU: OS << "bgeu"; break; + case BTypeOpcode_Op_BLTU: OS << "bltu"; break; + default: assert(!"The default case was reached."); + } + return OS; +} +std::ostream &operator<<(std::ostream &OS, const BTypeOperands &X) { + OS << X.operand1(); + OS << "," << X.operand2(); + OS << "," << X.operand3(); + return OS; +} +std::ostream &operator<<(std::ostream &OS, const BTypeStatement &X) { + OS << "\t" << X.opcode() << "\t"; + OS << X.operands() << "\n"; + return OS; +} +std::ostream &operator<<(std::ostream &OS, const UTypeOpcode &X) { + switch (X.op()) { + case UTypeOpcode_Op_LUI: OS << "lui"; break; + case UTypeOpcode_Op_AUIPC: OS << "auipc"; break; + default: assert(!"The default case was reached."); + } + return OS; +} +std::ostream &operator<<(std::ostream &OS, const UTypeOperands &X) { + OS << X.operand1(); + OS << "," << X.operand2(); + return OS; +} +std::ostream &operator<<(std::ostream &OS, const UTypeStatement &X) { + OS << "\t" << X.opcode() << "\t"; + OS << X.operands() << "\n"; + return OS; +} +std::ostream &operator<<(std::ostream &OS, const JTypeOpcode &X) { + switch (X.op()) { + case JTypeOpcode_Op_JAL: OS << "jal"; break; + default: assert(!"The default case was reached."); + } + return OS; +} +std::ostream &operator<<(std::ostream &OS, const JTypeOperands &X) { + OS << 
X.operand1(); + OS << "," << X.operand2(); + return OS; +} +std::ostream &operator<<(std::ostream &OS, const JTypeStatement &X) { + OS << "\t" << X.opcode() << "\t"; + OS << X.operands() << "\n"; + return OS; +} +std::ostream &operator<<(std::ostream &OS, const COneRegOpcode &X) { + switch (X.op()) { + case COneRegOpcode_Op_CJR: OS << "c.jr"; break; + case COneRegOpcode_Op_CJALR: OS << "c.jalr"; break; + default: assert(!"The default case was reached."); + } + return OS; +} +std::ostream &operator<<(std::ostream &OS, const COneImmOpcode &X) { + switch (X.op()) { + case COneImmOpcode_Op_CJ: OS << "c.j"; break; + case COneImmOpcode_Op_CJAL: OS << "c.jal"; break; + default: assert(!"The default case was reached."); + } + return OS; +} +std::ostream &operator<<(std::ostream &OS, const CTwoOpOpcode &X) { + switch (X.op()) { + case CTwoOpOpcode_Op_CADDI: OS << "c.addi"; break; + case CTwoOpOpcode_Op_CLI: OS << "c.li"; break; + case CTwoOpOpcode_Op_CADDI4SPN: OS << "c.addi4spn"; break; + case CTwoOpOpcode_Op_CSLLI: OS << "c.slli"; break; + case CTwoOpOpcode_Op_CSRLI: OS << "c.srli"; break; + case CTwoOpOpcode_Op_CSRAI: OS << "c.srai"; break; + case CTwoOpOpcode_Op_CANDI: OS << "c.andi"; break; + case CTwoOpOpcode_Op_CLWSP: OS << "c.lwsp"; break; + case CTwoOpOpcode_Op_CSWSP: OS << "c.swsp"; break; + case CTwoOpOpcode_Op_CBEQZ: OS << "c.beqz"; break; + case CTwoOpOpcode_Op_CBNEZ: OS << "c.bnez"; break; + case CTwoOpOpcode_Op_CADDI16SP: OS << "c.addi16sp"; break; + default: assert(!"The default case was reached."); + } + return OS; +} +std::ostream &operator<<(std::ostream &OS, const CTwoRegOpcode &X) { + switch (X.op()) { + case CTwoRegOpcode_Op_CADD: OS << "c.add"; break; + case CTwoRegOpcode_Op_CMV: OS << "c.mv"; break; + case CTwoRegOpcode_Op_CSUB: OS << "c.sub"; break; + case CTwoRegOpcode_Op_CXOR: OS << "c.xor"; break; + case CTwoRegOpcode_Op_COR: OS << "c.or"; break; + case CTwoRegOpcode_Op_CAND: OS << "c.and"; break; + default: assert(!"The default case was 
reached."); + } + return OS; +} +std::ostream &operator<<(std::ostream &OS, const CThreeOpOpcode &X) { + switch (X.op()) { + case CThreeOpOpcode_Op_CLW: OS << "c.lw"; break; + case CThreeOpOpcode_Op_CSW: OS << "c.sw"; break; + default: assert(!"The default case was reached."); + } + return OS; +} +std::ostream &operator<<(std::ostream &OS, const COneImmStatement &X) { + OS << "\t" << X.opcode() << "\t"; + OS << X.operand1() << "\n"; + return OS; +} +std::ostream &operator<<(std::ostream &OS, const COneRegStatement &X) { + OS << "\t" << X.opcode() << "\t"; + OS << X.operand1() << "\n"; + return OS; +} +std::ostream &operator<<(std::ostream &OS, const CTwoOpStatement &X) { + OS << "\t" << X.opcode() << "\t"; + OS << X.operand1() << "," << X.operand2() << "\n"; + return OS; +} +std::ostream &operator<<(std::ostream &OS, const CTwoRegStatement &X) { + OS << "\t" << X.opcode() << "\t"; + OS << X.operand1() << "," << X.operand2() << "\n"; + return OS; +} +std::ostream &operator<<(std::ostream &OS, const CThreeOpStatement &X) { + OS << "\t" << X.opcode() << "\t"; + OS << X.operand1() << "," << X.operand2() << "(" << X.operand3() << ")\n"; + return OS; +} +std::ostream &operator<<(std::ostream &OS, const CTypeStatement &X) { + if (X.has_statement1()) + return OS << X.statement1(); + else if (X.has_statement2()) + return OS << X.statement2(); + else if (X.has_statement3()) + return OS << X.statement3(); + else if (X.has_statement4()) + return OS << X.statement4(); + else + return OS << X.statement5(); +} +std::ostream &operator<<(std::ostream &OS, const AsmStatement &X) { + if (X.has_statement1()) { + RTypeOperands ThreeOperands = X.statement1().operands(); + Register Oper1 = ThreeOperands.operand1(); + Register Oper2 = ThreeOperands.operand2(); + Register Oper3 = ThreeOperands.operand3(); + if (NoAliases) { + OS << X.statement1(); + return OS; + } + if (X.statement1().opcode().op() == RTypeOpcode_Op_ADD) { + OS << X.statement1(); + return OS; + } + else if 
(X.statement1().opcode().op() == + RTypeOpcode_Op_SUB) { + if (Oper2.name() == Register_RegName_X0) { + OS << "\tneg\t" << Oper1 << "," << Oper3 << "\n"; + return OS; + } else { + OS << X.statement1(); + return OS; + } + } else { + return OS << X.statement1(); + } + } else if (X.has_statement2()) { + return OS << X.statement2(); + } else if (X.has_statement3()) { + return OS << X.statement3(); + } else if (X.has_statement4()) { + return OS << X.statement4(); + } else if (X.has_statement5()) { + return OS << X.statement5(); + } else if (X.has_statement6()) { + return OS << X.statement6(); + } else if (X.has_statement7()) { + return OS << X.statement7(); + } else if (X.has_statement8()) { + return OS << X.statement8(); + } else if (X.has_statement9()) { + return OS << X.statement9(); + } + return OS; +} +std::ostream &operator<<(std::ostream &OS, const AsmStatementSeq &X) { + for (auto &ST : X.statements()) OS << ST; + return OS; +} +std::ostream &operator<<(std::ostream &OS, const Assembly &X) { + return OS << X.asmstatements(); +} + +// --------------------------------- + +std::string FunctionToString(const Assembly &Input) { + std::ostringstream OS; + OS << Input; + return OS.str(); +} +std::string ProtoToASM(const uint8_t *Data, size_t Size, bool Flag) { + Assembly Message; + NoAliases = Flag; + if (!Message.ParsePartialFromArray(Data, Size)) + return "#error invalid proto\n"; + return FunctionToString(Message); +} + +} // namespace mc_proto_fuzzer