diff --git a/clang-tools-extra/pseudo/CMakeLists.txt b/clang-tools-extra/pseudo/CMakeLists.txt
--- a/clang-tools-extra/pseudo/CMakeLists.txt
+++ b/clang-tools-extra/pseudo/CMakeLists.txt
@@ -2,6 +2,7 @@
 include_directories(${CMAKE_CURRENT_BINARY_DIR}/include)
 add_subdirectory(lib)
 add_subdirectory(tool)
+add_subdirectory(fuzzer)
 if(CLANG_INCLUDE_TESTS)
   add_subdirectory(unittests)
   add_subdirectory(test)
diff --git a/clang-tools-extra/pseudo/fuzzer/CMakeLists.txt b/clang-tools-extra/pseudo/fuzzer/CMakeLists.txt
new file mode 100644
--- /dev/null
+++ b/clang-tools-extra/pseudo/fuzzer/CMakeLists.txt
@@ -0,0 +1,14 @@
+set(LLVM_LINK_COMPONENTS
+  FuzzMutate
+  Support
+  )
+
+add_llvm_fuzzer(clang-pseudo-fuzzer
+  Fuzzer.cpp
+  DUMMY_MAIN Main.cpp
+  )
+
+target_link_libraries(clang-pseudo-fuzzer
+  PRIVATE
+  clangPseudo
+  )
diff --git a/clang-tools-extra/pseudo/fuzzer/Fuzzer.cpp b/clang-tools-extra/pseudo/fuzzer/Fuzzer.cpp
new file mode 100644
--- /dev/null
+++ b/clang-tools-extra/pseudo/fuzzer/Fuzzer.cpp
@@ -0,0 +1,106 @@
+//===-- Fuzzer.cpp - Fuzz the pseudoparser --------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang-pseudo/DirectiveTree.h"
+#include "clang-pseudo/Forest.h"
+#include "clang-pseudo/GLR.h"
+#include "clang-pseudo/Grammar.h"
+#include "clang-pseudo/LRTable.h"
+#include "clang-pseudo/Token.h"
+#include "clang/Basic/LangOptions.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+
+namespace clang {
+namespace pseudo {
+namespace {
+
+class Fuzzer {
+  clang::LangOptions LangOpts = clang::pseudo::genericLangOpts();
+  std::unique_ptr<Grammar> G;
+  LRTable T;
+  bool Print;
+
+public:
+  Fuzzer(llvm::StringRef GrammarPath, bool Print) : Print(Print) {
+    llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> GrammarText =
+        llvm::MemoryBuffer::getFile(GrammarPath);
+    if (std::error_code EC = GrammarText.getError()) {
+      llvm::errs() << "Error: can't read grammar file '" << GrammarPath
+                   << "': " << EC.message() << "\n";
+      std::exit(1);
+    }
+    std::vector<std::string> Diags;
+    G = Grammar::parseBNF(GrammarText->get()->getBuffer(), Diags);
+    if (!Diags.empty()) {
+      for (const auto &Diag : Diags)
+        llvm::errs() << Diag << "\n";
+      std::exit(1);
+    }
+    T = LRTable::buildSLR(*G);
+  }
+
+  void operator()(llvm::StringRef Code) {
+    std::string CodeStr = Code.str(); // Must be null-terminated.
+    auto RawStream = lex(CodeStr, LangOpts);
+    auto DirectiveStructure = DirectiveTree::parse(RawStream);
+    clang::pseudo::chooseConditionalBranches(DirectiveStructure, RawStream);
+    // FIXME: strip preprocessor directives
+    auto ParseableStream =
+        clang::pseudo::stripComments(cook(RawStream, LangOpts));
+
+    clang::pseudo::ForestArena Arena;
+    clang::pseudo::GSS GSS;
+    auto &Root = glrParse(ParseableStream,
+                          clang::pseudo::ParseParams{*G, T, Arena, GSS});
+    if (Print)
+      llvm::outs() << Root.dumpRecursive(*G);
+  }
+};
+
+Fuzzer *Fuzz = nullptr;
+
+} // namespace
+} // namespace pseudo
+} // namespace clang
+
+extern "C" {
+
+// Set up the fuzzer from command line flags:
+//  -grammar=<file> (required) - path to cxx.bnf
+//  -print                     - used for testing the fuzzer
+int LLVMFuzzerInitialize(int *Argc, char ***Argv) {
+  llvm::StringRef GrammarFile;
+  bool PrintForest = false;
+  auto ConsumeArg = [&](llvm::StringRef Arg) -> bool {
+    if (Arg.consume_front("-grammar=")) {
+      GrammarFile = Arg;
+      return true;
+    } else if (Arg == "-print") {
+      PrintForest = true;
+      return true;
+    }
+    return false;
+  };
+  *Argc = std::remove_if(*Argv + 1, *Argv + *Argc, ConsumeArg) - *Argv;
+
+  if (GrammarFile.empty()) {
+    fprintf(stderr, "Fuzzer needs -grammar=/path/to/cxx.bnf\n");
+    exit(1);
+  }
+  clang::pseudo::Fuzz = new clang::pseudo::Fuzzer(GrammarFile, PrintForest);
+  return 0;
+}
+
+int LLVMFuzzerTestOneInput(uint8_t *Data, size_t Size) {
+  (*clang::pseudo::Fuzz)(llvm::StringRef(reinterpret_cast<char *>(Data), Size));
+  return 0;
+}
+}
diff --git a/clang-tools-extra/pseudo/fuzzer/Main.cpp b/clang-tools-extra/pseudo/fuzzer/Main.cpp
new file mode 100644
--- /dev/null
+++ b/clang-tools-extra/pseudo/fuzzer/Main.cpp
@@ -0,0 +1,16 @@
+//===--- Main.cpp - Entry point to sanity check the fuzzer ----------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/FuzzMutate/FuzzerCLI.h"
+
+extern "C" int LLVMFuzzerInitialize(int *, char ***);
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *, size_t);
+int main(int argc, char *argv[]) {
+  return llvm::runFuzzerOnInputs(argc, argv, LLVMFuzzerTestOneInput,
+                                 LLVMFuzzerInitialize);
+}
diff --git a/clang-tools-extra/pseudo/test/CMakeLists.txt b/clang-tools-extra/pseudo/test/CMakeLists.txt
--- a/clang-tools-extra/pseudo/test/CMakeLists.txt
+++ b/clang-tools-extra/pseudo/test/CMakeLists.txt
@@ -1,5 +1,6 @@
 set(CLANG_PSEUDO_TEST_DEPS
   clang-pseudo
+  clang-pseudo-fuzzer
   ClangPseudoTests
   )
 
diff --git a/clang-tools-extra/pseudo/test/fuzzer.cpp b/clang-tools-extra/pseudo/test/fuzzer.cpp
new file mode 100644
--- /dev/null
+++ b/clang-tools-extra/pseudo/test/fuzzer.cpp
@@ -0,0 +1,4 @@
+// RUN: clang-pseudo-fuzzer -grammar=%cxx-bnf-file -print %s | FileCheck %s
+int x;
+// CHECK: translation-unit := declaration-seq
+// CHECK: simple-type-specifier := INT