diff --git a/clang/test/CMakeLists.txt b/clang/test/CMakeLists.txt --- a/clang/test/CMakeLists.txt +++ b/clang/test/CMakeLists.txt @@ -58,6 +58,7 @@ apinotes-test c-index-test clang + clang-fuzzer-dictionary clang-resource-headers clang-format clang-tblgen diff --git a/clang/test/Misc/fuzzer-dictionary.test b/clang/test/Misc/fuzzer-dictionary.test new file mode 100644 --- /dev/null +++ b/clang/test/Misc/fuzzer-dictionary.test @@ -0,0 +1,4 @@ +RUN: clang-fuzzer-dictionary | FileCheck %s +CHECK-DAG: ampamp="&&" +CHECK-DAG: catch="catch" +CHECK-DAG: rawstart="R\x22(" diff --git a/clang/tools/clang-fuzzer/CMakeLists.txt b/clang/tools/clang-fuzzer/CMakeLists.txt --- a/clang/tools/clang-fuzzer/CMakeLists.txt +++ b/clang/tools/clang-fuzzer/CMakeLists.txt @@ -109,6 +109,7 @@ add_clang_subdirectory(handle-cxx) add_clang_subdirectory(handle-llvm) +add_clang_subdirectory(dictionary) add_clang_executable(clang-fuzzer EXCLUDE_FROM_ALL diff --git a/clang/tools/clang-fuzzer/dictionary/CMakeLists.txt b/clang/tools/clang-fuzzer/dictionary/CMakeLists.txt new file mode 100644 --- /dev/null +++ b/clang/tools/clang-fuzzer/dictionary/CMakeLists.txt @@ -0,0 +1,2 @@ +add_clang_executable(clang-fuzzer-dictionary dictionary.c) + diff --git a/clang/tools/clang-fuzzer/dictionary/dictionary.c b/clang/tools/clang-fuzzer/dictionary/dictionary.c new file mode 100644 --- /dev/null +++ b/clang/tools/clang-fuzzer/dictionary/dictionary.c @@ -0,0 +1,57 @@ +//===-- dictionary.c - Generate fuzzing dictionary for clang --------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This binary emits a fuzzing dictionary describing strings that are +// significant to the clang parser: keywords and other tokens. +// +// The dictionary can be used by a fuzzer to reach interesting parser states +// much more quickly. +// +// The output is a single-file dictionary supported by libFuzzer and AFL: +// https://llvm.org/docs/LibFuzzer.html#dictionaries +// +//===----------------------------------------------------------------------===// + +#include + +static void emit(const char *Name, const char *Spelling) { + static char Hex[] = "0123456789abcdef"; + + printf("%s=\"", Name); + unsigned char C; + while ((C = *Spelling++)) { + if (C < 32 || C == '"' || C == '\\') + printf("\\x%c%c", Hex[C>>4], Hex[C%16]); + else + printf("%c", C); + } + printf("\"\n"); +} + +int main(int argc, char **argv) { +#define PUNCTUATOR(Name, Spelling) emit(#Name, Spelling); +#define KEYWORD(Name, Criteria) emit(#Name, #Name); +#define PPKEYWORD(Name) emit(#Name, #Name); +#define CXX_KEYWORD_OPERATOR(Name, Equivalent) emit(#Name, #Name); +#define OBJC_AT_KEYWORD(Name) emit(#Name, #Name); +#define ALIAS(Spelling, Equivalent, Criteria) emit(Spelling, Spelling); +#include "clang/Basic/TokenKinds.def" + // Some other sub-token chunks significant to the lexer. + emit("ucn16", "\\u0000"); + emit("ucn32", "\\U00000000"); + emit("rawstart", "R\"("); + emit("rawend", ")\""); + emit("quote", "\""); + emit("squote", "'"); + emit("u8quote", "u8\""); + emit("u16quote", "u\""); + emit("u32quote", "U\""); + emit("esc_nl", "\\\n"); + emit("hex", "0x"); +} +