diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt --- a/llvm/CMakeLists.txt +++ b/llvm/CMakeLists.txt @@ -1159,6 +1159,7 @@ add_subdirectory(utils/FileCheck) add_subdirectory(utils/PerfectShuffle) add_subdirectory(utils/count) + add_subdirectory(utils/llvm-jd) add_subdirectory(utils/not) add_subdirectory(utils/UnicodeData) add_subdirectory(utils/yaml-bench) diff --git a/llvm/docs/llvm-jd.rst b/llvm/docs/llvm-jd.rst new file mode 100644 --- /dev/null +++ b/llvm/docs/llvm-jd.rst @@ -0,0 +1,68 @@ +JSON diff and patch +=================== + +``llvm-jd`` is a partial C++ port of `jd`_. ``llvm-jd`` is a command-line utility and library for +diffing and patching JSON and YAML. ``llvm-jd`` supports the ``jd`` format, `JSON Merge Patch`_, and +the subset of `JSON Patch`_ that ``jd`` supports. + +``llvm-jd`` doesn't support everything that ``jd`` supports. Below is a list of options that require +further work to be supported in ``llvm-jd``: + +* ``-set`` +* ``-setkeys`` +* ``-p`` +* ``-t FORMAT`` +* ``-yaml`` + +.. _`jd`: https://github.com/josephburnett/jd +.. _`JSON Merge Patch`: https://datatracker.ietf.org/doc/html/rfc7386 +.. _`JSON Patch`: https://datatracker.ietf.org/doc/html/rfc6902 + +Installation +------------ + +To build ``llvm-jd``, enable ``LLVM_BUILD_UTILS`` when configuring CMake. + +Command-line usage +------------------ + +.. code-block:: + :caption: Output from ``llvm-jd --help``. + + OVERVIEW: Diff JSON files. + + Prints the diff of FILE1 and FILE2 to STDOUT. + The second input is read from STDIN if FILE2 is omitted. + + USAGE: llvm-jd [options] FILE1 FILE2 + + OPTIONS: + + Generic Options: + + --help - Display available options (--help-hidden for more) + --help-list - Display list of available options (--help-list-hidden for more) + --version - Display the version of this program + + llvm-jd Options: + + -f= - Produce diff in format. + =jd - JSON diff (default) + =patch - RFC 6902 + =merge - RFC 7386 + -o - Write to instead of STDOUT. 
+ --mset - Treat arrays as multisets. + +Diff language +------------- + +.. code-block:: + :caption: Context-free grammar describing the diff language. + + Diff -> Section* + Section -> Header (Remove* | Add) Add* + Header -> '@' '[' Element* ']' '\n' + Element -> JSON_STRING | JSON_NUMBER | Object + Object -> '{' '}' + Add -> '+' JSON_VALUE '\n' + Remove -> '-' JSON_VALUE '\n' diff --git a/llvm/include/llvm/ADT/STLExtras.h b/llvm/include/llvm/ADT/STLExtras.h --- a/llvm/include/llvm/ADT/STLExtras.h +++ b/llvm/include/llvm/ADT/STLExtras.h @@ -2089,6 +2089,14 @@ adl_end(RRange)); } +/// Wrapper function around std::mismatch to detect where pair-wise elements +/// between two ranges differ. +template > +auto mismatch(R1 &&r1, R2 &&r2, F f = {}) { + return std::mismatch(adl_begin(r1), adl_end(r1), adl_begin(r2), adl_end(r2), + std::ref(f)); +} + /// Returns true if all elements in Range are equal or when the Range is empty. template bool all_equal(R &&Range) { auto Begin = adl_begin(Range); diff --git a/llvm/include/llvm/Support/JSON.h b/llvm/include/llvm/Support/JSON.h --- a/llvm/include/llvm/Support/JSON.h +++ b/llvm/include/llvm/Support/JSON.h @@ -47,14 +47,15 @@ #define LLVM_SUPPORT_JSON_H #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLFunctionalExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" -#include "llvm/ADT/STLFunctionalExtras.h" #include "llvm/Support/Error.h" #include "llvm/Support/FormatVariadic.h" #include "llvm/Support/raw_ostream.h" #include #include +#include namespace llvm { namespace json { @@ -1075,6 +1076,15 @@ OStream(OS).value(V); return OS; } + +/// Computes a non-cryptographic hash for an array. +std::size_t Hash(const Array &a) noexcept; + +/// Computes a non-cryptographic hash for an object. +std::size_t Hash(const Object &a) noexcept; + +/// Computes a non-cryptographic hash for a value. +std::size_t Hash(const Value &v) noexcept; } // namespace json /// Allow printing json::Value with formatv(). 
@@ -1085,4 +1095,24 @@ }; } // namespace llvm +namespace std { +template <> struct hash { + size_t operator()(const llvm::json::Array &a) const noexcept { + return llvm::json::Hash(a); + } +}; + +template <> struct hash { + size_t operator()(const llvm::json::Object &a) const noexcept { + return llvm::json::Hash(a); + } +}; + +template <> struct hash { + size_t operator()(const llvm::json::Value &v) const noexcept { + return llvm::json::Hash(v); + } +}; +} // namespace std + #endif diff --git a/llvm/lib/Support/JSON.cpp b/llvm/lib/Support/JSON.cpp --- a/llvm/lib/Support/JSON.cpp +++ b/llvm/lib/Support/JSON.cpp @@ -918,6 +918,49 @@ Stack.pop_back(); } +std::size_t Hash(const Array &a) noexcept { + constexpr size_t OffsetBasis = 14695981039346656037ULL; + constexpr size_t Prime = 1099511628211ULL; + return std::accumulate(a.begin(), a.end(), OffsetBasis, + [](size_t x, const Value &y) { + x *= Prime; + x ^= llvm::json::Hash(y); + return x; + }); +} + +std::size_t Hash(const Object &a) noexcept { + constexpr size_t OffsetBasis = 14695981039346656037ULL; + constexpr size_t Prime = 1099511628211ULL; + return std::accumulate(a.begin(), a.end(), OffsetBasis, + [](size_t x, const Object::value_type &y) { + x *= Prime; + x ^= std::hash{}(y.getFirst().str()); + x *= Prime; + x ^= Hash(y.getSecond()); + return x; + }); +} + +std::size_t Hash(const Value &v) noexcept { + switch (v.kind()) { + case llvm::json::Value::Null: + return std::hash{}(nullptr); + case llvm::json::Value::Boolean: + return std::hash{}(*v.getAsBoolean()); + case llvm::json::Value::Number: + return std::hash{}(*v.getAsNumber()); + case llvm::json::Value::String: + return std::hash{}(*v.getAsString()); + case llvm::json::Value::Array: + return Hash(*v.getAsArray()); + case llvm::json::Value::Object: + return Hash(*v.getAsObject()); + } + + llvm_unreachable("kind unaccounted for"); +} + } // namespace json } // namespace llvm @@ -928,4 +971,3 @@ llvm_unreachable("json::Value format options should be an 
integer"); json::OStream(OS, IndentAmount).value(E); } - diff --git a/llvm/test/jd/array.txt b/llvm/test/jd/array.txt new file mode 100644 --- /dev/null +++ b/llvm/test/jd/array.txt @@ -0,0 +1,210 @@ +# Arrays +######## + +Array vs scalar tests are found in the scalar test files (testing here would be redundant). + +RUN: echo '["hello", "world"]' > %t.canonical_string_array +RUN: echo '[true, false, true, true]' > %t.canonical_bool_array +RUN: echo '[1, 2, 3]' > %t.canonical_number_array + +## Array vs object +################## + +RUN: echo '{"hello":"world"}' > %t.object +RUN: llvm-jd %t.canonical_string_array %t.object | FileCheck --check-prefix=OUTPUT1 %s || [ ${PIPESTATUS[0]} == 1 ] +OUTPUT1: @ [] +OUTPUT1: - ["hello","world"] +OUTPUT1: + {"hello":"world"} + +RUN: llvm-jd %t.object %t.canonical_string_array | FileCheck --check-prefix=OUTPUT2 %s || [ ${PIPESTATUS[0]} == 1 ] +OUTPUT2: @ [] +OUTPUT2: - {"hello":"world"} +OUTPUT2: + ["hello","world"] + +## Same size, same type, same values +#################################### + +RUN: llvm-jd %t.canonical_string_array %t.canonical_string_array +RUN: llvm-jd %t.canonical_number_array %t.canonical_number_array +RUN: llvm-jd %t.canonical_bool_array %t.canonical_bool_array + +## Same size, same type, different values +######################################### + +RUN: echo '["another", "array"]' > %t.another_string_array +RUN: llvm-jd %t.canonical_string_array %t.another_string_array | FileCheck --check-prefix=OUTPUT3 %s || [ ${PIPESTATUS[0]} == 1 ] +OUTPUT3: @ [1] +OUTPUT3: - "world" +OUTPUT3: + "array" +OUTPUT3: @ [0] +OUTPUT3: - "hello" +OUTPUT3: + "another" + +RUN: llvm-jd %t.another_string_array %t.canonical_string_array | FileCheck --check-prefix=OUTPUT4 %s || [ ${PIPESTATUS[0]} == 1 ] +OUTPUT4: @ [1] +OUTPUT4: - "array" +OUTPUT4: + "world" +OUTPUT4: @ [0] +OUTPUT4: - "another" +OUTPUT4: + "hello" + +RUN: echo '[3, 2, 1]' > %t.another_number_array +RUN: llvm-jd %t.canonical_number_array %t.another_number_array | 
FileCheck --check-prefix=OUTPUT5 %s || [ ${PIPESTATUS[0]} == 1 ] +OUTPUT5: @ [2] +OUTPUT5: - 3 +OUTPUT5: + 1 +OUTPUT5: @ [0] +OUTPUT5: - 1 +OUTPUT5: + 3 + +RUN: echo '[3, 2, 1]' > %t.another_number_array +RUN: llvm-jd %t.another_number_array %t.canonical_number_array | FileCheck --check-prefix=OUTPUT6 %s || [ ${PIPESTATUS[0]} == 1 ] +OUTPUT6: @ [2] +OUTPUT6: - 1 +OUTPUT6: + 3 +OUTPUT6: @ [0] +OUTPUT6: - 3 +OUTPUT6: + 1 + +## Different size, same type, same values +######################################### + +RUN: echo '["hello"]' > %t.smaller_string_array +RUN: llvm-jd %t.canonical_string_array %t.smaller_string_array | FileCheck --check-prefix=OUTPUT7 %s || [ ${PIPESTATUS[0]} == 1 ] +OUTPUT7: @ [1] +OUTPUT7: - "world" + +RUN: llvm-jd %t.smaller_string_array %t.canonical_string_array | FileCheck --check-prefix=OUTPUT8 %s || [ ${PIPESTATUS[0]} == 1 ] +OUTPUT8: @ [-1] +OUTPUT8: + "world" + +RUN: echo '[true, false]' > %t.smaller_bool_array +RUN: llvm-jd %t.canonical_bool_array %t.smaller_bool_array | FileCheck --check-prefix=OUTPUT9 %s || [ ${PIPESTATUS[0]} == 1 ] +OUTPUT9: @ [3] +OUTPUT9: - true +OUTPUT9: @ [2] +OUTPUT9: - true + +RUN: llvm-jd %t.smaller_bool_array %t.canonical_bool_array | FileCheck --check-prefix=OUTPUT10 %s || [ ${PIPESTATUS[0]} == 1 ] +OUTPUT10: @ [-1] +OUTPUT10: + true +OUTPUT10: @ [-1] +OUTPUT10: + true + +RUN: echo '[1, 2]' > %t.smaller_number_array +RUN: llvm-jd %t.canonical_number_array %t.smaller_number_array | FileCheck --check-prefix=OUTPUT11 %s || [ ${PIPESTATUS[0]} == 1 ] +OUTPUT11: @ [2] +OUTPUT11: - 3 + +RUN: echo '[1, 2]' > %t.smaller_number_array +RUN: llvm-jd %t.smaller_number_array %t.canonical_number_array | FileCheck --check-prefix=OUTPUT12 %s || [ ${PIPESTATUS[0]} == 1 ] +OUTPUT12: @ [-1] +OUTPUT12: + 3 + +## Same size, some values same +############################## + +RUN: echo '[-6, 2, 3]' > %t.another_number_array +RUN: llvm-jd %t.another_number_array %t.canonical_number_array | FileCheck --check-prefix=OUTPUT13 %s || [ 
${PIPESTATUS[0]} == 1 ] +OUTPUT13: @ [0] +OUTPUT13: - -6 +OUTPUT13: + 1 + +RUN: echo '[1, 4, 3]' > %t.another_number_array +RUN: llvm-jd %t.another_number_array %t.canonical_number_array | FileCheck --check-prefix=OUTPUT14 %s || [ ${PIPESTATUS[0]} == 1 ] +OUTPUT14: @ [1] +OUTPUT14: - 4 +OUTPUT14: + 2 + +## Same size, different type +############################ + +RUN: llvm-jd %t.canonical_string_array %t.smaller_number_array | FileCheck --check-prefix=OUTPUT15 %s || [ ${PIPESTATUS[0]} == 1 ] +OUTPUT15: @ [1] +OUTPUT15: - "world" +OUTPUT15: + 2 +OUTPUT15: @ [0] +OUTPUT15: - "hello" +OUTPUT15: + 1 + +RUN: echo '[1, "two", true, [true]]' > %t.mixed_array +RUN: llvm-jd %t.canonical_bool_array %t.mixed_array | FileCheck --check-prefix=OUTPUT16 %s || [ ${PIPESTATUS[0]} == 1 ] +OUTPUT16: @ [3] +OUTPUT16: - true +OUTPUT16: + [true] +OUTPUT16: @ [1] +OUTPUT16: - false +OUTPUT16: + "two" +OUTPUT16: @ [0] +OUTPUT16: - true +OUTPUT16: + 1 + +## Array of array diff +###################### + +RUN: echo '[[1, 2]]' > %t.aoa1 +RUN: echo '[[3, 2]]' > %t.aoa2 +RUN: llvm-jd %t.aoa1 %t.aoa2 | FileCheck --check-prefix=OUTPUT17 %s || [ ${PIPESTATUS[0]} == 1 ] +OUTPUT17: @ [0,0] +OUTPUT17: - 1 +OUTPUT17: + 3 + +RUN: echo '[[1, 2, 3]]' > %t.aoa1 +RUN: echo '[[3, 2, 1]]' > %t.aoa2 +RUN: llvm-jd %t.aoa1 %t.aoa2 | FileCheck --check-prefix=OUTPUT18 %s || [ ${PIPESTATUS[0]} == 1 ] +OUTPUT18: @ [0,0] +OUTPUT18: - 1 +OUTPUT18: + 3 +OUTPUT18: @ [0,2] +OUTPUT18: - 3 +OUTPUT18: + 1 + +RUN: echo '[[1], [1, 2, 3]]' > %t.aoa1 +RUN: echo '[[1], [3, 2, 1]]' > %t.aoa2 +RUN: llvm-jd %t.aoa1 %t.aoa2 | FileCheck --check-prefix=OUTPUT19 %s || [ ${PIPESTATUS[0]} == 1 ] +OUTPUT19: @ [1,0] +OUTPUT19: - 1 +OUTPUT19: + 3 +OUTPUT19: @ [1,2] +OUTPUT19: - 3 +OUTPUT19: + 1 + +RUN: echo '[1, [1, 2, 3]]' > %t.aoa1 +RUN: echo '[2, [3, 2, 3]]' > %t.aoa2 +RUN: llvm-jd %t.aoa1 %t.aoa2 | FileCheck --check-prefix=OUTPUT20 %s || [ ${PIPESTATUS[0]} == 1 ] +OUTPUT20: @ [1,0] +OUTPUT20: - 1 +OUTPUT20: + 3 +OUTPUT20: @ [0] 
+OUTPUT20: - 1 +OUTPUT20: + 2 + +RUN: echo '[[1, 2, 3, 4, 5]]' > %t.aoa1 +RUN: echo '[[1, 2, 3]]' > %t.aoa2 +RUN: llvm-jd %t.aoa1 %t.aoa2 | FileCheck --check-prefix=OUTPUT21 %s || [ ${PIPESTATUS[0]} == 1 ] +OUTPUT21: @ [0,3] +OUTPUT21: - 4 +OUTPUT21: @ [0,4] +OUTPUT21: - 5 + +RUN: llvm-jd %t.aoa2 %t.aoa1 | FileCheck --check-prefix=OUTPUT22 %s || [ ${PIPESTATUS[0]} == 1 ] +OUTPUT22: @ [0,-1] +OUTPUT22: + 4 +OUTPUT22: @ [0,-1] +OUTPUT22: + 5 + +## Array of objects + +RUN: echo '[{"a": "b"}, {"a": {"b": true}}, {"a": {"b": {"c": 1}}}]' > %t.aoo1 +RUN: echo '[{"a": "c"}, {"a": {"b": false}}, {"a": {"b": {"c": 2}}}]' > %t.aoo2 + +RUN: llvm-jd %t.aoo1 %t.aoo2 | FileCheck --check-prefix=OUTPUT23 %s || [ ${PIPESTATUS[0]} == 1 ] +OUTPUT23: @ [2,"a","b","c"] +OUTPUT23: - 1 +OUTPUT23: + 2 +OUTPUT23: @ [1,"a","b"] +OUTPUT23: - true +OUTPUT23: + false +OUTPUT23: @ [0,"a"] +OUTPUT23: - "b" +OUTPUT23: + "c" diff --git a/llvm/test/jd/bool.txt b/llvm/test/jd/bool.txt new file mode 100644 --- /dev/null +++ b/llvm/test/jd/bool.txt @@ -0,0 +1,135 @@ +RUN: touch %t.empty + +# Boolean +######### + +RUN: echo true > %t.true +RUN: $(llvm-jd %t.true %t.true) + +RUN: llvm-jd %t.true %t.empty | FileCheck --check-prefix=OUTPUT1 %s || [ ${PIPESTATUS[0]} == 1 ] +OUTPUT1: @ [] +OUTPUT1: - true + +RUN: llvm-jd %t.empty %t.true | FileCheck --check-prefix=OUTPUT2 %s || [ ${PIPESTATUS[0]} == 1 ] +OUTPUT2: @ [] +OUTPUT2: + true + +# Boolean vs Boolean +#################### + +RUN: echo false > %t.false + +RUN: llvm-jd %t.true %t.false | FileCheck --check-prefix=OUTPUT3 %s || [ ${PIPESTATUS[0]} == 1 ] +OUTPUT3: @ [] +OUTPUT3: - true +OUTPUT3: + false + +RUN: llvm-jd %t.false %t.true | FileCheck --check-prefix=OUTPUT4 %s || [ ${PIPESTATUS[0]} == 1 ] +OUTPUT4: @ [] +OUTPUT4: - false +OUTPUT4: + true + +# Boolean vs number +################### + +RUN: echo 1.1 > %t.number + +RUN: llvm-jd %t.true %t.number | FileCheck --check-prefix=OUTPUT5 %s || [ ${PIPESTATUS[0]} == 1 ] +OUTPUT5: @ [] +OUTPUT5: - true 
+OUTPUT5: + 1.1 + +RUN: llvm-jd %t.number %t.true | FileCheck --check-prefix=OUTPUT6 %s || [ ${PIPESTATUS[0]} == 1 ] +OUTPUT6: @ [] +OUTPUT6: - 1.1 +OUTPUT6: + true + +# Boolean vs string +################### + +RUN: echo '"true"' > %t.string + +RUN: llvm-jd %t.true %t.string | FileCheck --check-prefix=OUTPUT7 %s || [ ${PIPESTATUS[0]} == 1 ] +OUTPUT7: @ [] +OUTPUT7: - true +OUTPUT7: + "true" + +RUN: llvm-jd %t.string %t.true | FileCheck --check-prefix=OUTPUT8 %s || [ ${PIPESTATUS[0]} == 1 ] +OUTPUT8: @ [] +OUTPUT8: - "true" +OUTPUT8: + true + +# Boolean vs empty array +######################## + +RUN: echo '[]' > %t.empty_array + +RUN: llvm-jd %t.true %t.empty_array | FileCheck --check-prefix=OUTPUT9 %s || [ ${PIPESTATUS[0]} == 1 ] +OUTPUT9: @ [] +OUTPUT9: - true +OUTPUT9: + [] + +RUN: llvm-jd %t.empty_array %t.true | FileCheck --check-prefix=OUTPUT10 %s || [ ${PIPESTATUS[0]} == 1 ] +OUTPUT10: @ [] +OUTPUT10: - [] +OUTPUT10: + true + +# Boolean vs Boolean array +########################## + +RUN: echo '[true]' > %t.bool_array1 + +RUN: llvm-jd %t.bool_array1 %t.true | FileCheck --check-prefix=OUTPUT12 %s || [ ${PIPESTATUS[0]} == 1 ] +OUTPUT12: @ [] +OUTPUT12: - [true] +OUTPUT12: + true + +RUN: llvm-jd %t.true %t.bool_array1 | FileCheck --check-prefix=OUTPUT13 %s || [ ${PIPESTATUS[0]} == 1 ] +OUTPUT13: @ [] +OUTPUT13: - true +OUTPUT13: + [true] + +# Boolean vs number array +######################### + +RUN: echo '[0, 2, 4, 6, 8]' > %t.num_array1 + +RUN: llvm-jd %t.num_array1 %t.true | FileCheck --check-prefix=OUTPUT14 %s || [ ${PIPESTATUS[0]} == 1 ] +OUTPUT14: @ [] +OUTPUT14: - [0,2,4,6,8] +OUTPUT14: + true + +RUN: llvm-jd %t.true %t.num_array1 | FileCheck --check-prefix=OUTPUT15 %s || [ ${PIPESTATUS[0]} == 1 ] +OUTPUT15: @ [] +OUTPUT15: - true +OUTPUT15: + [0,2,4,6,8] + +# Boolean vs string array +######################### + +RUN: echo '["true"]' > %t.string_array1 + +RUN: llvm-jd %t.string_array1 %t.true | FileCheck --check-prefix=OUTPUT16 %s || [ ${PIPESTATUS[0]} 
== 1 ] +OUTPUT16: @ [] +OUTPUT16: - ["true"] +OUTPUT16: + true + +RUN: llvm-jd %t.true %t.string_array1 | FileCheck --check-prefix=OUTPUT17 %s || [ ${PIPESTATUS[0]} == 1 ] +OUTPUT17: @ [] +OUTPUT17: - true +OUTPUT17: + ["true"] + +# Boolean vs object +################### + +RUN: echo '{"hello": "world"}' > %t.object + +RUN: llvm-jd %t.object %t.true | FileCheck --check-prefix=OUTPUT18 %s || [ ${PIPESTATUS[0]} == 1 ] +OUTPUT18: @ [] +OUTPUT18: - {"hello":"world"} +OUTPUT18: + true + +RUN: llvm-jd %t.true %t.object | FileCheck --check-prefix=OUTPUT19 %s || [ ${PIPESTATUS[0]} == 1 ] +OUTPUT19: @ [] +OUTPUT19: - true +OUTPUT19: + {"hello":"world"} diff --git a/llvm/test/jd/mset.txt b/llvm/test/jd/mset.txt new file mode 100644 --- /dev/null +++ b/llvm/test/jd/mset.txt @@ -0,0 +1,35 @@ +# Multisets +########### + +Multisets are arrays, but llvm-jd doesn't care about the ordering. + +RUN: echo '[1, 2, 3]' > %t.mset1 +RUN: echo '[3, 2, 1]' > %t.mset2 + +RUN: llvm-jd -mset %t.mset1 %t.mset2 +RUN: llvm-jd -mset %t.mset2 %t.mset1 + +RUN: echo '[1, 2, 4]' > %t.mset3 +RUN: llvm-jd -mset %t.mset1 %t.mset3 | FileCheck --check-prefix=OUTPUT1 %s || [ ${PIPESTATUS[0]} == 1 ] +OUTPUT1: @ {{\[}}["multiset"],{}] +OUTPUT1: - 3 +OUTPUT1: + 4 + +RUN: llvm-jd -mset %t.mset3 %t.mset1 | FileCheck --check-prefix=OUTPUT2 %s || [ ${PIPESTATUS[0]} == 1 ] +OUTPUT2: @ {{\[}}["multiset"],{}] +OUTPUT2: - 4 +OUTPUT2: + 3 + +RUN: echo '{"a": ["x", 1, true]}' > %t.obj-mset1 +RUN: echo '{"a": ["x", true, 1]}' > %t.obj-mset2 + +RUN: llvm-jd -mset %t.obj-mset1 %t.obj-mset2 +RUN: llvm-jd -mset %t.obj-mset2 %t.obj-mset1 + +RUN: echo '{"a": ["x", "y", "z"]}' > %t.obj-mset3 +RUN: llvm-jd -mset %t.obj-mset1 %t.obj-mset3 | FileCheck --check-prefix=OUTPUT3 %s || [ ${PIPESTATUS[0]} == 1 ] +OUTPUT3: @ ["a",["multiset"],{}] +OUTPUT3: - true +OUTPUT3: - 1 +OUTPUT3: + "y" +OUTPUT3: + "z" diff --git a/llvm/test/jd/number.txt b/llvm/test/jd/number.txt new file mode 100644 --- /dev/null +++ b/llvm/test/jd/number.txt @@ 
-0,0 +1,135 @@ +RUN: touch %t.empty + +# Number +######## + +RUN: echo 4.04 > %t.4.04 +RUN: $(llvm-jd %t.4.04 %t.4.04) + +RUN: llvm-jd %t.4.04 %t.empty | FileCheck --check-prefix=OUTPUT1 %s || [ ${PIPESTATUS[0]} == 1 ] +OUTPUT1: @ [] +OUTPUT1: - 4.04 + +RUN: llvm-jd %t.empty %t.4.04 | FileCheck --check-prefix=OUTPUT2 %s || [ ${PIPESTATUS[0]} == 1 ] +OUTPUT2: @ [] +OUTPUT2: + 4.04 + +# Number vs number +################## + +RUN: echo -3 > %t.-3 + +RUN: llvm-jd %t.4.04 %t.-3 | FileCheck --check-prefix=OUTPUT3 %s || [ ${PIPESTATUS[0]} == 1 ] +OUTPUT3: @ [] +OUTPUT3: - 4.04 +OUTPUT3: + -3 + +RUN: llvm-jd %t.-3 %t.4.04 | FileCheck --check-prefix=OUTPUT4 %s || [ ${PIPESTATUS[0]} == 1 ] +OUTPUT4: @ [] +OUTPUT4: - -3 +OUTPUT4: + 4.04 + +# Number vs Boolean +################### + +RUN: echo true > %t.true + +RUN: llvm-jd %t.4.04 %t.true | FileCheck --check-prefix=OUTPUT5 %s || [ ${PIPESTATUS[0]} == 1 ] +OUTPUT5: @ [] +OUTPUT5: - 4.04 +OUTPUT5: + true + +RUN: llvm-jd %t.true %t.4.04 | FileCheck --check-prefix=OUTPUT6 %s || [ ${PIPESTATUS[0]} == 1 ] +OUTPUT6: @ [] +OUTPUT6: - true +OUTPUT6: + 4.04 + +# Number vs string +################### + +RUN: echo '"4.04"' > %t.string + +RUN: llvm-jd %t.4.04 %t.string | FileCheck --check-prefix=OUTPUT7 %s || [ ${PIPESTATUS[0]} == 1 ] +OUTPUT7: @ [] +OUTPUT7: - 4.04 +OUTPUT7: + "4.04" + +RUN: llvm-jd %t.string %t.4.04 | FileCheck --check-prefix=OUTPUT8 %s || [ ${PIPESTATUS[0]} == 1 ] +OUTPUT8: @ [] +OUTPUT8: - "4.04" +OUTPUT8: + 4.04 + +# Number vs empty array +######################## + +RUN: echo '[]' > %t.empty_array + +RUN: llvm-jd %t.4.04 %t.empty_array | FileCheck --check-prefix=OUTPUT9 %s || [ ${PIPESTATUS[0]} == 1 ] +OUTPUT9: @ [] +OUTPUT9: - 4.04 +OUTPUT9: + [] + +RUN: llvm-jd %t.empty_array %t.4.04 | FileCheck --check-prefix=OUTPUT10 %s || [ ${PIPESTATUS[0]} == 1 ] +OUTPUT10: @ [] +OUTPUT10: - [] +OUTPUT10: + 4.04 + +# Number vs Boolean array +########################## + +RUN: echo '[true]' > %t.bool_array1 + +RUN: llvm-jd 
%t.bool_array1 %t.4.04 | FileCheck --check-prefix=OUTPUT12 %s || [ ${PIPESTATUS[0]} == 1 ] +OUTPUT12: @ [] +OUTPUT12: - [true] +OUTPUT12: + 4.04 + +RUN: llvm-jd %t.4.04 %t.bool_array1 | FileCheck --check-prefix=OUTPUT13 %s || [ ${PIPESTATUS[0]} == 1 ] +OUTPUT13: @ [] +OUTPUT13: - 4.04 +OUTPUT13: + [true] + +# Number vs number array +######################### + +RUN: echo '[4.04, 2, 4, 6, 8]' > %t.num_array1 + +RUN: llvm-jd %t.num_array1 %t.4.04 | FileCheck --check-prefix=OUTPUT14 %s || [ ${PIPESTATUS[0]} == 1 ] +OUTPUT14: @ [] +OUTPUT14: - [4.04,2,4,6,8] +OUTPUT14: + 4.04 + +RUN: llvm-jd %t.4.04 %t.num_array1 | FileCheck --check-prefix=OUTPUT15 %s || [ ${PIPESTATUS[0]} == 1 ] +OUTPUT15: @ [] +OUTPUT15: - 4.04 +OUTPUT15: + [4.04,2,4,6,8] + +# Number vs string array +######################### + +RUN: echo '["4.04"]' > %t.string_array1 + +RUN: llvm-jd %t.string_array1 %t.4.04 | FileCheck --check-prefix=OUTPUT16 %s || [ ${PIPESTATUS[0]} == 1 ] +OUTPUT16: @ [] +OUTPUT16: - ["4.04"] +OUTPUT16: + 4.04 + +RUN: llvm-jd %t.4.04 %t.string_array1 | FileCheck --check-prefix=OUTPUT17 %s || [ ${PIPESTATUS[0]} == 1 ] +OUTPUT17: @ [] +OUTPUT17: - 4.04 +OUTPUT17: + ["4.04"] + +# Number vs object +################## + +RUN: echo '{"hello": "world"}' > %t.object + +RUN: llvm-jd %t.object %t.4.04 | FileCheck --check-prefix=OUTPUT18 %s || [ ${PIPESTATUS[0]} == 1 ] +OUTPUT18: @ [] +OUTPUT18: - {"hello":"world"} +OUTPUT18: + 4.04 + +RUN: llvm-jd %t.4.04 %t.object | FileCheck --check-prefix=OUTPUT19 %s || [ ${PIPESTATUS[0]} == 1 ] +OUTPUT19: @ [] +OUTPUT19: - 4.04 +OUTPUT19: + {"hello":"world"} diff --git a/llvm/test/jd/object.txt b/llvm/test/jd/object.txt new file mode 100644 --- /dev/null +++ b/llvm/test/jd/object.txt @@ -0,0 +1,140 @@ +# Simple objects +################ + +RUN: echo '{"a":"x"}' > %t.simple1 +RUN: echo '{"a":"y"}' > %t.simple2 + +RUN: llvm-jd %t.simple1 %t.simple2 | FileCheck --check-prefix=OUTPUT1 %s || [ ${PIPESTATUS[0]} == 1 ] +OUTPUT1: @ ["a"] +OUTPUT1: - "x" 
+OUTPUT1: + "y" + +RUN: llvm-jd %t.simple2 %t.simple1 | FileCheck --check-prefix=OUTPUT2 %s || [ ${PIPESTATUS[0]} == 1 ] +OUTPUT2: @ ["a"] +OUTPUT2: - "y" +OUTPUT2: + "x" + +# Multi-value objects +##################### + +RUN: echo '{"a": "x", "b": "y"}' > %t.multi1 +RUN: echo '{"a": "x", "b": "z"}' > %t.multi2 +RUN: echo '{"a": "z", "b": "y"}' > %t.multi3 +RUN: echo '{"a": "z", "b": "z"}' > %t.multi4 + +RUN: llvm-jd %t.multi1 %t.multi2 | FileCheck --check-prefix=OUTPUT3 %s || [ ${PIPESTATUS[0]} == 1 ] +OUTPUT3: @ ["b"] +OUTPUT3: - "y" +OUTPUT3: + "z" + +RUN: llvm-jd %t.multi2 %t.multi1 | FileCheck --check-prefix=OUTPUT4 %s || [ ${PIPESTATUS[0]} == 1 ] +OUTPUT4: @ ["b"] +OUTPUT4: - "z" +OUTPUT4: + "y" + +RUN: llvm-jd %t.multi1 %t.multi3 | FileCheck --check-prefix=OUTPUT5 %s || [ ${PIPESTATUS[0]} == 1 ] +OUTPUT5: @ ["a"] +OUTPUT5: - "x" +OUTPUT5: + "z" + +RUN: llvm-jd %t.multi3 %t.multi1 | FileCheck --check-prefix=OUTPUT6 %s || [ ${PIPESTATUS[0]} == 1 ] +OUTPUT6: @ ["a"] +OUTPUT6: - "z" +OUTPUT6: + "x" + +RUN: llvm-jd %t.multi1 %t.multi4 | FileCheck --check-prefix=OUTPUT8 %s || [ ${PIPESTATUS[0]} == 1 ] +OUTPUT8: @ ["a"] +OUTPUT8: - "x" +OUTPUT8: + "z" +OUTPUT8: @ ["b"] +OUTPUT8: - "y" +OUTPUT8: + "z" + +RUN: llvm-jd %t.multi4 %t.multi1 | FileCheck --check-prefix=OUTPUT9 %s || [ ${PIPESTATUS[0]} == 1 ] +OUTPUT9: @ ["a"] +OUTPUT9: - "z" +OUTPUT9: + "x" +OUTPUT9: @ ["b"] +OUTPUT9: - "z" +OUTPUT9: + "y" + +# Differently-sized objects +########################### + +RUN: llvm-jd %t.simple1 %t.multi1 | FileCheck --check-prefix=OUTPUT10 %s || [ ${PIPESTATUS[0]} == 1 ] +OUTPUT10: @ ["b"] +OUTPUT10: + "y" + +RUN: llvm-jd %t.multi1 %t.simple1 | FileCheck --check-prefix=OUTPUT11 %s || [ ${PIPESTATUS[0]} == 1 ] +OUTPUT11: @ ["b"] +OUTPUT11: - "y" + +RUN: llvm-jd %t.simple2 %t.multi1 | FileCheck --check-prefix=OUTPUT12 %s || [ ${PIPESTATUS[0]} == 1 ] +OUTPUT12: @ ["a"] +OUTPUT12: - "y" +OUTPUT12: + "x" +OUTPUT12: @ ["b"] +OUTPUT12: + "y" + +RUN: llvm-jd %t.multi1 %t.simple2 | 
FileCheck --check-prefix=OUTPUT13 %s || [ ${PIPESTATUS[0]} == 1 ] +OUTPUT13: @ ["a"] +OUTPUT13: - "x" +OUTPUT13: + "y" +OUTPUT13: @ ["b"] +OUTPUT13: - "y" + +# Objects with something in-between +################################### + +RUN: echo '{"a": 0, "b": true, "c": "hello"}' > %t.triple1 +RUN: echo '{"a": 0, "c": "goodbye", "b": false}' > %t.triple2 + +RUN: llvm-jd %t.triple1 %t.triple2 | FileCheck --check-prefix=OUTPUT14 %s || [ ${PIPESTATUS[0]} == 1 ] +OUTPUT14: @ ["b"] +OUTPUT14: - true +OUTPUT14: + false +OUTPUT14: @ ["c"] +OUTPUT14: - "hello" +OUTPUT14: + "goodbye" + +# Nested objects +################ + +RUN: echo '{"a": {"b": true}, "b": {"a": false}}' > %t.nested1 +RUN: echo '{"a": {"b": false}, "b": {"a": false}}' > %t.nested2 + +RUN: llvm-jd %t.nested1 %t.nested2 | FileCheck --check-prefix=OUTPUT15 %s || [ ${PIPESTATUS[0]} == 1 ] +OUTPUT15: @ ["a","b"] +OUTPUT15: - true +OUTPUT15: + false + +RUN: echo '{"a": {"b": false}, "b": {"a": true}}' > %t.nested3 + +RUN: llvm-jd %t.nested1 %t.nested3 | FileCheck --check-prefix=OUTPUT16 %s || [ ${PIPESTATUS[0]} == 1 ] +OUTPUT16: @ ["a","b"] +OUTPUT16: - true +OUTPUT16: + false +OUTPUT16: @ ["b","a"] +OUTPUT16: - false +OUTPUT16: + true + +RUN: echo '{"a": {"b": {"c": "d"}}}' > %t.nested4 +RUN: echo '{"a": {"b": {"c": "x"}, "e": "f"}}' > %t.nested5 + +RUN: llvm-jd %t.nested4 %t.nested5 | FileCheck --check-prefix=OUTPUT17 %s || [ ${PIPESTATUS[0]} == 1 ] +OUTPUT17: @ ["a","b","c"] +OUTPUT17: - "d" +OUTPUT17: + "x" +OUTPUT17: @ ["a","e"] +OUTPUT17: + "f" + +# Object with an array +###################### + +RUN: echo '{"a": {"b": {"c": [1, 2, 3]}}}' > %t.array1 +RUN: echo '{"a": {"b": {"c": [1, 2, 4]}}}' > %t.array2 + +RUN: llvm-jd %t.array1 %t.array2 | FileCheck --check-prefix=OUTPUT18 %s || [ ${PIPESTATUS[0]} == 1 ] +OUTPUT18: @ ["a","b","c",2] +OUTPUT18: - 3 +OUTPUT18: + 4 diff --git a/llvm/test/jd/string.txt b/llvm/test/jd/string.txt new file mode 100644 --- /dev/null +++ b/llvm/test/jd/string.txt @@ -0,0 +1,135 
@@ +RUN: touch %t.empty + +# String +######## + +RUN: echo '"hello"' > %t.string1 +RUN: $(llvm-jd %t.string1 %t.string1) + +RUN: llvm-jd %t.string1 %t.empty | FileCheck --check-prefix=OUTPUT1 %s || [ ${PIPESTATUS[0]} == 1 ] +OUTPUT1: @ [] +OUTPUT1: - "hello" + +RUN: llvm-jd %t.empty %t.string1 | FileCheck --check-prefix=OUTPUT2 %s || [ ${PIPESTATUS[0]} == 1 ] +OUTPUT2: @ [] +OUTPUT2: + "hello" + +# String vs number +################## + +RUN: echo -3 > %t.-3 + +RUN: llvm-jd %t.string1 %t.-3 | FileCheck --check-prefix=OUTPUT3 %s || [ ${PIPESTATUS[0]} == 1 ] +OUTPUT3: @ [] +OUTPUT3: - "hello" +OUTPUT3: + -3 + +RUN: llvm-jd %t.-3 %t.string1 | FileCheck --check-prefix=OUTPUT4 %s || [ ${PIPESTATUS[0]} == 1 ] +OUTPUT4: @ [] +OUTPUT4: - -3 +OUTPUT4: + "hello" + +# String vs Boolean +################### + +RUN: echo true > %t.true + +RUN: llvm-jd %t.string1 %t.true | FileCheck --check-prefix=OUTPUT5 %s || [ ${PIPESTATUS[0]} == 1 ] +OUTPUT5: @ [] +OUTPUT5: - "hello" +OUTPUT5: + true + +RUN: llvm-jd %t.true %t.string1 | FileCheck --check-prefix=OUTPUT6 %s || [ ${PIPESTATUS[0]} == 1 ] +OUTPUT6: @ [] +OUTPUT6: - true +OUTPUT6: + "hello" + +# String vs string +################### + +RUN: echo '"world"' > %t.string + +RUN: llvm-jd %t.string1 %t.string | FileCheck --check-prefix=OUTPUT7 %s || [ ${PIPESTATUS[0]} == 1 ] +OUTPUT7: @ [] +OUTPUT7: - "hello" +OUTPUT7: + "world" + +RUN: llvm-jd %t.string %t.string1 | FileCheck --check-prefix=OUTPUT8 %s || [ ${PIPESTATUS[0]} == 1 ] +OUTPUT8: @ [] +OUTPUT8: - "world" +OUTPUT8: + "hello" + +# String vs empty array +######################## + +RUN: echo '[]' > %t.empty_array + +RUN: llvm-jd %t.string1 %t.empty_array | FileCheck --check-prefix=OUTPUT9 %s || [ ${PIPESTATUS[0]} == 1 ] +OUTPUT9: @ [] +OUTPUT9: - "hello" +OUTPUT9: + [] + +RUN: llvm-jd %t.empty_array %t.string1 | FileCheck --check-prefix=OUTPUT10 %s || [ ${PIPESTATUS[0]} == 1 ] +OUTPUT10: @ [] +OUTPUT10: - [] +OUTPUT10: + "hello" + +# String vs Boolean array 
+########################## + +RUN: echo '[true]' > %t.bool_array1 + +RUN: llvm-jd %t.bool_array1 %t.string1 | FileCheck --check-prefix=OUTPUT12 %s || [ ${PIPESTATUS[0]} == 1 ] +OUTPUT12: @ [] +OUTPUT12: - [true] +OUTPUT12: + "hello" + +RUN: llvm-jd %t.string1 %t.bool_array1 | FileCheck --check-prefix=OUTPUT13 %s || [ ${PIPESTATUS[0]} == 1 ] +OUTPUT13: @ [] +OUTPUT13: - "hello" +OUTPUT13: + [true] + +# String vs number array +######################### + +RUN: echo '[4.04, 2, 4, 6, 8]' > %t.num_array1 + +RUN: llvm-jd %t.num_array1 %t.string1 | FileCheck --check-prefix=OUTPUT14 %s || [ ${PIPESTATUS[0]} == 1 ] +OUTPUT14: @ [] +OUTPUT14: - [4.04,2,4,6,8] +OUTPUT14: + "hello" + +RUN: llvm-jd %t.string1 %t.num_array1 | FileCheck --check-prefix=OUTPUT15 %s || [ ${PIPESTATUS[0]} == 1 ] +OUTPUT15: @ [] +OUTPUT15: - "hello" +OUTPUT15: + [4.04,2,4,6,8] + +# String vs string array +######################### + +RUN: echo '["hello"]' > %t.string_array1 + +RUN: llvm-jd %t.string_array1 %t.string1 | FileCheck --check-prefix=OUTPUT16 %s || [ ${PIPESTATUS[0]} == 1 ] +OUTPUT16: @ [] +OUTPUT16: - ["hello"] +OUTPUT16: + "hello" + +RUN: llvm-jd %t.string1 %t.string_array1 | FileCheck --check-prefix=OUTPUT17 %s || [ ${PIPESTATUS[0]} == 1 ] +OUTPUT17: @ [] +OUTPUT17: - "hello" +OUTPUT17: + ["hello"] + +# String vs object +################## + +RUN: echo '{"hello": "world"}' > %t.object + +RUN: llvm-jd %t.object %t.string1 | FileCheck --check-prefix=OUTPUT18 %s || [ ${PIPESTATUS[0]} == 1 ] +OUTPUT18: @ [] +OUTPUT18: - {"hello":"world"} +OUTPUT18: + "hello" + +RUN: llvm-jd %t.string1 %t.object | FileCheck --check-prefix=OUTPUT19 %s || [ ${PIPESTATUS[0]} == 1 ] +OUTPUT19: @ [] +OUTPUT19: - "hello" +OUTPUT19: + {"hello":"world"} diff --git a/llvm/utils/llvm-jd/CMakeLists.txt b/llvm/utils/llvm-jd/CMakeLists.txt new file mode 100644 --- /dev/null +++ b/llvm/utils/llvm-jd/CMakeLists.txt @@ -0,0 +1,6 @@ +add_llvm_utility( + llvm-jd + llvm-jd.cpp +) + +target_link_libraries(llvm-jd PRIVATE 
LLVMSupport) diff --git a/llvm/utils/llvm-jd/README.rst b/llvm/utils/llvm-jd/README.rst new file mode 100644 --- /dev/null +++ b/llvm/utils/llvm-jd/README.rst @@ -0,0 +1,51 @@ +llvm-jd +======= + +llvm-jd is a C++ port of `jd`_, which is a JSON diff tool. It supports the subset +that we need for LLVM, as described below. We're happy to accept patches if you +need the other capabilities of jd. + +Why isn't FileCheck sufficient for diffing JSON? +------------------------------------------------ + +FileCheck is well-suited to output that follows a particular sequence and where +delta outputs are fairly readable upon inspection. Our JSON output can sometimes +appear in different orders, and due to the structuring of JSON, it's often +difficult to work out where in the JSON object something is. llvm-jd is able to +report the exact position of a diff more directly, and can also take order (or +lack thereof) into account. + +Why not use the original jd? +---------------------------- + +jd is a Go-based tool, and the `LLVM project is reluctant to add a dependency on Go`_, since it doesn't +support all the platforms that we target. + +Why isn't everything supported? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +We're only supporting the components we need because they have active users. If +you're able to use Go in your environment, you should use the original jd tool. +If you can't, and require a feature that we haven't implemented yet, then you're +welcome to extend the tool. + +Which features are supported? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +llvm-jd supports all of the default usage and turning arrays into multisets (``-mset``). + +Which features aren't supported? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +* Color output (``-color``) +* Patching (``-p``) +* Sets (``-set``) +* Set keys (``-setkeys``) +* YAML support (``-yaml``) +* Web UI support (``-port=N``) +* RFC 6902 mode (``-format=patch``) +* RFC 7386 mode (``-format=merge``) +* Format mode translation (``-t=FORMAT``) + +.. 
_`jd`: https://github.com/josephburnett/jd +.. _`LLVM project is reluctant to add a dependency on Go`: https://discourse.llvm.org/t/adding-a-json-diff-tool-to-clangs-ci diff --git a/llvm/utils/llvm-jd/llvm-jd.cpp b/llvm/utils/llvm-jd/llvm-jd.cpp new file mode 100644 --- /dev/null +++ b/llvm/utils/llvm-jd/llvm-jd.cpp @@ -0,0 +1,429 @@ +//===- llvm-jd.cpp - Diff two JSON files ----------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// llvm-jd diffs two JSON files, which is useful for checking output from Clang, +// Clang Static Analyser, and Clang Tidy. It is a port of the Go tool jd. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/STLExtras.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/InitLLVM.h" +#include "llvm/Support/JSON.h" +#include +#include +#include +#include +#include +#include +#include + +namespace cl = llvm::cl; +namespace json = llvm::json; + +static cl::OptionCategory Cat("llvm-jd Options"); +static cl::opt + Output("o", cl::desc("Write to instead of STDOUT."), cl::cat(Cat), + cl::value_desc("output")); +static cl::opt Set("mset", cl::desc("Treat arrays as multisets."), + cl::cat(Cat)); + +enum FormatType { + jd, + patch, + merge, +}; +static cl::opt Format( + "f", cl::desc("Produce diff in format."), cl::cat(Cat), + cl::init(jd), + cl::values(clEnumVal(jd, "JSON diff (default)"), + clEnumVal(patch, "RFC 6902"), clEnumVal(merge, "RFC 7386"))); +static cl::opt File1(cl::Positional, cl::desc("FILE1"), + cl::Required, cl::cat(Cat)); +static cl::opt File2(cl::Positional, cl::desc("FILE2"), + cl::cat(Cat)); + +// Reports that reading from `Path` was 
unsuccessful and aborts. +[[noreturn]] static void BadRead(std::string_view Path) noexcept { + llvm::errs() << "Unable to read from '" << Path << "', exiting...\n"; + std::abort(); +} + +// Reads every character from `in`, whitespace included, until end-of-file and +// returns it as a string. Returns an empty optional if reading stopped before +// end-of-file was reached. +[[nodiscard]] static std::optional +Read(std::istream &in) noexcept { + in >> std::noskipws; + std::string Data{std::istream_iterator(in), + std::istream_iterator()}; + + if (in.eof()) { // Only a read that hit end-of-file is complete. + return Data; + } + + return std::nullopt; +} + +// Reads all data from the file named by `Path`, or from standard input if +// `Path` is empty. Aborts via BadRead if the file can't be opened or the read +// doesn't reach end-of-file. +[[nodiscard]] static std::string Read(std::string_view Path) noexcept { + std::optional Result; + if (Path.empty()) { + assert(std::cin && "file1 shouldn't be read from stdin"); + Result = Read(std::cin); + } else if (auto File = std::ifstream(Path)) { + Result = Read(File); + } + + if (Result) { + return *std::move(Result); + } + + BadRead(Path); +} + +// Parses a JSON value from `Path` (standard input if `Path` is empty). Returns +// an empty optional if the input is empty; if the input can't be parsed, the +// parse error is printed and the program aborts. +[[nodiscard]] static std::optional +ReadJSON(std::string_view Path) noexcept { + std::string Data = Read(Path); + + if (Data.empty()) { + return std::nullopt; + } + + llvm::Expected Result = json::parse(Data); + if (Result) { + return std::move(Result.get()); + } + + llvm::Error Error = Result.takeError(); + llvm::errs() << Error << '\n'; + std::abort(); +} + +// Represents the difference between two JSON values. There are three components +// to all diffs: a path, some removals, and some additions. +// +// Paths are used to map out the sequence of values that lead to the removals +// and additions.
Each value contributes slightly differently: +// +// * Scalars don't contribute to paths. +// * Arrays contribute their index if the value is present on the LHS, and -1 +// otherwise. +// * Multisets contribute exactly `[["multiset"],{}]`. +// * Objects contribute the key to their value. +// +// **Example 1** +// +// Given `{"a": {"b": {"c": [3,4,5]}}}` and `{"a": {"b": {"c": [5,6,7]}}}`, we +// end up with the following diff: +// +// ``` +// @ ["a","b","c",2] +// - 5 +// + 7 +// @ ["a","b","c",1] +// - 4 +// + 6 +// @ ["a","b","c",0] +// - 3 +// + 5 +// ``` +// +// With a multiset this is rendered as follows (entry order is unspecified): +// +// ``` +// @ ["a","b","c",["multiset"],{}] +// - 3 +// - 4 +// + 7 +// + 6 +// ``` +// +// **Example 2** +// +// Given `[]` and `[1]`, we end up with the following diff: +// +// ``` +// @ [-1] +// + 1 +// ``` +struct DiffResult { + std::vector Path; + std::vector Removals; + std::vector Additions; + + [[nodiscard]] bool is_empty() const noexcept { + return Path.empty() and Removals.empty() and Additions.empty(); + } + + friend std::ostream &operator<<(std::ostream &OS, const DiffResult &Diff) { + if (Diff.is_empty()) { // Print nothing, not even a header. + return OS; + } + + OS << "@ [" << llvm::join(Diff.Path, ",") << "]\n"; + for (std::string_view R : Diff.Removals) { + OS << "- " << R << '\n'; + } + for (std::string_view A : Diff.Additions) { + OS << "+ " << A << '\n'; + } + return OS; + } +}; + +// Writes `Data` to `Path`, or to stdout if `Path` is empty or can't be opened. +static void WriteToFile(const std::vector &Data, + std::string_view Path) { + if (Path.empty()) { + Stdout: + llvm::copy(Data, std::ostream_iterator(std::cout)); + return; + } + + std::ofstream File(Path, std::ios_base::trunc); + if (!File) { + llvm::errs() << "Unable to open '" << Path + << "' for writing, defaulting to stdout...\n"; + goto Stdout; // Reuse the stdout branch above. + } + + llvm::copy(Data, std::ostream_iterator(File)); +} + +// Converts a JSON value into a string, preserving string quotes.
+[[nodiscard]] std::string to_string(const json::Value &V) { + std::string Result; + llvm::raw_string_ostream Stream(Result); + Stream << V; + return Result; +} + +[[nodiscard]] std::string to_string(const json::ObjectKey &K) { + return to_string(json::Value(K.str())); // Quoted and JSON-escaped key. +} + +// Returns true if, and only if, `o` represents a scalar. +[[nodiscard]] bool IsScalar(const json::Value &o) { + switch (o.kind()) { + case json::Value::Boolean: + case json::Value::Number: + case json::Value::String: + case json::Value::Null: + return true; + default: + return false; + } +} + +// Returns true if, and only if, ``First`` and ``Second`` have different kinds +// or are both scalars. +[[nodiscard]] bool CanTriviallyDiff(const json::Value &First, + const json::Value &Second) { + return First.kind() != Second.kind() || IsScalar(First); +} + +// Compares ``First`` and ``Second`` and returns what makes them different. +[[nodiscard]] std::vector +Diff(const json::Array &First, const json::Array &Second, bool IsSet); +[[nodiscard]] std::vector +Diff(const json::Object &First, const json::Object &Second, bool IsSet); + +[[nodiscard]] std::vector +Diff(const json::Value &First, const json::Value &Second, bool IsSet) { + if (CanTriviallyDiff(First, Second)) { // Report a whole-value replacement. + return {{{}, {to_string(First)}, {to_string(Second)}}}; + } + + assert(First.kind() == Second.kind()); + assert((First.kind() == json::Value::Array || + First.kind() == json::Value::Object) && + "json::Value kind unaccounted for"); + + return First.kind() == json::Value::Array + ? Diff(*First.getAsArray(), *Second.getAsArray(), IsSet) + : Diff(*First.getAsObject(), *Second.getAsObject(), IsSet); +} + +// Returns a DiffResult only if ``First`` and ``Second`` differ as multisets, +// i.e. some element occurs more often in one array than in the other.
+[[nodiscard]] std::optional DiffSet(const json::Array &First, + const json::Array &Second) { + + std::unordered_multiset SecondBag(Second.begin(), Second.end()); + for (const json::Value &I : First) { + if (auto Found = SecondBag.find(I); Found != SecondBag.end()) { + SecondBag.erase(Found); // Cancel one matching occurrence only. + } + } + + std::unordered_multiset FirstBag(First.begin(), First.end()); + for (const json::Value &I : Second) { + if (auto Found = FirstBag.find(I); Found != FirstBag.end()) { + FirstBag.erase(Found); // Cancel one matching occurrence only. + } + } + + if (FirstBag.empty() && SecondBag.empty()) { + return std::nullopt; + } + + DiffResult Result = { + {"[\"multiset\"]", "{}"}, + {}, + {}, + }; + + Result.Removals.reserve(FirstBag.size()); + llvm::transform(FirstBag, std::back_inserter(Result.Removals), + [](const json::Value &x) { return to_string(x); }); + Result.Additions.reserve(SecondBag.size()); + llvm::transform(SecondBag, std::back_inserter(Result.Additions), + [](const json::Value &x) { return to_string(x); }); + + return {std::move(Result)}; +} + +std::vector Diff(const json::Array &First, + const json::Array &Second, bool IsSet) { + if (IsSet) { // Order-insensitive comparison. + auto Result = DiffSet(First, Second); + if (Result) { + return {*std::move(Result)}; + } + return {}; + } + + std::vector Result; + std::pair Difference = llvm::mismatch(First, Second); + + while (Difference.first != First.end() && Difference.second != Second.end()) { + std::ptrdiff_t Index = std::distance(First.begin(), Difference.first); + + if (CanTriviallyDiff(*Difference.first, *Difference.second)) { + Result.push_back(DiffResult{{std::to_string(Index)}, + {to_string(*Difference.first)}, + {to_string(*Difference.second)}}); + } else { + std::vector InnerDiff = + Diff(*Difference.first, *Difference.second, IsSet); + for (auto &&D : InnerDiff) { + D.Path.insert(D.Path.cbegin(), std::to_string(Index)); + Result.push_back(std::move(D)); + } + } + + Difference = std::mismatch(Difference.first + 1, First.end(), + Difference.second + 1, Second.end()); + } + +
std::ptrdiff_t Index = std::distance(First.begin(), Difference.first); + std::transform( + Difference.first, First.end(), std::back_inserter(Result), + [Index](const json::Value &value) mutable { + return DiffResult{{std::to_string(Index++)}, {to_string(value)}, {}}; + }); + std::transform(Difference.second, Second.end(), std::back_inserter(Result), + [](const json::Value &value) { + return DiffResult{{"-1"}, {}, {to_string(value)}}; + }); + + std::reverse(Result.begin(), Result.end()); // Emit highest indices first. + return Result; +} + +std::vector Diff(const json::Object &First, + const json::Object &Second, bool IsSet) { + std::vector Result; + for (const auto &I : First) { + auto Found = Second.find(I.getFirst()); + if (Found == Second.end()) { + Result.push_back( + {{to_string(I.getFirst())}, {to_string(I.getSecond())}, {}}); + continue; + } + + if (I.getSecond() == Found->getSecond()) { + continue; + } + + if (CanTriviallyDiff(I.getSecond(), Found->getSecond())) { + Result.push_back({{to_string(I.getFirst())}, + {to_string(I.getSecond())}, + {to_string(Found->getSecond())}}); + continue; + } else { + std::vector InnerDiff = + Diff(I.getSecond(), Found->getSecond(), IsSet); + for (auto &&D : InnerDiff) { + D.Path.insert(D.Path.cbegin(), to_string(I.getFirst())); + Result.push_back(std::move(D)); + } + } + } + + // Since the first loop takes care of elements with the same keys having + // different values, we just need to extract elements that aren't in First.
+ for (const auto &I : Second) { + auto Found = First.find(I.getFirst()); + if (Found == First.end()) { + Result.push_back( + {{to_string(I.getFirst())}, {}, {to_string(I.getSecond())}}); + continue; + } + } + + llvm::sort(Result, [](const DiffResult &x, const DiffResult &y) { + return x.Path < y.Path; + }); + return Result; +} +// end lib + +int main(int argc, const char *argv[]) { + llvm::StringRef overview = + "Diff JSON files.\n\n" + "Prints the diff of FILE1 and FILE2 to STDOUT.\n" + "The second input is read from STDIN if FILE2 is omitted.\n"; + llvm::InitLLVM X(argc, argv); + cl::HideUnrelatedOptions(Cat); + if (!cl::ParseCommandLineOptions(argc, argv, overview)) { + return 1; + } + + if (Format.getValue() != jd) { + llvm::errs() << "-f=" << Format.getValue() << " not implemented yet.\n"; + return 1; + } + + std::optional First = ReadJSON(File1.getValue()); + std::optional Second = ReadJSON(File2.getValue()); + if (First == Second) { // Identical values, or both inputs empty. + return 0; + } + + if (First == std::nullopt) { // Empty FILE1: all of FILE2 is an addition. + WriteToFile({{{}, {}, {to_string(*Second)}}}, Output.getValue()); + return 1; // A diff was written; same status as the general case below. + } + + if (Second == std::nullopt) { // Empty FILE2: all of FILE1 is a removal. + WriteToFile({{{}, {to_string(*First)}, {}}}, Output.getValue()); + return 1; // A diff was written; same status as the general case below. + } + + std::vector D = Diff(*First, *Second, Set); // Both are engaged here. + WriteToFile(D, Output.getValue()); + return not D.empty(); +}