diff --git a/clang/include/clang/Basic/DiagnosticOptions.h b/clang/include/clang/Basic/DiagnosticOptions.h --- a/clang/include/clang/Basic/DiagnosticOptions.h +++ b/clang/include/clang/Basic/DiagnosticOptions.h @@ -37,12 +37,12 @@ /// A bitmask representing the diagnostic levels used by /// VerifyDiagnosticConsumer. enum class DiagnosticLevelMask : unsigned { - None = 0, - Note = 1 << 0, - Remark = 1 << 1, + None = 0, + Note = 1 << 0, + Remark = 1 << 1, Warning = 1 << 2, - Error = 1 << 3, - All = Note | Remark | Warning | Error + Error = 1 << 3, + All = Note | Remark | Warning | Error }; inline DiagnosticLevelMask operator~(DiagnosticLevelMask M) { @@ -53,28 +53,28 @@ inline DiagnosticLevelMask operator|(DiagnosticLevelMask LHS, DiagnosticLevelMask RHS) { using UT = std::underlying_type::type; - return static_cast( - static_cast(LHS) | static_cast(RHS)); + return static_cast(static_cast(LHS) | + static_cast(RHS)); } inline DiagnosticLevelMask operator&(DiagnosticLevelMask LHS, DiagnosticLevelMask RHS) { using UT = std::underlying_type::type; - return static_cast( - static_cast(LHS) & static_cast(RHS)); + return static_cast(static_cast(LHS) & + static_cast(RHS)); } -raw_ostream& operator<<(raw_ostream& Out, DiagnosticLevelMask M); +raw_ostream &operator<<(raw_ostream &Out, DiagnosticLevelMask M); /// Options for controlling the compiler diagnostics engine. -class DiagnosticOptions : public RefCountedBase{ +class DiagnosticOptions : public RefCountedBase { friend bool ParseDiagnosticArgs(DiagnosticOptions &, llvm::opt::ArgList &, clang::DiagnosticsEngine *, bool); friend class CompilerInvocation; public: - enum TextDiagnosticFormat { Clang, MSVC, Vi }; + enum TextDiagnosticFormat { Clang, MSVC, Vi, SARIF }; // Default values. enum { @@ -125,8 +125,8 @@ public: // Define accessors/mutators for diagnostic options of enumeration type. 
#define DIAGOPT(Name, Bits, Default) -#define ENUM_DIAGOPT(Name, Type, Bits, Default) \ - Type get##Name() const { return static_cast(Name); } \ +#define ENUM_DIAGOPT(Name, Type, Bits, Default) \ + Type get##Name() const { return static_cast(Name); } \ void set##Name(Type Value) { Name = static_cast(Value); } #include "clang/Basic/DiagnosticOptions.def" diff --git a/clang/include/clang/Basic/Sarif.h b/clang/include/clang/Basic/Sarif.h new file mode 100644 --- /dev/null +++ b/clang/include/clang/Basic/Sarif.h @@ -0,0 +1,436 @@ +//== clang/Basic/Sarif.h - SARIF Diagnostics Object Model -------*- C++ -*--==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// \file +// This file defines SarifDocument, a class for pretty-printing clang +// diagnostics in the SARIF standard. The document created can be accessed +// as a JSON Object. This class only implements a valid subset of SARIF, which +// is limited to conforming to all 'SHALL' / 'MUST' definitions for properties +// of interest to Clang. +// +// A SARIF (Static Analysis Results Interchange Format) document is JSON +// document that describes in detail the results of running static analysis +// tools on a project. Each (non-trivial) document consists of at least one +// "run", which are themselves composed of details such as: +// * Tool: The tool that was run +// * Rules: The rules applied during the tool run, represented by +// \c reportingDescriptor objects in SARIF +// * Results: The matches for the rules applied against the project(s) being +// evaluated, represented by \c result objects in SARIF +// +// Reference: +// 1. The +// SARIF standard +// 2. SARIF
+//    \c reportingDescriptor
+// 3. SARIF
+//    \c result
+// +//===----------------------------------------------------------------------===// + +#ifndef CLANG_BASIC_SARIF_H +#define CLANG_BASIC_SARIF_H + +#include "clang/Basic/SourceLocation.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/JSON.h" +#include + +namespace clang { + +using namespace llvm; + +class SarifDocumentWriter; +class LangOptions; +class SourceManager; +class FullSourceRange; + +namespace detail { + +/// An artifact location is SARIF's way of describing the complete Location +/// of an artifact encountered during analysis. The \c artifactLocation object +/// typically consists of a URI, and/or an index to reference the artifact it +/// locates. +/// +/// This builder makes an additional assumption: that every artifact encountered +/// by \c clang will be a physical, top-level artifact. Which is why the static +/// creation method \ref SarifArtifactLocation::create takes a mandatory URI +/// parameter. The official standard states that either a \c URI or \c Index +/// must be available in the object, \c clang picks the \c URI as a reasonable, +/// arbitrary default. +/// +/// Reference: +/// 1. artifactLocation +/// object +/// 2. \ref SarifArtifact +class SarifArtifactLocation { +private: + friend class clang::SarifDocumentWriter; + + llvm::Optional Index; + StringRef URI; + + SarifArtifactLocation(const StringRef &URI) : Index(), URI(URI) {} + +public: + static SarifArtifactLocation create(const StringRef &URI) { + return SarifArtifactLocation{URI}; + } + + SarifArtifactLocation &setIndex(uint32_t Idx) { + this->Index = Idx; + return *this; + } +}; + +/// An artifact in SARIF is any object (a sequence of bytes) addressable by +/// a URI (RFC 3986). The most common type of artifact for clang's use-case +/// would be source files. SARIF's artifact object is described in detail in +/// section 3.24. 
+// +/// Since every in clang artifact MUST have a location (there being no nested +/// artifacts), the creation method \ref SarifArtifact::create requires a +/// \ref SarifArtifactLocation object +/// +/// Reference: +/// 1. artifact +/// object +class SarifArtifact { +private: + friend class clang::SarifDocumentWriter; + + llvm::Optional Offset; + llvm::Optional Length; + StringRef MimeType; + SarifArtifactLocation Location; + SmallVector Roles; + + SarifArtifact(const SarifArtifactLocation &Loc) + : Offset(), Length(), MimeType(), Location(Loc), Roles() {} + +public: + static SarifArtifact create(const SarifArtifactLocation &Loc) { + return SarifArtifactLocation{Loc}; + } + + SarifArtifact &setOffset(uint32_t Offset) { + this->Offset = Offset; + return *this; + } + + SarifArtifact &setLength(size_t NumBytes) { + this->Length = NumBytes; + return *this; + } + + SarifArtifact &setRoles(const std::initializer_list &Roles) { + this->Roles.assign(Roles); + return *this; + } + + SarifArtifact &setMimeType(const StringRef &MimeType) { + this->MimeType = MimeType; + return *this; + } +}; + +} // namespace detail + +/// A thread flow is a sequence of code locations that specify a possible path +/// through a single thread of execution. +/// A thread flow in SARIF is related to a code flow which describes +/// the progress of one or more programs through one or more thread flows. +/// +/// Reference: +/// 1. threadFlow +/// object +/// 2. 
codeFlow +/// object +class ThreadFlow { + friend class SarifDocumentWriter; + + FullSourceRange Range; + StringRef Importance; + StringRef Message; + + ThreadFlow() = default; + +public: + static ThreadFlow create() { return {}; } + + ThreadFlow &setRange(const FullSourceRange &Range) { + this->Range = Range; + return *this; + } + + ThreadFlow &setImportance(const StringRef &Importance) { + this->Importance = Importance; + return *this; + } + + ThreadFlow &setMessage(const StringRef &Message) { + this->Message = Message; + return *this; + } +}; + +/// A SARIF rule (\c reportingDescriptor object) contains information that +/// describes a reporting item generated by a tool. A reporting item is +/// either a result of analysis or notification of a condition encountered by +/// the tool. Rules are arbitrary but are identifiable by a hierarchical +/// rule-id. +/// +/// This builder provides an interface to create SARIF \c reportingDescriptor +/// objects via the \ref SarifRule::create static method. +/// +/// Reference: +/// 1. reportingDescriptor +/// object +class SarifRule { + friend class clang::SarifDocumentWriter; + + StringRef Name; + StringRef RuleId; + StringRef Description; + StringRef HelpURI; + + SarifRule() = default; + +public: + static SarifRule create() { return {}; } + + SarifRule &setName(const StringRef &Name) { + this->Name = Name; + return *this; + } + + SarifRule &setRuleId(const StringRef &RuleId) { + this->RuleId = RuleId; + return *this; + } + + SarifRule &setDescription(const StringRef &Description) { + this->Description = Description; + return *this; + } + + SarifRule &setHelpURI(const StringRef &HelpURI) { + this->HelpURI = HelpURI; + return *this; + } +}; + +/// A SARIF result (also called a "reporting item") is a unit of output +/// produced when one of the tool's \c reportingDescriptor encounters a match +/// on the file being analysed by the tool. 
+/// +/// This builder provides a \ref SarifResult::create static method that can be +/// used to create an empty shell onto which attributes can be added using the +/// \c setX(...) methods. +/// +/// For example: +/// \code{.cpp} +/// SarifResult result = SarifResult::create() +/// .setIndex(...) +/// .setRuleId(...) +/// .setDiagnosticMessage(...); +/// \endcode +/// +/// Reference: +/// 1. SARIF
+///    \c result
+class SarifResult { + friend class clang::SarifDocumentWriter; + + uint32_t RuleIdx; + StringRef RuleID; + StringRef DiagnosticMessage; + ArrayRef Locations; + ArrayRef ThreadFlows; + + SarifResult() = default; + +public: + static SarifResult create() { return {}; } + + SarifResult &setIndex(uint32_t idx) { + this->RuleIdx = idx; + return *this; + } + + SarifResult &setRuleId(const StringRef &RuleID) { + this->RuleID = RuleID; + return *this; + } + + SarifResult &setDiagnosticMessage(const StringRef &Message) { + this->DiagnosticMessage = Message; + return *this; + } + + SarifResult &setLocations(const ArrayRef &DiagLocs) { + this->Locations = DiagLocs; + return *this; + } + SarifResult &setThreadFlows(const ArrayRef &ThreadFlows) { + this->ThreadFlows = ThreadFlows; + return *this; + } +}; + +/// This class handles creating a valid SARIF document given various input +/// attributes. However, it requires an ordering among certain method calls: +/// +/// 1. Because every SARIF document must contain at least 1 \c run, callers +/// must ensure that \ref SarifDocumentWriter::createRun is is called before +/// anyother methods. +/// 2. If SarifDocumentWriter::endRun is called, callers MUST call +/// SarifDocumentWriter::createRun, before invoking any of the result +/// aggregation methods such as SarifDocumentWriter::appendResult etc. +class SarifDocumentWriter { +private: + const StringRef SchemaURI{ + "https://raw.githubusercontent.com/oasis-tcs/sarif-spec/master/Schemata/" + "sarif-schema-2.1.0.json"}; + const StringRef SchemaVersion{"2.1.0"}; + + /// \internal + /// Return a pointer to the current tool. If no run exists, this will + /// crash. 
+ json::Object *getCurrentTool(); + + /// \internal + /// Checks if there is a run associated with this document + /// + /// \return true on success + bool hasRun() const; + + /// \internal + /// Reset portions of the internal state so that the document is ready to + /// recieve data for a new run + void reset(); + + /// \internal + /// \brief Return a mutable pointer to the current run, if it exists. + /// + /// \note If a run does not exist in the SARIF document, calling this will + /// trigger undefined behaviour + json::Object *currentRun(); + + /// Create a code flow object for the given threadflows. + /// See \link ThreadFlow \endlink + /// + /// \note If a run does not exist in the SARIF document, calling this will + /// trigger undefined behaviour + json::Object createCodeFlow(const ArrayRef &ThreadFlows); + + /// Add the given threadflows to the ones this SARIF document knows about + json::Array createThreadFlows(const ArrayRef &ThreadFlows); + + /// Add the given \ref FullSourceRange to the SARIF document as a physical + /// location, with it's corresponding artifact + json::Object createPhysicalLocation(const FullSourceRange &R); + +public: + /// Create a new empty SARIF document + SarifDocumentWriter() = default; + + /// Create a new empty SARIF document with the given language options + SarifDocumentWriter(const LangOptions &LangOpts) : LangOpts(&LangOpts) {} + + /// Release resources held by this SARIF document + ~SarifDocumentWriter() = default; + + /// Create a new run with which any upcoming analysis will be associated. + /// Each run requires specifying the tool that is generating reporting items + void createRun(const StringRef &ShortToolName, const StringRef &LongToolName); + + /// If there is a current run, end it. This method collects various + /// book-keeping required to clear and close resources associated + /// with the current run, but may also allocate some for the next run. + /// + /// If no run exists, this amounts to a no-op. 
+ void endRun(); + + /// Create a new rule, and associate it with the current run + /// Returns integer rule index for the created rule that is unique within + /// the current run + /// + /// \pre + /// There must be a run associated with the document, failing to do so will + /// cause undefined behaviour + size_t createRule(const StringRef &Name, const StringRef &RuleId, + const StringRef &Description, + const StringRef &HelpURI = ""); + + /// Associate the given rule with the current run + /// + /// \pre + /// There must be a run associated with the document, failing to do so will + /// cause undefined behaviour + size_t createRule(const SarifRule &Rule); + + /// Append a new result to the currently in-flight run. + /// + /// \pre + /// There must be a run associated with the document, failing to do so will + /// cause undefined behaviour + /// \pre + /// \c RuleIdx must correspond to a rule known by the SARIF document. i.e. + /// it must be the value returned by a previous call to \ref createRule + void appendResult(size_t RuleIdx, const SarifResult &SarifResult); + + /// Return the SARIF document in its current state. + /// Calling this will trigger a copy of the internal state including all + /// reported diagnostics, resulting in an expensive call. + json::Object createDocument(); + +private: + /// Langauge options to use for the current SARIF document + const LangOptions *LangOpts; + + /// A sequence of SARIF runs + /// A run object describes a single run of an analysis tool and contains the + /// output of that run + /// + /// Reference: run + /// object + json::Array Runs; + + /// The list of rules associated with the most recent active run. These are + /// defined using the diagnostics passed to the SarifDocument. Each rule + /// need not be unique through the result set. E.g. there may be several + /// 'syntax' errors throughout code under analysis, each of which has its + /// own specific diagnostic message (and consequently, RuleId). 
Rules are + /// also known as "reportingDescriptor" objects in SARIF + /// + /// Reference: rules + /// property + SmallVector CurrentRules; + + /// The list of artifacts that have been encountered on the most recent active + /// run. An artifact is defined in SARIF as a sequence of bytes addressable + /// by a URI. A common example for clang's case would be files named by + /// filesystem paths. + StringMap CurrentArtifacts; +}; +} // namespace clang + +#endif // CLANG_BASIC_SARIF_H diff --git a/clang/include/clang/Basic/SourceLocation.h b/clang/include/clang/Basic/SourceLocation.h --- a/clang/include/clang/Basic/SourceLocation.h +++ b/clang/include/clang/Basic/SourceLocation.h @@ -101,7 +101,7 @@ enum : UIntTy { MacroIDBit = 1ULL << (8 * sizeof(UIntTy) - 1) }; public: - bool isFileID() const { return (ID & MacroIDBit) == 0; } + bool isFileID() const { return (ID & MacroIDBit) == 0; } bool isMacroID() const { return (ID & MacroIDBit) != 0; } /// Return true if this is a valid SourceLocation object. @@ -134,9 +134,9 @@ /// Return a source location with the specified offset from this /// SourceLocation. SourceLocation getLocWithOffset(IntTy Offset) const { - assert(((getOffset()+Offset) & MacroIDBit) == 0 && "offset overflow"); + assert(((getOffset() + Offset) & MacroIDBit) == 0 && "offset overflow"); SourceLocation L; - L.ID = ID+Offset; + L.ID = ID + Offset; return L; } @@ -162,10 +162,10 @@ /// /// This should only be passed to SourceLocation::getFromPtrEncoding, it /// should not be inspected directly. - void* getPtrEncoding() const { + void *getPtrEncoding() const { // Double cast to avoid a warning "cast to pointer from integer of different // size". 
- return (void*)(uintptr_t)getRawEncoding(); + return (void *)(uintptr_t)getRawEncoding(); } /// Turn a pointer encoding of a SourceLocation object back @@ -227,13 +227,9 @@ bool isValid() const { return B.isValid() && E.isValid(); } bool isInvalid() const { return !isValid(); } - bool operator==(const SourceRange &X) const { - return B == X.B && E == X.E; - } + bool operator==(const SourceRange &X) const { return B == X.B && E == X.E; } - bool operator!=(const SourceRange &X) const { - return B != X.B || E != X.E; - } + bool operator!=(const SourceRange &X) const { return B != X.B || E != X.E; } // Returns true iff other is wholly contained within this range. bool fullyContains(const SourceRange &other) const { @@ -438,7 +434,7 @@ /// Comparison function class, useful for sorting FullSourceLocs. struct BeforeThanCompare { - bool operator()(const FullSourceLoc& lhs, const FullSourceLoc& rhs) const { + bool operator()(const FullSourceLoc &lhs, const FullSourceLoc &rhs) const { return lhs.isBeforeInTranslationUnitThan(rhs); } }; @@ -448,70 +444,99 @@ /// This is useful for debugging. void dump() const; - friend bool - operator==(const FullSourceLoc &LHS, const FullSourceLoc &RHS) { + friend bool operator==(const FullSourceLoc &LHS, const FullSourceLoc &RHS) { return LHS.getRawEncoding() == RHS.getRawEncoding() && - LHS.SrcMgr == RHS.SrcMgr; + LHS.SrcMgr == RHS.SrcMgr; } - friend bool - operator!=(const FullSourceLoc &LHS, const FullSourceLoc &RHS) { + friend bool operator!=(const FullSourceLoc &LHS, const FullSourceLoc &RHS) { return !(LHS == RHS); } }; +/// A pair of FullSourceLoc objects +/// +/// Useful for passing to methods that expect SourceRanges and SourceManagers +/// together. 
+class FullSourceRange { + FullSourceLoc B; + FullSourceLoc E; + +public: + FullSourceRange() = default; + FullSourceRange(FullSourceLoc Begin, FullSourceLoc End) : B(Begin), E(End) {} + + const FullSourceLoc &getBegin() const { return B; } + const FullSourceLoc &getEnd() const { return E; } + + bool isValid() const { return B.isValid() && E.isValid(); } + bool isInvalid() const { return !isValid(); } + + bool operator==(const FullSourceRange &X) const { + return B == X.B && E == X.E; + } + + bool operator!=(const FullSourceRange &X) const { + return B != X.B || E != X.E; + } + + // Returns true iff other is wholly contained within this range. + bool fullyContains(const FullSourceRange &other) const { + return B <= other.B && E >= other.E; + } + + void print(raw_ostream &OS) const; + std::string printToString() const; + void dump() const; +}; + } // namespace clang namespace llvm { - /// Define DenseMapInfo so that FileID's can be used as keys in DenseMap and - /// DenseSets. - template <> - struct DenseMapInfo { - static clang::FileID getEmptyKey() { - return {}; - } +/// Define DenseMapInfo so that FileID's can be used as keys in DenseMap and +/// DenseSets. +template <> struct DenseMapInfo { + static clang::FileID getEmptyKey() { return {}; } - static clang::FileID getTombstoneKey() { - return clang::FileID::getSentinel(); - } + static clang::FileID getTombstoneKey() { + return clang::FileID::getSentinel(); + } - static unsigned getHashValue(clang::FileID S) { - return S.getHashValue(); - } + static unsigned getHashValue(clang::FileID S) { return S.getHashValue(); } - static bool isEqual(clang::FileID LHS, clang::FileID RHS) { - return LHS == RHS; - } - }; + static bool isEqual(clang::FileID LHS, clang::FileID RHS) { + return LHS == RHS; + } +}; - /// Define DenseMapInfo so that SourceLocation's can be used as keys in - /// DenseMap and DenseSet. This trait class is eqivalent to - /// DenseMapInfo which uses SourceLocation::ID is used as a key. 
- template <> struct DenseMapInfo { - static clang::SourceLocation getEmptyKey() { - constexpr clang::SourceLocation::UIntTy Zero = 0; - return clang::SourceLocation::getFromRawEncoding(~Zero); - } +/// Define DenseMapInfo so that SourceLocation's can be used as keys in +/// DenseMap and DenseSet. This trait class is eqivalent to +/// DenseMapInfo which uses SourceLocation::ID is used as a key. +template <> struct DenseMapInfo { + static clang::SourceLocation getEmptyKey() { + constexpr clang::SourceLocation::UIntTy Zero = 0; + return clang::SourceLocation::getFromRawEncoding(~Zero); + } - static clang::SourceLocation getTombstoneKey() { - constexpr clang::SourceLocation::UIntTy Zero = 0; - return clang::SourceLocation::getFromRawEncoding(~Zero - 1); - } + static clang::SourceLocation getTombstoneKey() { + constexpr clang::SourceLocation::UIntTy Zero = 0; + return clang::SourceLocation::getFromRawEncoding(~Zero - 1); + } - static unsigned getHashValue(clang::SourceLocation Loc) { - return Loc.getHashValue(); - } + static unsigned getHashValue(clang::SourceLocation Loc) { + return Loc.getHashValue(); + } - static bool isEqual(clang::SourceLocation LHS, clang::SourceLocation RHS) { - return LHS == RHS; - } - }; + static bool isEqual(clang::SourceLocation LHS, clang::SourceLocation RHS) { + return LHS == RHS; + } +}; - // Allow calling FoldingSetNodeID::Add with SourceLocation object as parameter - template <> struct FoldingSetTrait { - static void Profile(const clang::SourceLocation &X, FoldingSetNodeID &ID); - }; +// Allow calling FoldingSetNodeID::Add with SourceLocation object as parameter +template <> struct FoldingSetTrait { + static void Profile(const clang::SourceLocation &X, FoldingSetNodeID &ID); +}; } // namespace llvm diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -5174,8 +5174,8 @@ HelpText<"File for serializing 
diagnostics in a binary format">; def fdiagnostics_format : Separate<["-"], "fdiagnostics-format">, - HelpText<"Change diagnostic formatting to match IDE and command line tools">, Values<"clang,msvc,vi">, - NormalizedValuesScope<"DiagnosticOptions">, NormalizedValues<["Clang", "MSVC", "Vi"]>, + HelpText<"Change diagnostic formatting to match IDE and command line tools">, Values<"clang,msvc,vi,sarif">, + NormalizedValuesScope<"DiagnosticOptions">, NormalizedValues<["Clang", "MSVC", "Vi", "SARIF"]>, MarshallingInfoEnum, "Clang">; def fdiagnostics_show_category : Separate<["-"], "fdiagnostics-show-category">, HelpText<"Print diagnostic category">, Values<"none,id,name">, diff --git a/clang/lib/Basic/CMakeLists.txt b/clang/lib/Basic/CMakeLists.txt --- a/clang/lib/Basic/CMakeLists.txt +++ b/clang/lib/Basic/CMakeLists.txt @@ -62,6 +62,7 @@ NoSanitizeList.cpp SanitizerSpecialCaseList.cpp Sanitizers.cpp + Sarif.cpp SourceLocation.cpp SourceManager.cpp Stack.cpp diff --git a/clang/lib/Basic/Sarif.cpp b/clang/lib/Basic/Sarif.cpp new file mode 100644 --- /dev/null +++ b/clang/lib/Basic/Sarif.cpp @@ -0,0 +1,361 @@ +#include "clang/Basic/Sarif.h" +#include "clang/Basic/LangOptions.h" +#include "clang/Basic/SourceLocation.h" +#include "clang/Basic/SourceManager.h" +#include "clang/Basic/Version.h" +#include "clang/Lex/Lexer.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/ConvertUTF.h" +#include "llvm/Support/JSON.h" +#include "llvm/Support/Path.h" + +#include +#include +#include + +using namespace clang; +using namespace llvm; + +using clang::detail::SarifArtifact; +using clang::detail::SarifArtifactLocation; + +namespace { + +StringRef getFileName(const FileEntry &FE) { + StringRef Filename = FE.tryGetRealPathName(); + if (Filename.empty()) + Filename = FE.getName(); + return Filename; +} +/// \name URI +/// @{ + +/// \internal +/// \brief +/// Return the RFC3986 
encoding of the input character +/// +/// \param C Character to encode to RFC3986 +/// +/// \return The RFC3986 representation of \c C +std::string percentEncodeURICharacter(char C) { + // RFC 3986 claims alpha, numeric, and this handful of + // characters are not reserved for the path component and + // should be written out directly. Otherwise, percent + // encode the character and write that out instead of the + // reserved character. + if (llvm::isAlnum(C) || + StringRef::npos != StringRef("-._~:@!$&'()*+,;=").find(C)) + return std::string(&C, 1); + return "%" + llvm::toHex(StringRef(&C, 1)); +} + +/// \internal +/// \brief Return a URI representing the given file name +/// +/// \param Filename +/// +/// \return RFC3986 URI representing the input file name +std::string fileNameToURI(StringRef Filename) { + llvm::SmallString<32> Ret = StringRef("file://"); + + // Get the root name to see if it has a URI authority. + StringRef Root = sys::path::root_name(Filename); + if (Root.startswith("//")) { + // There is an authority, so add it to the URI. + Ret += Root.drop_front(2).str(); + } else if (!Root.empty()) { + // There is no authority, so end the component and add the root to the URI. + Ret += Twine("/" + Root).str(); + } + + auto Iter = sys::path::begin(Filename), End = sys::path::end(Filename); + assert(Iter != End && "Expected there to be a non-root path component."); + // Add the rest of the path components, encoding any reserved characters; + // we skip past the first path component, as it was handled it above. + std::for_each(++Iter, End, [&Ret](StringRef Component) { + // For reasons unknown to me, we may get a backslash with Windows native + // paths for the initial backslash following the drive component, which + // we need to ignore as a URI path part. + if (Component == "\\") + return; + + // Add the separator between the previous path part and the one being + // currently processed. + Ret += "/"; + + // URI encode the part. 
+ for (char C : Component) { + Ret += percentEncodeURICharacter(C); + } + }); + + return std::string(Ret); +} +/// @} + +/// \brief Calculate the column position expressed in the number of UTF-8 code +/// points from column start to the source location +/// +/// \param Loc The source location whose column needs to be calculated +/// \param TokenLen Optional hint for when the token is multiple bytes long +/// +/// \return The column number as a UTF-8 aware byte offset from column start to +/// the effective source location +unsigned int adjustColumnPos(FullSourceLoc Loc, unsigned int TokenLen = 0) { + assert(!Loc.isInvalid() && "invalid Loc when adjusting column position"); + + std::pair LocInfo = Loc.getDecomposedLoc(); + assert(LocInfo.second > Loc.getExpansionColumnNumber() && + "position in file is before column number?"); + + Optional Buf = + Loc.getManager().getBufferOrNone(LocInfo.first); + assert(Buf && "got an invalid buffer for the location's file"); + assert(Buf->getBufferSize() >= (LocInfo.second + TokenLen) && + "token extends past end of buffer?"); + + // Adjust the offset to be the start of the line, since we'll be counting + // Unicode characters from there until our column offset. 
+ unsigned int Off = LocInfo.second - (Loc.getExpansionColumnNumber() - 1); + unsigned int Ret = 1; + while (Off < (LocInfo.second + TokenLen)) { + Off += getNumBytesForUTF8(Buf->getBuffer()[Off]); + Ret++; + } + + return Ret; +} + +/// \name SARIF Utilities +/// @{ + +/// \internal +json::Object createMessage(StringRef Text) { + return json::Object{{"text", Text.str()}}; +} + +/// \internal +json::Object createTextRegion(const LangOptions &LO, const FullSourceRange &R) { + json::Object Region{{"startLine", R.getBegin().getExpansionLineNumber()}, + {"startColumn", adjustColumnPos(R.getBegin())}}; + if (R.getBegin() == R.getEnd()) { + Region["endColumn"] = adjustColumnPos(R.getBegin()); + } else { + Region["endLine"] = R.getEnd().getExpansionLineNumber(); + Region["endColumn"] = adjustColumnPos( + R.getEnd(), Lexer::MeasureTokenLength(R.getEnd().getLocWithOffset(0), + R.getEnd().getManager(), LO)); + } + return Region; +} + +json::Object createLocation(json::Object &&PhysicalLocation, + StringRef Message = "") { + json::Object Ret{{"physicalLocation", std::move(PhysicalLocation)}}; + if (!Message.empty()) + Ret.insert({"message", createMessage(Message)}); + return Ret; +} + +json::Object createThreadFlowLocation(json::Object &&Location, + const StringRef &Importance) { + return json::Object{{"locations", std::move(Location)}, + {"importance", Importance}}; +} +/// @} + +} // namespace + +json::Object +SarifDocumentWriter::createPhysicalLocation(const FullSourceRange &R) { + assert(R.isValid() && + "Cannot create a physicalLocation from invalid SourceRange!"); + const FileEntry *FE = R.getBegin().getExpansionLoc().getFileEntry(); + assert(FE != nullptr && "Diagnostic does not exist within a valid file!"); + + const std::string &FileURI = fileNameToURI(getFileName(*FE)); + auto I = CurrentArtifacts.find(FileURI); + + if (I == CurrentArtifacts.end()) { + uint32_t Idx = static_cast(CurrentArtifacts.size()); + const SarifArtifactLocation &location = + 
SarifArtifactLocation::create(FileURI).setIndex(Idx); + const SarifArtifact &artifact = SarifArtifact::create(location) + .setRoles({"resultFile"}) + .setLength(FE->getSize()) + .setMimeType("text/plain"); + auto statusIter = CurrentArtifacts.insert({FileURI, artifact}); + // If inserted, ensure the original iterator points to the newly inserted + // element, so it can be used downstream + if (statusIter.second) { + I = statusIter.first; + } + } + assert(I != CurrentArtifacts.end() && "Failed to insert new artifact"); + const SarifArtifactLocation &location = I->second.Location; + auto Idx = location.Index.getValue(); + return json::Object{ + {{"artifacts", json::Object{{{"uri", FileURI}, {"index", Idx}}}}, + {"region", createTextRegion(*LangOpts, R)}}}; +} + +json::Object *SarifDocumentWriter::getCurrentTool() { + assert(hasRun() && "Need to call createRun() before using getcurrentTool!"); + return Runs.back().getAsObject()->get("tool")->getAsObject(); +} + +void SarifDocumentWriter::reset() { + CurrentRules.clear(); + CurrentArtifacts.clear(); +} + +void SarifDocumentWriter::endRun() { + if (!hasRun()) { + return; + } + + // Flush all the rules + json::Object &Tool = *getCurrentTool(); + json::Array Rules{}; + for (const SarifRule &R : CurrentRules) { + json::Object theRule{{"name", R.Name}, + {"ruleId", R.RuleId}, + {"fullDescription", R.Description}}; + if (!R.HelpURI.empty()) { + theRule["helpUri"] = R.HelpURI; + } + Rules.emplace_back(std::move(theRule)); + } + Tool["rules"] = std::move(Rules); + + // Flush all the artifacts + json::Array *Artifacts = currentRun()->getArray("artifacts"); + for (const auto &kv : CurrentArtifacts) { + const SarifArtifact &A = kv.getValue(); + json::Object Loc{{"uri", A.Location.URI}}; + if (A.Location.Index.hasValue()) { + Loc["index"] = static_cast(A.Location.Index.getValue()); + } + json::Object theArtifact; + theArtifact["location"] = std::move(Loc); + if (A.Length.hasValue()) { + theArtifact["length"] = 
static_cast(A.Length.getValue());
+    }
+    if (!A.Roles.empty()) {
+      theArtifact["roles"] = json::Array(A.Roles);
+    }
+    if (!A.MimeType.empty()) {
+      theArtifact["mimeType"] = A.MimeType;
+    }
+    if (A.Offset.hasValue()) {
+      theArtifact["offset"] = A.Offset;
+    }
+    Artifacts->push_back(json::Value(std::move(theArtifact)));
+  }
+
+  // Clear, reset temporaries before new run
+  reset();
+}
+
+/// Build the "threadFlows" array of a code flow: one thread-flow object whose
+/// "locations" array holds one entry per element of \p ThreadFlows.
+// NOTE(review): the ArrayRef element type was missing from this listing and is
+// restored here as ThreadFlow -- confirm against the declaration in Sarif.h.
+json::Array SarifDocumentWriter::createThreadFlows(
+    const ArrayRef<ThreadFlow> &ThreadFlows) {
+  json::Array Locs;
+  for (const auto &Flow : ThreadFlows) {
+    json::Object PLoc = createPhysicalLocation(Flow.Range);
+    json::Object Loc = createLocation(std::move(PLoc), Flow.Message);
+    Locs.emplace_back(
+        createThreadFlowLocation(std::move(Loc), Flow.Importance));
+  }
+  json::Object Ret{{"locations", std::move(Locs)}};
+  return json::Array{std::move(Ret)};
+}
+
+/// Wrap the result of createThreadFlows() in a code-flow object, i.e.
+/// {"threadFlows": [...]}.
+json::Object
+SarifDocumentWriter::createCodeFlow(const ArrayRef<ThreadFlow> &ThreadFlows) {
+  return json::Object{{"threadFlows", createThreadFlows(ThreadFlows)}};
+}
+
+/// Call endRun(), then append a new run to Runs. The run records the tool
+/// names and starts with empty "results" and "artifacts" arrays.
+void SarifDocumentWriter::createRun(const StringRef &ShortToolName,
+                                    const StringRef &LongToolName) {
+  // Clear resources associated with a previous run
+  endRun();
+
+  json::Object Driver{{"name", ShortToolName},
+                      {"fullName", LongToolName},
+                      {"language", "en-US"},
+                      {"version", getClangToolFullVersion(ShortToolName)}};
+  json::Object Tool{{"driver", std::move(Driver)}};
+  json::Object NewRun{{"tool", std::move(Tool)},
+                      {"results", json::Array{}},
+                      {"artifacts", json::Array{}},
+                      {"columnKind", "unicodeCodePoints"}};
+  Runs.emplace_back(std::move(NewRun));
+}
+
+/// \returns true if Runs holds at least one run.
+bool SarifDocumentWriter::hasRun() const { return !Runs.empty(); }
+
+/// \returns the JSON object of the last run in Runs. Asserts that a run
+/// exists.
+json::Object *SarifDocumentWriter::currentRun() {
+  assert(hasRun() && "SARIF Document has no runs, create a run first!");
+  return Runs.back().getAsObject();
+}
+
+/// Build a SarifRule from the given fields and append it to CurrentRules.
+/// \returns the index of the new rule, as accepted by appendResult().
+size_t SarifDocumentWriter::createRule(const StringRef &Name,
+                                       const StringRef &RuleId,
+                                       const StringRef &Description,
+                                       const StringRef &HelpURI) {
+  // Delegate to the SarifRule overload so the index bookkeeping lives in one
+  // place.
+  return createRule(SarifRule::create()
+                        .setName(Name)
+                        .setRuleId(RuleId)
+                        .setDescription(Description)
+                        .setHelpURI(HelpURI));
+}
+
+/// Append a copy of \p Rule to CurrentRules and return its index.
+size_t SarifDocumentWriter::createRule(const SarifRule &Rule) {
+  const size_t Idx = CurrentRules.size();
+  CurrentRules.emplace_back(Rule);
+  return Idx;
+}
+
+/// Append \p Result to the "results" array of the current run, tagged with the
+/// rule at \p RuleIdx. Asserts that \p RuleIdx is a valid index into
+/// CurrentRules and (through currentRun()) that a run exists.
+void SarifDocumentWriter::appendResult(size_t RuleIdx,
+                                       const SarifResult &Result) {
+  assert(RuleIdx < CurrentRules.size() &&
+         "Trying to reference a rule that doesn't exist");
+  json::Object Ret{{"message", createMessage(Result.DiagnosticMessage)},
+                   {"ruleIndex", static_cast<int64_t>(RuleIdx)},
+                   {"ruleId", CurrentRules[RuleIdx].RuleId}};
+  // "locations" and "codeFlows" are only emitted when there is something to
+  // put in them.
+  if (!Result.Locations.empty()) {
+    json::Array Locs;
+    for (const auto &Range : Result.Locations)
+      Locs.emplace_back(createLocation(createPhysicalLocation(Range)));
+    Ret["locations"] = std::move(Locs);
+  }
+  if (!Result.ThreadFlows.empty())
+    Ret["codeFlows"] = json::Array{createCodeFlow(Result.ThreadFlows)};
+
+  json::Array *Results = currentRun()->getArray("results");
+  Results->emplace_back(std::move(Ret));
+}
+
+/// Call endRun() and return the document as a JSON object holding "$schema",
+/// "version" and, when Runs is non-empty, a copy of the runs under "runs".
+json::Object SarifDocumentWriter::createDocument() {
+  // Flush all temporaries to their destinations if needed
+  endRun();
+
+  json::Object Doc{
+      {"$schema", SchemaURI},
+      {"version", SchemaVersion},
+  };
+  if (!Runs.empty())
+    Doc["runs"] = json::Array(Runs);
+  return Doc;
+}
diff --git a/clang/lib/Basic/SourceLocation.cpp b/clang/lib/Basic/SourceLocation.cpp
--- a/clang/lib/Basic/SourceLocation.cpp
+++ b/clang/lib/Basic/SourceLocation.cpp
@@ -59,7 +59,7 @@
   ID.AddInteger(X.ID);
 }
 
-void SourceLocation::print(raw_ostream &OS, const SourceManager &SM)const{
+void SourceLocation::print(raw_ostream &OS, const SourceManager &SM) const {
   if (!isValid()) {
     OS << "<invalid loc>";
     return;
@@ -73,8 +73,8 @@
       return;
     }
     // The macro expansion and spelling pos is identical for file locs.
-    OS << PLoc.getFilename() << ':' << PLoc.getLine()
-       << ':' << PLoc.getColumn();
+    OS << PLoc.getFilename() << ':' << PLoc.getLine() << ':'
+       << PLoc.getColumn();
     return;
   }
 
@@ -270,3 +270,36 @@
 std::pair<FileID, unsigned> FullSourceLoc::getDecomposedLoc() const {
   return SrcMgr->getDecomposedLoc(*this);
 }
+
+//===----------------------------------------------------------------------===//
+// FullSourceRange
+//===----------------------------------------------------------------------===//
+
+/// Print the range to \p OS between '<' and '>': the begin location alone
+/// when begin and end are equal, otherwise begin and end separated by ", ".
+void FullSourceRange::print(raw_ostream &OS) const {
+  OS << '<';
+  // The value returned for the begin location is handed back in for the end
+  // location; presumably PrintDifference uses it to omit the parts both
+  // locations share (the helper is defined outside this hunk -- confirm).
+  auto PrintedLoc = PrintDifference(OS, B.getManager(), B, {});
+  if (B != E) {
+    OS << ", ";
+    PrintDifference(OS, E.getManager(), E, PrintedLoc);
+  }
+  OS << '>';
+}
+
+/// \returns the text produced by print() as a string.
+LLVM_DUMP_METHOD std::string FullSourceRange::printToString() const {
+  std::string S;
+  llvm::raw_string_ostream OS(S);
+  print(OS);
+  return OS.str();
+}
+
+/// Print the range to llvm::errs(), followed by a newline.
+LLVM_DUMP_METHOD void FullSourceRange::dump() const {
+  print(llvm::errs());
+  llvm::errs() << '\n';
+}
diff --git a/clang/unittests/Basic/CMakeLists.txt b/clang/unittests/Basic/CMakeLists.txt
--- a/clang/unittests/Basic/CMakeLists.txt
+++ b/clang/unittests/Basic/CMakeLists.txt
@@ -10,6 +10,7 @@
   FileManagerTest.cpp
   LineOffsetMappingTest.cpp
   SanitizersTest.cpp
+  SarifTest.cpp
   SourceManagerTest.cpp
   )
 
diff --git a/clang/unittests/Basic/SarifTest.cpp b/clang/unittests/Basic/SarifTest.cpp
new file mode 100644
--- /dev/null
+++ b/clang/unittests/Basic/SarifTest.cpp
@@ -0,0 +1,166 @@
+//===- unittests/Basic/SarifTest.cpp - Test writing SARIF documents -------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Basic/Sarif.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/JSON.h"
+#include <algorithm>
+#include <vector>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest-death-test.h"
+#include "gtest/gtest.h"
+
+using namespace clang;
+using namespace llvm;
+
+namespace {
+
+// Test that a document without any run only carries the schema and version
+TEST(SarifDocumentWriterTest, createEmptyDocument) {
+  // GIVEN:
+  SarifDocumentWriter writer;
+
+  // WHEN:
+  const json::Object &emptyDocument = writer.createDocument();
+  std::vector<StringRef> keys(emptyDocument.size());
+  // Take each entry by reference so the StringRef points into emptyDocument
+  // rather than into a temporary copy of the entry.
+  std::transform(emptyDocument.begin(), emptyDocument.end(), keys.begin(),
+                 [](const auto &item) -> StringRef { return item.getFirst(); });
+
+  // THEN:
+  ASSERT_THAT(keys, testing::UnorderedElementsAre("$schema", "version"));
+}
+
+// Test that a newly inserted run will associate correct tool names
+TEST(SarifDocumentWriterTest, documentWithARun) {
+  // GIVEN:
+  SarifDocumentWriter writer;
+  const char *shortName = "sariftest";
+  const char *longName = "sarif writer test";
+
+  // WHEN:
+  writer.createRun(shortName, longName);
+  writer.endRun();
+  const json::Object &document = writer.createDocument();
+  const json::Array *runs = document.getArray("runs");
+
+  // THEN:
+  // A run was created
+  ASSERT_THAT(runs, testing::NotNull());
+
+  // It is the only run
+  ASSERT_EQ(runs->size(), 1UL);
+
+  // The tool associated with the run was the tool
+  const json::Object *driver =
+      runs->begin()->getAsObject()->getObject("tool")->getObject("driver");
+  ASSERT_THAT(driver, testing::NotNull());
+
+  ASSERT_TRUE(driver->getString("name").hasValue());
+  ASSERT_TRUE(driver->getString("fullName").hasValue());
+  ASSERT_TRUE(driver->getString("language").hasValue());
+
+  EXPECT_EQ(driver->getString("name").getValue(), shortName);
+  EXPECT_EQ(driver->getString("fullName").getValue(), longName);
+  EXPECT_EQ(driver->getString("language").getValue(), "en-US");
+}
+
+// The two tests below expect appendResult() to die on a failed assert(), so
+// they are only meaningful when assertions are compiled in and the platform
+// supports death tests.
+#if !defined(NDEBUG) && GTEST_HAS_DEATH_TEST
+
+// Test adding result without a run causes a crash
+TEST(SarifDocumentWriterTest, addingResultsWillCrashIfThereIsNoRun) {
+  // GIVEN:
+  SarifDocumentWriter writer;
+  SarifResult &&emptyResult = SarifResult::create();
+
+  // WHEN:
+  //  A SarifDocumentWriter::createRun(...) was not called prior to
+  //  SarifDocumentWriter::appendResult(...)
+  // But a rule exists
+  auto ruleIdx = writer.createRule(SarifRule::create());
+
+  // THEN:
+  ASSERT_DEATH({ writer.appendResult(ruleIdx, emptyResult); },
+               ".*create a run first.*");
+}
+
+// Test adding result for invalid ruleIdx causes a crash
+TEST(SarifDocumentWriterTest, addingResultsWithoutRuleWillCrash) {
+  // GIVEN:
+  SarifDocumentWriter writer;
+  SarifResult &&emptyResult = SarifResult::create();
+
+  // WHEN:
+  writer.createRun("sarif test", "sarif test runner");
+  // But caller forgot to create a rule for this run:
+
+  // THEN:
+  ASSERT_DEATH({ writer.appendResult(0, emptyResult); },
+               "Trying to reference a rule that doesn't exist");
+}
+
+#endif // !defined(NDEBUG) && GTEST_HAS_DEATH_TEST
+
+// Test adding rule and result shows up in the final document
+TEST(SarifDocumentWriterTest, addResultWIthValidRuleIsOk) {
+  // GIVEN:
+  SarifDocumentWriter writer;
+  const SarifResult &result = SarifResult::create();
+  // Held by value: a reference bound to the end of the setter chain would
+  // dangle if the setters return a reference to the temporary.
+  const SarifRule rule =
+      SarifRule::create()
+          .setRuleId("clang.unittest")
+          .setDescription("Example rule created during unit tests")
+          .setName("clang unit test");
+
+  // WHEN:
+  writer.createRun("sarif test", "sarif test runner");
+  const size_t ruleIdx = writer.createRule(rule);
+  writer.appendResult(ruleIdx, result);
+  const json::Object &document = writer.createDocument();
+
+  // THEN:
+  // A document with a valid schema and version exists
+  ASSERT_THAT(document.get("$schema"), ::testing::NotNull());
+  ASSERT_THAT(document.get("version"), ::testing::NotNull());
+  const json::Array *runs = document.getArray("runs");
+
+  // A run exists on this document
+  ASSERT_THAT(runs, ::testing::NotNull());
+  ASSERT_EQ(runs->size(), 1UL);
+  const json::Object *theRun = runs->back().getAsObject();
+
+  // The run has slots for tools, results, rules and artifacts
+  ASSERT_THAT(theRun->get("tool"), ::testing::NotNull());
+  ASSERT_THAT(theRun->get("results"), ::testing::NotNull());
+  ASSERT_THAT(theRun->get("artifacts"), ::testing::NotNull());
+  const json::Object *driver = theRun->getObject("tool")->getObject("driver");
+  const json::Array *results = theRun->getArray("results");
+  const json::Array *artifacts = theRun->getArray("artifacts");
+
+  // The tool is as expected
+  ASSERT_TRUE(driver->getString("name").hasValue());
+  ASSERT_TRUE(driver->getString("fullName").hasValue());
+
+  EXPECT_EQ(driver->getString("name").getValue(), "sarif test");
+  EXPECT_EQ(driver->getString("fullName").getValue(), "sarif test runner");
+
+  // The results are as expected
+  EXPECT_EQ(results->size(), 1UL);
+
+  // The artifacts are as expected
+  EXPECT_TRUE(artifacts->empty());
+}
+
+} // namespace