diff --git a/clang/include/clang/Basic/Sarif.h b/clang/include/clang/Basic/Sarif.h new file mode 100644 --- /dev/null +++ b/clang/include/clang/Basic/Sarif.h @@ -0,0 +1,434 @@ +//== clang/Basic/Sarif.h - SARIF Diagnostics Object Model -------*- C++ -*--==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// Defines clang::SarifDocumentWriter, clang::SarifRule, clang::SarifResult. +/// +/// The document built can be accessed as a JSON Object. +/// Several value semantic types are also introduced which represent properties +/// of the SARIF standard, such as 'artifact', 'result', 'rule'. +/// +/// A SARIF (Static Analysis Results Interchange Format) document is a JSON +/// document that describes in detail the results of running static analysis +/// tools on a project. Each (non-trivial) document consists of at least one +/// "run", each of which is composed of details such as: +/// * Tool: The tool that was run +/// * Rules: The rules applied during the tool run, represented by +/// \c reportingDescriptor objects in SARIF +/// * Results: The matches for the rules applied against the project(s) being +/// evaluated, represented by \c result objects in SARIF +/// +/// Reference: +/// 1. The SARIF standard +/// 2. SARIF \c reportingDescriptor +/// 3. SARIF \c result
+//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_BASIC_SARIF_H +#define LLVM_CLANG_BASIC_SARIF_H + +#include "clang/Basic/LangOptions.h" +#include "clang/Basic/SourceLocation.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/JSON.h" +#include <string> + +namespace clang { + +using namespace llvm; // NOTE(review): a header-scope using-directive leaks llvm:: names into every includer. + +class SarifDocumentWriter; +class SourceManager; +class FullSourceRange; + +namespace detail { + +/// \internal +/// An artifact location is SARIF's way of describing the complete Location +/// of an artifact encountered during analysis. The \c artifactLocation object +/// typically consists of a URI, and/or an index to reference the artifact it +/// locates. +/// +/// This builder makes an additional assumption: that every artifact encountered +/// by \c clang will be a physical, top-level artifact. This is why the static +/// creation method \ref SarifArtifactLocation::create takes a mandatory URI +/// parameter. The official standard states that either a \c URI or \c Index +/// must be available in the object; \c clang picks the \c URI as a reasonable +/// default, because it intends to deal in physical artifacts for now. +/// +/// Reference: +/// 1. artifactLocation object +/// 2. \ref SarifArtifact +class SarifArtifactLocation { +private: + friend class clang::SarifDocumentWriter; + + Optional<uint32_t> Index; + std::string URI; + + explicit SarifArtifactLocation(const std::string &URI) : URI(URI) {} + +public: + static SarifArtifactLocation create(StringRef URI) { + return SarifArtifactLocation{URI.str()}; + } + + SarifArtifactLocation setIndex(uint32_t Idx) { + Index = Idx; + return *this; // By value, like the other builders, so chaining on a temporary cannot dangle. + } +}; + +/// \internal +/// An artifact in SARIF is any object (a sequence of bytes) addressable by +/// a URI (RFC 3986). 
The most common type of artifact for clang's use-case +/// would be source files. SARIF's artifact object is described in detail in +/// section 3.24. +/// +/// Since every clang artifact MUST have a location (there being no nested +/// artifacts), the creation method \ref SarifArtifact::create requires a +/// \ref SarifArtifactLocation object. +/// +/// Reference: +/// 1. artifact object +class SarifArtifact { +private: + friend class clang::SarifDocumentWriter; + + Optional Offset; + Optional Length; + std::string MimeType; + SarifArtifactLocation Location; + SmallVector Roles; + + explicit SarifArtifact(const SarifArtifactLocation &Loc) : Location(Loc) {} + +public: + static SarifArtifact create(const SarifArtifactLocation &Loc) { + return SarifArtifact{Loc}; + } + + SarifArtifact setOffset(uint32_t ArtifactOffset) { + Offset = ArtifactOffset; + return *this; + } + + SarifArtifact setLength(size_t NumBytes) { + Length = NumBytes; + return *this; + } + + SarifArtifact setRoles(std::initializer_list ArtifactRoles) { + Roles.assign(ArtifactRoles); + return *this; + } + + SarifArtifact setMimeType(StringRef ArtifactMimeType) { + MimeType = ArtifactMimeType.str(); + return *this; + } +}; + +} // namespace detail + +enum class ThreadFlowImportance { Important, Essential, Unimportant }; + +/// A thread flow is a sequence of code locations that specify a possible path +/// through a single thread of execution. +/// A thread flow in SARIF is related to a code flow, which describes +/// the progress of one or more programs through one or more thread flows. +/// +/// Reference: +/// 1. threadFlow object +/// 2. 
codeFlow object +class ThreadFlow { + friend class SarifDocumentWriter; + + FullSourceRange Range; + ThreadFlowImportance Importance; + std::string Message; + + ThreadFlow() = default; + +public: + static ThreadFlow create() { return {}; } + + ThreadFlow setRange(const FullSourceRange &ItemRange) { + Range = ItemRange; + return *this; + } + + ThreadFlow setImportance(ThreadFlowImportance ItemImportance) { + Importance = ItemImportance; + return *this; + } + + ThreadFlow setMessage(StringRef ItemMessage) { + Message = ItemMessage.str(); + return *this; + } +}; + +/// A SARIF rule (\c reportingDescriptor object) contains information that +/// describes a reporting item generated by a tool. A reporting item is +/// either a result of analysis or a notification of a condition encountered by +/// the tool. Rules are arbitrary but are identifiable by a hierarchical +/// rule-id. +/// +/// This builder provides an interface to create SARIF \c reportingDescriptor +/// objects via the \ref SarifRule::create static method. +/// +/// Reference: +/// 1. reportingDescriptor object +class SarifRule { + friend class clang::SarifDocumentWriter; + + std::string Name; + std::string Id; + std::string Description; + std::string HelpURI; + + SarifRule() = default; + +public: + static SarifRule create() { return {}; } + + SarifRule setName(StringRef RuleName) { + Name = RuleName.str(); + return *this; + } + + SarifRule setRuleId(StringRef RuleId) { + Id = RuleId.str(); + return *this; + } + + SarifRule setDescription(StringRef RuleDesc) { + Description = RuleDesc.str(); + return *this; + } + + SarifRule setHelpURI(StringRef RuleHelpURI) { + HelpURI = RuleHelpURI.str(); + return *this; + } +}; + +/// A SARIF result (also called a "reporting item") is a unit of output +/// produced when one of the tool's \c reportingDescriptor objects finds a match +/// on the file being analysed by the tool. 
+/// +/// This builder provides a \ref SarifResult::create static method that can be +/// used to create an empty shell onto which attributes can be added using the +/// \c setX(...) methods. +/// +/// For example: +/// \code{.cpp} +/// SarifResult result = SarifResult::create(...) +/// .setRuleId(...) +/// .setDiagnosticMessage(...); +/// \endcode +/// +/// Reference: +/// 1. SARIF \c result
+class SarifResult { + friend class clang::SarifDocumentWriter; + + // NOTE: + // This type cannot fit all possible indexes representable by JSON, but is + // chosen because it has to be non-negative, and because the JSON encoder + // used requires this be a type that can be safely promoted to \c int64_t + uint32_t RuleIdx; + std::string RuleId; + std::string DiagnosticMessage; + SmallVector Locations; + SmallVector ThreadFlows; + + SarifResult() = delete; + explicit SarifResult(uint32_t RuleIdx) : RuleIdx(RuleIdx) {} + +public: + static SarifResult create(uint32_t RuleIdx) { return SarifResult{RuleIdx}; } + + SarifResult setIndex(uint32_t Idx) { + RuleIdx = Idx; + return *this; + } + + SarifResult setRuleId(StringRef Id) { + RuleId = Id.str(); + return *this; + } + + SarifResult setDiagnosticMessage(StringRef Message) { + DiagnosticMessage = Message.str(); + return *this; + } + + SarifResult setLocations(ArrayRef DiagLocs) { + Locations.assign(DiagLocs.begin(), DiagLocs.end()); + return *this; + } + SarifResult setThreadFlows(ArrayRef ThreadFlowResults) { + ThreadFlows.assign(ThreadFlowResults.begin(), ThreadFlowResults.end()); + return *this; + } +}; + +/// This class handles creating a valid SARIF document given various input +/// attributes. However, it requires an ordering among certain method calls: +/// +/// 1. Because every SARIF document must contain at least 1 \c run, callers +/// must ensure that \ref SarifDocumentWriter::createRun is called before +/// any other methods. +/// 2. If SarifDocumentWriter::endRun is called, callers MUST call +/// SarifDocumentWriter::createRun, before invoking any of the result +/// aggregation methods such as SarifDocumentWriter::appendResult etc. +class SarifDocumentWriter { +private: + const StringRef SchemaURI{ + "https://raw.githubusercontent.com/oasis-tcs/sarif-spec/master/Schemata/" + "sarif-schema-2.1.0.json"}; + const StringRef SchemaVersion{"2.1.0"}; + + /// \internal + /// Return a reference to the current tool. 
Asserts that a run exists. + json::Object &getCurrentTool(); + + /// \internal + /// Checks if there is a run associated with this document. + /// + /// \return true if a run is associated with this document + bool hasRun() const; + + /// \internal + /// Reset portions of the internal state so that the document is ready to + /// receive data for a new run. + void reset(); + + /// \internal + /// Return a mutable reference to the current run, after asserting it exists. + /// + /// \note It is undefined behavior to call this if a run does not exist in + /// the SARIF document. + json::Object &getCurrentRun(); + + /// Create a code flow object for the given threadflows. + /// See \ref ThreadFlow. + /// + /// \note It is undefined behavior to call this if a run does not exist in + /// the SARIF document. + json::Object createCodeFlow(ArrayRef ThreadFlows); + + /// Build the SARIF \c threadFlows array for the given thread flows. + json::Array createThreadFlows(ArrayRef ThreadFlows); + + /// Add the given \ref FullSourceRange to the SARIF document as a physical + /// location, with its corresponding artifact. + json::Object createPhysicalLocation(const FullSourceRange &R); + +public: + /// Create a new empty SARIF document + SarifDocumentWriter() : Closed(true){}; + + /// Create a new empty SARIF document with the given language options + SarifDocumentWriter(const LangOptions &LangOpts) + : LangOpts(LangOpts), Closed(true) {} + + /// Release resources held by this SARIF document + ~SarifDocumentWriter() = default; + + /// Create a new run with which any upcoming analysis will be associated. + /// Each run requires specifying the tool that is generating reporting items. + void createRun(StringRef ShortToolName, StringRef LongToolName); + + /// If there is a current run, end it. This method collects various + /// book-keeping required to clear and close resources associated + /// with the current run, but may also allocate some for the next run. 
+ /// + /// If no run exists, this amounts to a no-op. + void endRun(); + + /// Create a new rule, and associate it with the current run. + /// Returns integer rule index for the created rule that is unique within + /// the current run, which can then be used to create a \ref SarifResult + /// to add to the current run. Note that a rule must exist before being + /// referenced by a result. + /// + /// \pre + /// There must be a run associated with the document, failing to do so will + /// cause undefined behaviour. + size_t createRule(StringRef Name, StringRef RuleId, StringRef Description, + StringRef HelpURI = ""); + + /// Associate the given rule with the current run. + /// Returns integer rule index for the created rule that is unique within + /// the current run, which can then be used to create a \ref SarifResult + /// to add to the current run. Note that a rule must exist before being + /// referenced by a result. + /// + /// \pre + /// There must be a run associated with the document, failing to do so will + /// cause undefined behaviour. + size_t createRule(const SarifRule &Rule); + + /// Append a new result to the currently in-flight run. + /// + /// \pre + /// There must be a run associated with the document, failing to do so will + /// cause undefined behaviour. + /// \pre + /// \c RuleIdx used to create the result must correspond to a rule known by + /// the SARIF document. i.e. it must be the value returned by a previous call + /// to \ref createRule. + void appendResult(const SarifResult &SarifResult); + + /// End the current run (if any) and return the SARIF document. + /// Calling this will trigger a copy of the internal state including all + /// reported diagnostics, resulting in an expensive call. + json::Object createDocument(); + +private: + /// Language options to use for the current SARIF document + const LangOptions LangOpts; + + /// Flag to track the state of this document: + /// A closed document is one on which a new run must be created. 
+ /// This could be a document that is freshly created, or has recently + /// finished writing to a previous run. + bool Closed; + + /// A sequence of SARIF runs. + /// A run object describes a single run of an analysis tool and contains the + /// output of that run. + /// + /// Reference: run object + json::Array Runs; + + /// The list of rules associated with the most recent active run. These are + /// defined using the diagnostics passed to the SarifDocumentWriter. Each rule + /// need not be unique through the result set. E.g. there may be several + /// 'syntax' errors throughout code under analysis, each of which has its + /// own specific diagnostic message (and consequently, RuleId). Rules are + /// also known as "reportingDescriptor" objects in SARIF. + /// + /// Reference: rules property + SmallVector CurrentRules; + + /// The list of artifacts that have been encountered on the most recent active + /// run. An artifact is defined in SARIF as a sequence of bytes addressable + /// by a URI. A common example for clang's case would be files named by + /// filesystem paths. The map is keyed by the artifact's URI. + StringMap CurrentArtifacts; +}; +} // namespace clang + +#endif // LLVM_CLANG_BASIC_SARIF_H diff --git a/clang/include/clang/Basic/SourceLocation.h b/clang/include/clang/Basic/SourceLocation.h --- a/clang/include/clang/Basic/SourceLocation.h +++ b/clang/include/clang/Basic/SourceLocation.h @@ -458,6 +458,42 @@ } }; +/// A pair of FullSourceLoc objects. +/// +/// Useful for passing to methods that expect SourceRanges and SourceManagers +/// together. 
+class FullSourceRange { + FullSourceLoc B; + FullSourceLoc E; + +public: + FullSourceRange() = default; + FullSourceRange(FullSourceLoc Begin, FullSourceLoc End) : B(Begin), E(End) {} + + FullSourceLoc getBegin() const { return B; } + FullSourceLoc getEnd() const { return E; } + + bool isValid() const { return B.isValid() && E.isValid(); } + bool isInvalid() const { return !isValid(); } + + bool operator==(const FullSourceRange &X) const { + return B == X.B && E == X.E; + } + + bool operator!=(const FullSourceRange &X) const { + return B != X.B || E != X.E; + } + + /// Returns true iff \p other is wholly contained within this range. + bool fullyContains(const FullSourceRange &other) const { + return B <= other.B && E >= other.E; + } + + void print(raw_ostream &OS) const; + std::string printToString() const; + void dump() const; +}; + } // namespace clang namespace llvm { diff --git a/clang/lib/Basic/CMakeLists.txt b/clang/lib/Basic/CMakeLists.txt --- a/clang/lib/Basic/CMakeLists.txt +++ b/clang/lib/Basic/CMakeLists.txt @@ -63,6 +63,7 @@ NoSanitizeList.cpp SanitizerSpecialCaseList.cpp Sanitizers.cpp + Sarif.cpp SourceLocation.cpp SourceManager.cpp Stack.cpp diff --git a/clang/lib/Basic/Sarif.cpp b/clang/lib/Basic/Sarif.cpp new file mode 100644 --- /dev/null +++ b/clang/lib/Basic/Sarif.cpp @@ -0,0 +1,402 @@ +//===-- clang/Basic/Sarif.cpp - SarifDocumentWriter class definition ------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file contains the definition of the SarifDocumentWriter class, and +/// associated builders such as: +/// - \ref SarifArtifact +/// - \ref SarifArtifactLocation +/// - \ref SarifRule +/// - \ref SarifResult +//===----------------------------------------------------------------------===// +#include "clang/Basic/Sarif.h" +#include "clang/Basic/LangOptions.h" +#include "clang/Basic/SourceLocation.h" +#include "clang/Basic/SourceManager.h" +#include "clang/Basic/Version.h" +#include "clang/Lex/Lexer.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/ConvertUTF.h" +#include "llvm/Support/JSON.h" +#include "llvm/Support/Path.h" + +#include +#include +#include + +using namespace clang; +using namespace llvm; + +using clang::detail::SarifArtifact; +using clang::detail::SarifArtifactLocation; + +static StringRef getFileName(const FileEntry &FE) { + StringRef Filename = FE.tryGetRealPathName(); + if (Filename.empty()) + Filename = FE.getName(); + return Filename; +} +/// \name URI +/// @{ + +/// \internal +/// \brief +/// Return the RFC3986 encoding of the input character. +/// +/// \param C Character to encode to RFC3986 +/// +/// \return The RFC3986 representation of \p C +static std::string percentEncodeURICharacter(char C) { + // RFC 3986 claims alpha, numeric, and this handful of + // characters are not reserved for the path component and + // should be written out directly. Otherwise, percent + // encode the character and write that out instead of the + // reserved character. 
+ if (llvm::isAlnum(C) || + StringRef::npos != StringRef("-._~:@!$&'()*+,;=").find(C)) + return std::string(&C, 1); + return "%" + llvm::toHex(StringRef(&C, 1)); +} + +/// \internal +/// \brief Return a URI representing the given file name. +/// +/// \param Filename The filename to be represented as URI +/// +/// \return RFC3986 URI representing the input file name +static std::string fileNameToURI(StringRef Filename) { + llvm::SmallString<32> Ret = StringRef("file://"); + + // Get the root name to see if it has a URI authority. + StringRef Root = sys::path::root_name(Filename); + if (Root.startswith("//")) { + // There is an authority, so add it to the URI. + Ret += Root.drop_front(2).str(); + } else if (!Root.empty()) { + // There is no authority, so end the component and add the root to the URI. + Ret += Twine("/" + Root).str(); + } + + auto Iter = sys::path::begin(Filename), End = sys::path::end(Filename); + assert(Iter != End && "Expected there to be a non-root path component."); + // Add the rest of the path components, encoding any reserved characters; + // we skip past the first path component, as it was handled above. + std::for_each(++Iter, End, [&Ret](StringRef Component) { + // For reasons unknown to me, we may get a backslash with Windows native + // paths for the initial backslash following the drive component, which + // we need to ignore as a URI path part. + if (Component == "\\") + return; + + // Add the separator between the previous path part and the one being + // currently processed. + Ret += "/"; + + // URI encode the part. 
+ for (char C : Component) { + Ret += percentEncodeURICharacter(C); + } + }); + + return std::string(Ret); +} +/// @} + +/// \brief Calculate the column position expressed in the number of UTF-8 code +/// points from line start to the source location +/// +/// \param Loc The source location whose column needs to be calculated +/// \param TokenLen Optional hint for when the token is multiple bytes long +/// +/// \return The 1-based column number, counted in UTF-8 code points from the +/// start of the line to the effective source location +static unsigned int adjustColumnPos(FullSourceLoc Loc, + unsigned int TokenLen = 0) { + assert(!Loc.isInvalid() && "invalid Loc when adjusting column position"); + // File offsets are 0-based and columns 1-based, so offset + 1 >= column (equal on the first line). + std::pair LocInfo = Loc.getExpansionLoc().getDecomposedLoc(); + assert(LocInfo.second + 1 >= Loc.getExpansionColumnNumber() && + "position in file is before column number?"); + + Optional Buf = + Loc.getManager().getBufferOrNone(LocInfo.first); + assert(Buf && "got an invalid buffer for the location's file"); + assert(Buf->getBufferSize() >= (LocInfo.second + TokenLen) && + "token extends past end of buffer?"); + + // Adjust the offset to be the start of the line, since we'll be counting + // Unicode characters from there until our column offset. 
+ unsigned int Off = LocInfo.second - (Loc.getExpansionColumnNumber() - 1); + unsigned int Ret = 1; + while (Off < (LocInfo.second + TokenLen)) { + Off += getNumBytesForUTF8(Buf->getBuffer()[Off]); + Ret++; + } + + return Ret; +} + +/// \name SARIF Utilities +/// @{ + +/// \internal Build a SARIF \c message object holding \p Text. +static json::Object createMessage(StringRef Text) { + return json::Object{{"text", Text.str()}}; +} + +/// \internal Build a SARIF \c region for \p R from expansion line/column numbers. +static json::Object createTextRegion(const LangOptions &LO, + const FullSourceRange &R) { + json::Object Region{{"startLine", R.getBegin().getExpansionLineNumber()}, + {"startColumn", adjustColumnPos(R.getBegin())}}; + if (R.getBegin() == R.getEnd()) { + Region["endColumn"] = adjustColumnPos(R.getBegin()); + } else { + Region["endLine"] = R.getEnd().getExpansionLineNumber(); + Region["endColumn"] = adjustColumnPos( + R.getEnd(), Lexer::MeasureTokenLength(R.getEnd().getLocWithOffset(0), + R.getEnd().getManager(), LO)); + } + return Region; +} + +static json::Object createLocation(json::Object &&PhysicalLocation, + StringRef Message = "") { + json::Object Ret{{"physicalLocation", std::move(PhysicalLocation)}}; + if (!Message.empty()) + Ret.insert({"message", createMessage(Message)}); + return Ret; +} + +static StringRef importanceToStr(ThreadFlowImportance I) { + switch (I) { + case ThreadFlowImportance::Important: + return "important"; + case ThreadFlowImportance::Essential: + return "essential"; + case ThreadFlowImportance::Unimportant: + return "unimportant"; + } + llvm_unreachable("Fully covered switch is not so fully covered"); +} + +static json::Object +createThreadFlowLocation(json::Object &&Location, + const ThreadFlowImportance &Importance) { + return json::Object{{"location", std::move(Location)}, + {"importance", importanceToStr(Importance)}}; +} +/// @} + +json::Object +SarifDocumentWriter::createPhysicalLocation(const FullSourceRange &R) { + assert(R.isValid() && + "Cannot create a physicalLocation from invalid SourceRange!"); + const FileEntry *FE = 
R.getBegin().getExpansionLoc().getFileEntry(); + assert(FE != nullptr && "Diagnostic does not exist within a valid file!"); + + const std::string &FileURI = fileNameToURI(getFileName(*FE)); + auto I = CurrentArtifacts.find(FileURI); + + if (I == CurrentArtifacts.end()) { + uint32_t Idx = static_cast<uint32_t>(CurrentArtifacts.size()); + const SarifArtifactLocation Location = + SarifArtifactLocation::create(FileURI).setIndex(Idx); // Held by value so it cannot refer to the temporary builder. + const SarifArtifact &Artifact = SarifArtifact::create(Location) + .setRoles({"resultFile"}) + .setLength(FE->getSize()) + .setMimeType("text/plain"); + auto StatusIter = CurrentArtifacts.insert({FileURI, Artifact}); + // If inserted, ensure the original iterator points to the newly inserted + // element, so it can be used downstream + if (StatusIter.second) + I = StatusIter.first; + } + assert(I != CurrentArtifacts.end() && "Failed to insert new artifact"); + const SarifArtifactLocation &Location = I->second.Location; + uint32_t Idx = Location.Index.getValue(); + return json::Object{ + {{"artifactLocation", json::Object{{{"uri", FileURI}, {"index", Idx}}}}, + {"region", createTextRegion(LangOpts, R)}}}; +} + +json::Object &SarifDocumentWriter::getCurrentTool() { + assert(!Closed && "SARIF Document is closed. 
" + "Need to call createRun() before using getCurrentTool()!"); + + // Since Closed = false here, expect there to be at least 1 Run, anything + // else is an invalid state + assert(!Runs.empty() && "There are no runs associated with the document!"); + + return *Runs.back().getAsObject()->get("tool")->getAsObject(); +} + +void SarifDocumentWriter::reset() { + CurrentRules.clear(); + CurrentArtifacts.clear(); +} + +void SarifDocumentWriter::endRun() { + // Exit early if trying to close a closed Document + if (Closed) { + reset(); + return; + } + + // Since Closed = false here, expect there to be at least 1 Run, anything + // else is an invalid state + assert(!Runs.empty() && "There are no runs associated with the document!"); + + // Flush all the rules into tool.driver.rules as reportingDescriptor objects + json::Object &Tool = getCurrentTool(); + json::Array Rules; + for (const SarifRule &R : CurrentRules) { + json::Object Rule{{"name", R.Name}, {"id", R.Id}, + {"fullDescription", json::Object{{"text", R.Description}}}}; + if (!R.HelpURI.empty()) + Rule["helpUri"] = R.HelpURI; + Rules.emplace_back(std::move(Rule)); + } + (*Tool.getObject("driver"))["rules"] = std::move(Rules); + + // Flush all the artifacts. NOTE(review): StringMap iteration order is not insertion order, so array positions may disagree with each "index" - confirm. + json::Object &Run = getCurrentRun(); + json::Array *Artifacts = Run.getArray("artifacts"); + for (const auto &Pair : CurrentArtifacts) { + const SarifArtifact &A = Pair.getValue(); + json::Object Loc{{"uri", A.Location.URI}}; + if (A.Location.Index.hasValue()) { + Loc["index"] = static_cast<int64_t>(A.Location.Index.getValue()); + } + json::Object Artifact; + Artifact["location"] = std::move(Loc); + if (A.Length.hasValue()) + Artifact["length"] = static_cast<int64_t>(A.Length.getValue()); + if (!A.Roles.empty()) + Artifact["roles"] = json::Array(A.Roles); + if (!A.MimeType.empty()) + Artifact["mimeType"] = A.MimeType; + if (A.Offset.hasValue()) + Artifact["offset"] = A.Offset; + Artifacts->push_back(json::Value(std::move(Artifact))); + } + + // Clear, reset temporaries before next run + reset(); + + // Mark the document as closed + Closed = true; +} + +json::Array 
+SarifDocumentWriter::createThreadFlows(ArrayRef ThreadFlows) { + json::Object Ret{{"locations", json::Array{}}}; + json::Array Locs; + for (const auto &ThreadFlow : ThreadFlows) { + json::Object PLoc = createPhysicalLocation(ThreadFlow.Range); + json::Object Loc = createLocation(std::move(PLoc), ThreadFlow.Message); + Locs.emplace_back( + createThreadFlowLocation(std::move(Loc), ThreadFlow.Importance)); + } + Ret["locations"] = std::move(Locs); + return json::Array{std::move(Ret)}; +} + +json::Object +SarifDocumentWriter::createCodeFlow(ArrayRef ThreadFlows) { + return json::Object{{"threadFlows", createThreadFlows(ThreadFlows)}}; +} + +void SarifDocumentWriter::createRun(StringRef ShortToolName, + StringRef LongToolName) { + // Flush and close any run that is still open + endRun(); + + // Signify a new run has begun + Closed = false; + + json::Object Tool{ + {"driver", + json::Object{{"name", ShortToolName}, + {"fullName", LongToolName}, + {"language", "en-US"}, + {"version", getClangToolFullVersion(ShortToolName)}}}}; + json::Object theRun{{"tool", std::move(Tool)}, + {"results", {}}, + {"artifacts", {}}, + {"columnKind", "unicodeCodePoints"}}; + Runs.emplace_back(std::move(theRun)); +} + +json::Object &SarifDocumentWriter::getCurrentRun() { + assert(!Closed && + "SARIF Document is closed. 
" + "Can only getCurrentRun() if document is opened via createRun(), " + "create a run first"); + + // Since Closed = false here, expect there to be at least 1 Run, anything + // else is an invalid state + assert(!Runs.empty() && "There are no runs associated with the document!"); + return *Runs.back().getAsObject(); +} + +size_t SarifDocumentWriter::createRule(StringRef Name, StringRef RuleId, + StringRef Description, + StringRef HelpURI) { + size_t Ret = CurrentRules.size(); + SarifRule Rule = SarifRule::create() + .setName(Name) + .setRuleId(RuleId) + .setDescription(Description) + .setHelpURI(HelpURI); + CurrentRules.emplace_back(std::move(Rule)); + return Ret; +} + +size_t SarifDocumentWriter::createRule(const SarifRule &Rule) { + size_t Ret = CurrentRules.size(); + CurrentRules.emplace_back(Rule); + return Ret; +} + +void SarifDocumentWriter::appendResult(const SarifResult &Result) { + size_t RuleIdx = Result.RuleIdx; + assert(RuleIdx < CurrentRules.size() && + "Trying to reference a rule that doesn't exist"); + json::Object Ret{{"message", createMessage(Result.DiagnosticMessage)}, + {"ruleIndex", static_cast<int64_t>(RuleIdx)}, + {"ruleId", CurrentRules[RuleIdx].Id}}; + if (!Result.Locations.empty()) { + json::Array Locs; + for (auto &Range : Result.Locations) { + Locs.emplace_back(createLocation(createPhysicalLocation(Range))); + } + Ret["locations"] = std::move(Locs); + } + if (!Result.ThreadFlows.empty()) + Ret["codeFlows"] = json::Array{createCodeFlow(Result.ThreadFlows)}; + json::Object &Run = getCurrentRun(); + json::Array *Results = Run.getArray("results"); + Results->emplace_back(std::move(Ret)); +} + +json::Object SarifDocumentWriter::createDocument() { + // End the current run, if any, so its rules and artifacts are flushed + endRun(); + + json::Object doc{ + {"$schema", SchemaURI}, + {"version", SchemaVersion}, + }; + if (!Runs.empty()) + doc["runs"] = json::Array(Runs); + return doc; +} diff --git a/clang/lib/Basic/SourceLocation.cpp b/clang/lib/Basic/SourceLocation.cpp --- 
a/clang/lib/Basic/SourceLocation.cpp +++ b/clang/lib/Basic/SourceLocation.cpp @@ -270,3 +270,30 @@ std::pair FullSourceLoc::getDecomposedLoc() const { return SrcMgr->getDecomposedLoc(*this); } + +//===----------------------------------------------------------------------===// +// FullSourceRange +//===----------------------------------------------------------------------===// + +void FullSourceRange::print(raw_ostream &OS) const { + // Emit "<begin[, end]>"; the end is printed only when it differs from begin. + OS << '<'; + PresumedLoc PrintedLoc = PrintDifference(OS, B.getManager(), B, {}); + if (B != E) { + OS << ", "; + PrintDifference(OS, E.getManager(), E, PrintedLoc); + } + OS << '>'; +} + +LLVM_DUMP_METHOD std::string FullSourceRange::printToString() const { + std::string S; + llvm::raw_string_ostream OS(S); + print(OS); + return OS.str(); +} + +LLVM_DUMP_METHOD void FullSourceRange::dump() const { + this->print(llvm::errs()); + llvm::errs() << '\n'; +} diff --git a/clang/unittests/Basic/CMakeLists.txt b/clang/unittests/Basic/CMakeLists.txt --- a/clang/unittests/Basic/CMakeLists.txt +++ b/clang/unittests/Basic/CMakeLists.txt @@ -10,6 +10,7 @@ FileManagerTest.cpp LineOffsetMappingTest.cpp SanitizersTest.cpp + SarifTest.cpp SourceManagerTest.cpp ) diff --git a/clang/unittests/Basic/SarifTest.cpp b/clang/unittests/Basic/SarifTest.cpp new file mode 100644 --- /dev/null +++ b/clang/unittests/Basic/SarifTest.cpp @@ -0,0 +1,138 @@ +//===- unittests/Basic/SarifTest.cpp - Test writing SARIF documents -------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clang/Basic/Sarif.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/JSON.h" +#include + +#include "gmock/gmock.h" +#include "gtest/gtest-death-test.h" +#include "gtest/gtest.h" + +using namespace clang; +using namespace llvm; + +namespace { + +TEST(SarifDocumentWriterTest, createEmptyDocument) { + // GIVEN: + SarifDocumentWriter writer; + + // WHEN: (no run is created; only the top-level keys are collected) + const json::Object &emptyDocument = writer.createDocument(); + std::vector keys(emptyDocument.size()); + std::transform(emptyDocument.begin(), emptyDocument.end(), keys.begin(), + [](auto item) { return item.getFirst(); }); + + // THEN: + ASSERT_THAT(keys, testing::UnorderedElementsAre("$schema", "version")); +} + +// Test that a newly inserted run will associate correct tool names +TEST(SarifDocumentWriterTest, documentWithARun) { + // GIVEN: + SarifDocumentWriter writer; + const char *shortName = "sariftest"; + const char *longName = "sarif writer test"; + + // WHEN: + writer.createRun(shortName, longName); + writer.endRun(); + const json::Object &document = writer.createDocument(); + const json::Array *runs = document.getArray("runs"); + + // THEN: + // A run was created + ASSERT_THAT(runs, testing::NotNull()); + + // It is the only run + ASSERT_EQ(runs->size(), 1UL); + + // The tool recorded for the run is the one passed to createRun() + const json::Object *driver = + runs->begin()->getAsObject()->getObject("tool")->getObject("driver"); + ASSERT_THAT(driver, testing::NotNull()); + + ASSERT_TRUE(driver->getString("name").hasValue()); + ASSERT_TRUE(driver->getString("fullName").hasValue()); + ASSERT_TRUE(driver->getString("language").hasValue()); + + EXPECT_EQ(driver->getString("name").getValue(), shortName); + EXPECT_EQ(driver->getString("fullName").getValue(), longName); + EXPECT_EQ(driver->getString("language").getValue(), "en-US"); +} + +// Test adding result 
without a run causes a crash +TEST(SarifDocumentWriterTest, addingResultsWillCrashIfThereIsNoRun) { + // GIVEN: + SarifDocumentWriter writer; + + // WHEN: + // A SarifDocumentWriter::createRun(...) was not called prior to + // SarifDocumentWriter::appendResult(...) + // But a rule exists + auto ruleIdx = writer.createRule(SarifRule::create()); + SarifResult &&emptyResult = SarifResult::create(ruleIdx); + + // THEN: (the matched text comes from an assert(), so this only dies when assertions are enabled) + ASSERT_DEATH({ writer.appendResult(emptyResult); }, ".*create a run first.*"); +} + +// Test adding rule and result shows up in the final document +TEST(SarifDocumentWriterTest, addResultWithValidRuleIsOk) { + // GIVEN: + SarifDocumentWriter writer; + const SarifRule &rule = + SarifRule::create() + .setRuleId("clang.unittest") + .setDescription("Example rule created during unit tests") + .setName("clang unit test"); + + // WHEN: + writer.createRun("sarif test", "sarif test runner"); + unsigned ruleIdx = writer.createRule(rule); + const SarifResult &result = SarifResult::create(ruleIdx); + + writer.appendResult(result); + const json::Object &document = writer.createDocument(); + + // THEN: + // A document with a valid schema and version exists + ASSERT_THAT(document.get("$schema"), ::testing::NotNull()); + ASSERT_THAT(document.get("version"), ::testing::NotNull()); + const json::Array *runs = document.getArray("runs"); + + // A run exists on this document + ASSERT_THAT(runs, ::testing::NotNull()); + ASSERT_EQ(runs->size(), 1UL); + const json::Object *theRun = runs->back().getAsObject(); + + // The run has slots for the tool, results and artifacts + ASSERT_THAT(theRun->get("tool"), ::testing::NotNull()); + ASSERT_THAT(theRun->get("results"), ::testing::NotNull()); + ASSERT_THAT(theRun->get("artifacts"), ::testing::NotNull()); + const json::Object *driver = theRun->getObject("tool")->getObject("driver"); + const json::Array *results = theRun->getArray("results"); + const json::Array *artifacts = theRun->getArray("artifacts"); + + // The tool is as 
expected + ASSERT_TRUE(driver->getString("name").hasValue()); + ASSERT_TRUE(driver->getString("fullName").hasValue()); + + EXPECT_EQ(driver->getString("name").getValue(), "sarif test"); + EXPECT_EQ(driver->getString("fullName").getValue(), "sarif test runner"); + + // The results are as expected + EXPECT_EQ(results->size(), 1UL); + + // The artifacts are as expected (no result carried a location) + EXPECT_TRUE(artifacts->empty()); +} + +} // namespace