| //===-- clang/Basic/Sarif.cpp - SarifDocumentWriter class definition ------===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| /// |
| /// \file |
/// This file contains the definition of the SarifDocumentWriter class, and
/// the serialization of associated builders such as:
| /// - \ref SarifArtifact |
| /// - \ref SarifArtifactLocation |
| /// - \ref SarifRule |
| /// - \ref SarifResult |
| //===----------------------------------------------------------------------===// |
| #include "clang/Basic/Sarif.h" |
| #include "clang/Basic/SourceLocation.h" |
| #include "clang/Basic/SourceManager.h" |
| #include "llvm/ADT/ArrayRef.h" |
| #include "llvm/ADT/STLExtras.h" |
| #include "llvm/ADT/StringExtras.h" |
| #include "llvm/ADT/StringRef.h" |
| #include "llvm/Support/ConvertUTF.h" |
| #include "llvm/Support/JSON.h" |
| #include "llvm/Support/Path.h" |
| |
| #include <optional> |
| #include <string> |
| #include <utility> |
| |
| using namespace clang; |
| using namespace llvm; |
| |
| using clang::detail::SarifArtifact; |
| using clang::detail::SarifArtifactLocation; |
| |
| static StringRef getFileName(FileEntryRef FE) { |
| StringRef Filename = FE.getFileEntry().tryGetRealPathName(); |
| if (Filename.empty()) |
| Filename = FE.getName(); |
| return Filename; |
| } |
| /// \name URI |
| /// @{ |
| |
| /// \internal |
| /// \brief |
| /// Return the RFC3986 encoding of the input character. |
| /// |
| /// \param C Character to encode to RFC3986. |
| /// |
| /// \return The RFC3986 representation of \c C. |
| static std::string percentEncodeURICharacter(char C) { |
| // RFC 3986 claims alpha, numeric, and this handful of |
| // characters are not reserved for the path component and |
| // should be written out directly. Otherwise, percent |
| // encode the character and write that out instead of the |
| // reserved character. |
| if (llvm::isAlnum(C) || |
| StringRef::npos != StringRef("-._~:@!$&'()*+,;=").find(C)) |
| return std::string(&C, 1); |
| return "%" + llvm::toHex(StringRef(&C, 1)); |
| } |
| |
| /// \internal |
| /// \brief Return a URI representing the given file name. |
| /// |
| /// \param Filename The filename to be represented as URI. |
| /// |
| /// \return RFC3986 URI representing the input file name. |
| static std::string fileNameToURI(StringRef Filename) { |
| SmallString<32> Ret = StringRef("file://"); |
| |
| // Get the root name to see if it has a URI authority. |
| StringRef Root = sys::path::root_name(Filename); |
| if (Root.starts_with("//")) { |
| // There is an authority, so add it to the URI. |
| Ret += Root.drop_front(2).str(); |
| } else if (!Root.empty()) { |
| // There is no authority, so end the component and add the root to the URI. |
| Ret += Twine("/" + Root).str(); |
| } |
| |
| auto Iter = sys::path::begin(Filename), End = sys::path::end(Filename); |
| assert(Iter != End && "Expected there to be a non-root path component."); |
| // Add the rest of the path components, encoding any reserved characters; |
| // we skip past the first path component, as it was handled it above. |
| for (StringRef Component : llvm::make_range(++Iter, End)) { |
| // For reasons unknown to me, we may get a backslash with Windows native |
| // paths for the initial backslash following the drive component, which |
| // we need to ignore as a URI path part. |
| if (Component == "\\") |
| continue; |
| |
| // Add the separator between the previous path part and the one being |
| // currently processed. |
| Ret += "/"; |
| |
| // URI encode the part. |
| for (char C : Component) { |
| Ret += percentEncodeURICharacter(C); |
| } |
| } |
| |
| return std::string(Ret); |
| } |
| /// @} |
| |
| /// \brief Calculate the column position expressed in the number of UTF-8 code |
| /// points from column start to the source location |
| /// |
| /// \param Loc The source location whose column needs to be calculated. |
| /// \param TokenLen Optional hint for when the token is multiple bytes long. |
| /// |
| /// \return The column number as a UTF-8 aware byte offset from column start to |
| /// the effective source location. |
| static unsigned int adjustColumnPos(FullSourceLoc Loc, |
| unsigned int TokenLen = 0) { |
| assert(!Loc.isInvalid() && "invalid Loc when adjusting column position"); |
| |
| std::pair<FileID, unsigned> LocInfo = Loc.getDecomposedExpansionLoc(); |
| std::optional<MemoryBufferRef> Buf = |
| Loc.getManager().getBufferOrNone(LocInfo.first); |
| assert(Buf && "got an invalid buffer for the location's file"); |
| assert(Buf->getBufferSize() >= (LocInfo.second + TokenLen) && |
| "token extends past end of buffer?"); |
| |
| // Adjust the offset to be the start of the line, since we'll be counting |
| // Unicode characters from there until our column offset. |
| unsigned int Off = LocInfo.second - (Loc.getExpansionColumnNumber() - 1); |
| unsigned int Ret = 1; |
| while (Off < (LocInfo.second + TokenLen)) { |
| Off += getNumBytesForUTF8(Buf->getBuffer()[Off]); |
| Ret++; |
| } |
| |
| return Ret; |
| } |
| |
| /// \name SARIF Utilities |
| /// @{ |
| |
| /// \internal |
| json::Object createMessage(StringRef Text) { |
| return json::Object{{"text", Text.str()}}; |
| } |
| |
| /// \internal |
| /// \pre CharSourceRange must be a token range |
| static json::Object createTextRegion(const SourceManager &SM, |
| const CharSourceRange &R) { |
| FullSourceLoc BeginCharLoc{R.getBegin(), SM}; |
| FullSourceLoc EndCharLoc{R.getEnd(), SM}; |
| json::Object Region{{"startLine", BeginCharLoc.getExpansionLineNumber()}, |
| {"startColumn", adjustColumnPos(BeginCharLoc)}}; |
| |
| if (BeginCharLoc == EndCharLoc) { |
| Region["endColumn"] = adjustColumnPos(BeginCharLoc); |
| } else { |
| Region["endLine"] = EndCharLoc.getExpansionLineNumber(); |
| Region["endColumn"] = adjustColumnPos(EndCharLoc); |
| } |
| return Region; |
| } |
| |
| static json::Object createLocation(json::Object &&PhysicalLocation, |
| StringRef Message = "") { |
| json::Object Ret{{"physicalLocation", std::move(PhysicalLocation)}}; |
| if (!Message.empty()) |
| Ret.insert({"message", createMessage(Message)}); |
| return Ret; |
| } |
| |
| static StringRef importanceToStr(ThreadFlowImportance I) { |
| switch (I) { |
| case ThreadFlowImportance::Important: |
| return "important"; |
| case ThreadFlowImportance::Essential: |
| return "essential"; |
| case ThreadFlowImportance::Unimportant: |
| return "unimportant"; |
| } |
| llvm_unreachable("Fully covered switch is not so fully covered"); |
| } |
| |
| static StringRef resultLevelToStr(SarifResultLevel R) { |
| switch (R) { |
| case SarifResultLevel::None: |
| return "none"; |
| case SarifResultLevel::Note: |
| return "note"; |
| case SarifResultLevel::Warning: |
| return "warning"; |
| case SarifResultLevel::Error: |
| return "error"; |
| } |
| llvm_unreachable("Potentially un-handled SarifResultLevel. " |
| "Is the switch not fully covered?"); |
| } |
| |
| static json::Object |
| createThreadFlowLocation(json::Object &&Location, |
| const ThreadFlowImportance &Importance) { |
| return json::Object{{"location", std::move(Location)}, |
| {"importance", importanceToStr(Importance)}}; |
| } |
| /// @} |
| |
| json::Object |
| SarifDocumentWriter::createPhysicalLocation(const CharSourceRange &R) { |
| assert(R.isValid() && |
| "Cannot create a physicalLocation from invalid SourceRange!"); |
| assert(R.isCharRange() && |
| "Cannot create a physicalLocation from a token range!"); |
| FullSourceLoc Start{R.getBegin(), SourceMgr}; |
| OptionalFileEntryRef FE = Start.getExpansionLoc().getFileEntryRef(); |
| assert(FE && "Diagnostic does not exist within a valid file!"); |
| |
| const std::string &FileURI = fileNameToURI(getFileName(*FE)); |
| auto I = CurrentArtifacts.find(FileURI); |
| |
| if (I == CurrentArtifacts.end()) { |
| uint32_t Idx = static_cast<uint32_t>(CurrentArtifacts.size()); |
| const SarifArtifactLocation &Location = |
| SarifArtifactLocation::create(FileURI).setIndex(Idx); |
| const SarifArtifact &Artifact = SarifArtifact::create(Location) |
| .setRoles({"resultFile"}) |
| .setLength(FE->getSize()) |
| .setMimeType("text/plain"); |
| auto StatusIter = CurrentArtifacts.insert({FileURI, Artifact}); |
| // If inserted, ensure the original iterator points to the newly inserted |
| // element, so it can be used downstream. |
| if (StatusIter.second) |
| I = StatusIter.first; |
| } |
| assert(I != CurrentArtifacts.end() && "Failed to insert new artifact"); |
| const SarifArtifactLocation &Location = I->second.Location; |
| json::Object ArtifactLocationObject{{"uri", Location.URI}}; |
| if (Location.Index.has_value()) |
| ArtifactLocationObject["index"] = *Location.Index; |
| return json::Object{{{"artifactLocation", std::move(ArtifactLocationObject)}, |
| {"region", createTextRegion(SourceMgr, R)}}}; |
| } |
| |
| json::Object &SarifDocumentWriter::getCurrentTool() { |
| assert(!Closed && "SARIF Document is closed. " |
| "Need to call createRun() before using getcurrentTool!"); |
| |
| // Since Closed = false here, expect there to be at least 1 Run, anything |
| // else is an invalid state. |
| assert(!Runs.empty() && "There are no runs associated with the document!"); |
| |
| return *Runs.back().getAsObject()->get("tool")->getAsObject(); |
| } |
| |
| void SarifDocumentWriter::reset() { |
| CurrentRules.clear(); |
| CurrentArtifacts.clear(); |
| } |
| |
| void SarifDocumentWriter::endRun() { |
| // Exit early if trying to close a closed Document. |
| if (Closed) { |
| reset(); |
| return; |
| } |
| |
| // Since Closed = false here, expect there to be at least 1 Run, anything |
| // else is an invalid state. |
| assert(!Runs.empty() && "There are no runs associated with the document!"); |
| |
| // Flush all the rules. |
| json::Object &Tool = getCurrentTool(); |
| json::Array Rules; |
| for (const SarifRule &R : CurrentRules) { |
| json::Object Config{ |
| {"enabled", R.DefaultConfiguration.Enabled}, |
| {"level", resultLevelToStr(R.DefaultConfiguration.Level)}, |
| {"rank", R.DefaultConfiguration.Rank}}; |
| json::Object Rule{ |
| {"name", R.Name}, |
| {"id", R.Id}, |
| {"fullDescription", json::Object{{"text", R.Description}}}, |
| {"defaultConfiguration", std::move(Config)}}; |
| if (!R.HelpURI.empty()) |
| Rule["helpUri"] = R.HelpURI; |
| Rules.emplace_back(std::move(Rule)); |
| } |
| json::Object &Driver = *Tool.getObject("driver"); |
| Driver["rules"] = std::move(Rules); |
| |
| // Flush all the artifacts. |
| json::Object &Run = getCurrentRun(); |
| json::Array *Artifacts = Run.getArray("artifacts"); |
| SmallVector<std::pair<StringRef, SarifArtifact>, 0> Vec; |
| for (const auto &[K, V] : CurrentArtifacts) |
| Vec.emplace_back(K, V); |
| llvm::sort(Vec, llvm::less_first()); |
| for (const auto &[_, A] : Vec) { |
| json::Object Loc{{"uri", A.Location.URI}}; |
| if (A.Location.Index.has_value()) { |
| Loc["index"] = static_cast<int64_t>(*A.Location.Index); |
| } |
| json::Object Artifact; |
| Artifact["location"] = std::move(Loc); |
| if (A.Length.has_value()) |
| Artifact["length"] = static_cast<int64_t>(*A.Length); |
| if (!A.Roles.empty()) |
| Artifact["roles"] = json::Array(A.Roles); |
| if (!A.MimeType.empty()) |
| Artifact["mimeType"] = A.MimeType; |
| if (A.Offset.has_value()) |
| Artifact["offset"] = *A.Offset; |
| Artifacts->push_back(json::Value(std::move(Artifact))); |
| } |
| |
| // Clear, reset temporaries before next run. |
| reset(); |
| |
| // Mark the document as closed. |
| Closed = true; |
| } |
| |
| json::Array |
| SarifDocumentWriter::createThreadFlows(ArrayRef<ThreadFlow> ThreadFlows) { |
| json::Object Ret{{"locations", json::Array{}}}; |
| json::Array Locs; |
| for (const auto &ThreadFlow : ThreadFlows) { |
| json::Object PLoc = createPhysicalLocation(ThreadFlow.Range); |
| json::Object Loc = createLocation(std::move(PLoc), ThreadFlow.Message); |
| Locs.emplace_back( |
| createThreadFlowLocation(std::move(Loc), ThreadFlow.Importance)); |
| } |
| Ret["locations"] = std::move(Locs); |
| return json::Array{std::move(Ret)}; |
| } |
| |
| json::Object |
| SarifDocumentWriter::createCodeFlow(ArrayRef<ThreadFlow> ThreadFlows) { |
| return json::Object{{"threadFlows", createThreadFlows(ThreadFlows)}}; |
| } |
| |
| void SarifDocumentWriter::createRun(StringRef ShortToolName, |
| StringRef LongToolName, |
| StringRef ToolVersion) { |
| // Clear resources associated with a previous run. |
| endRun(); |
| |
| // Signify a new run has begun. |
| Closed = false; |
| |
| json::Object Tool{ |
| {"driver", |
| json::Object{{"name", ShortToolName}, |
| {"fullName", LongToolName}, |
| {"language", "en-US"}, |
| {"version", ToolVersion}, |
| {"informationUri", |
| "https://clang.llvm.org/docs/UsersManual.html"}}}}; |
| json::Object TheRun{{"tool", std::move(Tool)}, |
| {"results", {}}, |
| {"artifacts", {}}, |
| {"columnKind", "unicodeCodePoints"}}; |
| Runs.emplace_back(std::move(TheRun)); |
| } |
| |
| json::Object &SarifDocumentWriter::getCurrentRun() { |
| assert(!Closed && |
| "SARIF Document is closed. " |
| "Can only getCurrentRun() if document is opened via createRun(), " |
| "create a run first"); |
| |
| // Since Closed = false here, expect there to be at least 1 Run, anything |
| // else is an invalid state. |
| assert(!Runs.empty() && "There are no runs associated with the document!"); |
| return *Runs.back().getAsObject(); |
| } |
| |
| size_t SarifDocumentWriter::createRule(const SarifRule &Rule) { |
| size_t Ret = CurrentRules.size(); |
| CurrentRules.emplace_back(Rule); |
| return Ret; |
| } |
| |
| void SarifDocumentWriter::appendResult(const SarifResult &Result) { |
| size_t RuleIdx = Result.RuleIdx; |
| assert(RuleIdx < CurrentRules.size() && |
| "Trying to reference a rule that doesn't exist"); |
| const SarifRule &Rule = CurrentRules[RuleIdx]; |
| assert(Rule.DefaultConfiguration.Enabled && |
| "Cannot add a result referencing a disabled Rule"); |
| json::Object Ret{{"message", createMessage(Result.DiagnosticMessage)}, |
| {"ruleIndex", static_cast<int64_t>(RuleIdx)}, |
| {"ruleId", Rule.Id}}; |
| if (!Result.Locations.empty()) { |
| json::Array Locs; |
| for (auto &Range : Result.Locations) { |
| Locs.emplace_back(createLocation(createPhysicalLocation(Range))); |
| } |
| Ret["locations"] = std::move(Locs); |
| } |
| if (!Result.ThreadFlows.empty()) |
| Ret["codeFlows"] = json::Array{createCodeFlow(Result.ThreadFlows)}; |
| |
| Ret["level"] = resultLevelToStr( |
| Result.LevelOverride.value_or(Rule.DefaultConfiguration.Level)); |
| |
| json::Object &Run = getCurrentRun(); |
| json::Array *Results = Run.getArray("results"); |
| Results->emplace_back(std::move(Ret)); |
| } |
| |
| json::Object SarifDocumentWriter::createDocument() { |
| // Flush all temporaries to their destinations if needed. |
| endRun(); |
| |
| json::Object Doc{ |
| {"$schema", SchemaURI}, |
| {"version", SchemaVersion}, |
| }; |
| if (!Runs.empty()) |
| Doc["runs"] = json::Array(Runs); |
| return Doc; |
| } |