| //===-- StdLib.cpp ----------------------------------------------*- C++ -*-===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| #include "StdLib.h" |
| #include <fstream> |
| #include <memory> |
| #include <optional> |
| #include <string> |
| #include <vector> |
| |
| #include "Compiler.h" |
| #include "Config.h" |
| #include "SymbolCollector.h" |
| #include "index/IndexAction.h" |
| #include "support/Logger.h" |
| #include "support/ThreadsafeFS.h" |
| #include "support/Trace.h" |
| #include "clang/Basic/LangOptions.h" |
| #include "clang/Frontend/CompilerInvocation.h" |
| #include "clang/Lex/PreprocessorOptions.h" |
| #include "clang/Tooling/Inclusions/StandardLibrary.h" |
| #include "llvm/ADT/IntrusiveRefCntPtr.h" |
| #include "llvm/ADT/StringRef.h" |
| #include "llvm/Support/MemoryBuffer.h" |
| #include "llvm/Support/Path.h" |
| |
| namespace clang { |
| namespace clangd { |
| namespace { |
| |
// Language families for which a standard library can be indexed.
// The enumerator values are used directly as indices into StdLibSet::Best,
// so they are spelled out explicitly.
enum Lang { C = 0, CXX = 1 };
| |
| Lang langFromOpts(const LangOptions &LO) { return LO.CPlusPlus ? CXX : C; } |
| llvm::StringLiteral mandatoryHeader(Lang L) { |
| switch (L) { |
| case C: |
| return "stdio.h"; |
| case CXX: |
| return "vector"; |
| } |
| llvm_unreachable("unhandled Lang"); |
| } |
| |
| LangStandard::Kind standardFromOpts(const LangOptions &LO) { |
| if (LO.CPlusPlus) { |
| if (LO.CPlusPlus23) |
| return LangStandard::lang_cxx23; |
| if (LO.CPlusPlus20) |
| return LangStandard::lang_cxx20; |
| if (LO.CPlusPlus17) |
| return LangStandard::lang_cxx17; |
| if (LO.CPlusPlus14) |
| return LangStandard::lang_cxx14; |
| if (LO.CPlusPlus11) |
| return LangStandard::lang_cxx11; |
| return LangStandard::lang_cxx98; |
| } |
| if (LO.C23) |
| return LangStandard::lang_c23; |
| // C17 has no new features, so treat {C11,C17} as C17. |
| if (LO.C11) |
| return LangStandard::lang_c17; |
| return LangStandard::lang_c99; |
| } |
| |
| std::string buildUmbrella(llvm::StringLiteral Mandatory, |
| llvm::ArrayRef<tooling::stdlib::Header> Headers) { |
| std::string Result; |
| llvm::raw_string_ostream OS(Result); |
| |
| // We __has_include guard all our #includes to avoid errors when using older |
| // stdlib version that don't have headers for the newest language standards. |
| // But make sure we get *some* error if things are totally broken. |
| OS << llvm::formatv( |
| "#if !__has_include(<{0}>)\n" |
| "#error Mandatory header <{0}> not found in standard library!\n" |
| "#endif\n", |
| Mandatory); |
| |
| for (auto Header : Headers) { |
| OS << llvm::formatv("#if __has_include({0})\n" |
| "#include {0}\n" |
| "#endif\n", |
| Header); |
| } |
| OS.flush(); |
| return Result; |
| } |
| |
| } // namespace |
| |
| llvm::StringRef getStdlibUmbrellaHeader(const LangOptions &LO) { |
| // The umbrella header is the same for all versions of each language. |
| // Headers that are unsupported in old lang versions are usually guarded by |
| // #if. Some headers may be not present in old stdlib versions, the umbrella |
| // header guards with __has_include for this purpose. |
| Lang L = langFromOpts(LO); |
| switch (L) { |
| case CXX: |
| static std::string *UmbrellaCXX = new std::string(buildUmbrella( |
| mandatoryHeader(L), |
| tooling::stdlib::Header::all(tooling::stdlib::Lang::CXX))); |
| return *UmbrellaCXX; |
| case C: |
| static std::string *UmbrellaC = new std::string( |
| buildUmbrella(mandatoryHeader(L), |
| tooling::stdlib::Header::all(tooling::stdlib::Lang::C))); |
| return *UmbrellaC; |
| } |
| llvm_unreachable("invalid Lang in langFromOpts"); |
| } |
| |
| namespace { |
| |
| // Including the standard library leaks unwanted transitively included symbols. |
| // |
| // We want to drop these, they're a bit tricky to identify: |
| // - we don't want to limit to symbols on our list, as our list has only |
| // top-level symbols (and there may be legitimate stdlib extensions). |
| // - we can't limit to only symbols defined in known stdlib headers, as stdlib |
| // internal structure is murky |
| // - we can't strictly require symbols to come from a particular path, e.g. |
| // libstdc++ is mostly under /usr/include/c++/10/... |
| // but std::ctype_base is under /usr/include/<platform>/c++/10/... |
| // We require the symbol to come from a header that is *either* from |
| // the standard library path (as identified by the location of <vector>), or |
| // another header that defines a symbol from our stdlib list. |
| SymbolSlab filter(SymbolSlab Slab, const StdLibLocation &Loc) { |
| SymbolSlab::Builder Result; |
| |
| static auto &StandardHeaders = *[] { |
| auto *Set = new llvm::DenseSet<llvm::StringRef>(); |
| for (auto Header : tooling::stdlib::Header::all(tooling::stdlib::Lang::CXX)) |
| Set->insert(Header.name()); |
| for (auto Header : tooling::stdlib::Header::all(tooling::stdlib::Lang::C)) |
| Set->insert(Header.name()); |
| return Set; |
| }(); |
| |
| // Form prefixes like file:///usr/include/c++/10/ |
| // These can be trivially prefix-compared with URIs in the indexed symbols. |
| llvm::SmallVector<std::string> StdLibURIPrefixes; |
| for (const auto &Path : Loc.Paths) { |
| StdLibURIPrefixes.push_back(URI::create(Path).toString()); |
| if (StdLibURIPrefixes.back().back() != '/') |
| StdLibURIPrefixes.back().push_back('/'); |
| } |
| // For each header URI, is it *either* prefixed by StdLibURIPrefixes *or* |
| // owner of a symbol whose insertable header is in StandardHeaders? |
| // Pointer key because strings in a SymbolSlab are interned. |
| llvm::DenseMap<const char *, bool> GoodHeader; |
| for (const Symbol &S : Slab) { |
| if (!S.IncludeHeaders.empty() && |
| StandardHeaders.contains(S.IncludeHeaders.front().IncludeHeader)) { |
| GoodHeader[S.CanonicalDeclaration.FileURI] = true; |
| GoodHeader[S.Definition.FileURI] = true; |
| continue; |
| } |
| for (const char *URI : |
| {S.CanonicalDeclaration.FileURI, S.Definition.FileURI}) { |
| auto R = GoodHeader.try_emplace(URI, false); |
| if (R.second) { |
| R.first->second = llvm::any_of( |
| StdLibURIPrefixes, |
| [&, URIStr(llvm::StringRef(URI))](const std::string &Prefix) { |
| return URIStr.starts_with(Prefix); |
| }); |
| } |
| } |
| } |
| #ifndef NDEBUG |
| for (const auto &Good : GoodHeader) |
| if (Good.second && *Good.first) |
| dlog("Stdlib header: {0}", Good.first); |
| #endif |
| // Empty URIs aren't considered good. (Definition can be blank). |
| auto IsGoodHeader = [&](const char *C) { return *C && GoodHeader.lookup(C); }; |
| |
| for (const Symbol &S : Slab) { |
| if (!(IsGoodHeader(S.CanonicalDeclaration.FileURI) || |
| IsGoodHeader(S.Definition.FileURI))) { |
| dlog("Ignoring wrong-header symbol {0}{1} in {2}", S.Scope, S.Name, |
| S.CanonicalDeclaration.FileURI); |
| continue; |
| } |
| Result.insert(S); |
| } |
| |
| return std::move(Result).build(); |
| } |
| |
| } // namespace |
| |
| SymbolSlab indexStandardLibrary(llvm::StringRef HeaderSources, |
| std::unique_ptr<CompilerInvocation> CI, |
| const StdLibLocation &Loc, |
| const ThreadsafeFS &TFS) { |
| if (CI->getFrontendOpts().Inputs.size() != 1 || |
| !CI->getPreprocessorOpts().ImplicitPCHInclude.empty()) { |
| elog("Indexing standard library failed: bad CompilerInvocation"); |
| assert(false && "indexing stdlib with a dubious CompilerInvocation!"); |
| return SymbolSlab(); |
| } |
| const FrontendInputFile &Input = CI->getFrontendOpts().Inputs.front(); |
| trace::Span Tracer("StandardLibraryIndex"); |
| LangStandard::Kind LangStd = standardFromOpts(CI->getLangOpts()); |
| log("Indexing {0} standard library in the context of {1}", |
| LangStandard::getLangStandardForKind(LangStd).getName(), Input.getFile()); |
| |
| SymbolSlab Symbols; |
| IgnoreDiagnostics IgnoreDiags; |
| // CompilerInvocation is taken from elsewhere, and may map a dirty buffer. |
| CI->getPreprocessorOpts().clearRemappedFiles(); |
| auto Clang = prepareCompilerInstance( |
| std::move(CI), /*Preamble=*/nullptr, |
| llvm::MemoryBuffer::getMemBuffer(HeaderSources, Input.getFile()), |
| TFS.view(/*CWD=*/std::nullopt), IgnoreDiags); |
| if (!Clang) { |
| elog("Standard Library Index: Couldn't build compiler instance"); |
| return Symbols; |
| } |
| |
| SymbolCollector::Options IndexOpts; |
| IndexOpts.Origin = SymbolOrigin::StdLib; |
| IndexOpts.CollectMainFileSymbols = false; |
| IndexOpts.CollectMainFileRefs = false; |
| IndexOpts.CollectMacro = true; |
| IndexOpts.StoreAllDocumentation = true; |
| // Sadly we can't use IndexOpts.FileFilter to restrict indexing scope. |
| // Files from outside the StdLibLocation may define true std symbols anyway. |
| // We end up "blessing" such headers, and can only do that by indexing |
| // everything first. |
| |
| // Refs, relations, include graph in the stdlib mostly aren't useful. |
| auto Action = createStaticIndexingAction( |
| IndexOpts, [&](SymbolSlab S) { Symbols = std::move(S); }, nullptr, |
| nullptr, nullptr); |
| |
| if (!Action->BeginSourceFile(*Clang, Input)) { |
| elog("Standard Library Index: BeginSourceFile() failed"); |
| return Symbols; |
| } |
| |
| if (llvm::Error Err = Action->Execute()) { |
| elog("Standard Library Index: Execute failed: {0}", std::move(Err)); |
| return Symbols; |
| } |
| |
| Action->EndSourceFile(); |
| |
| unsigned SymbolsBeforeFilter = Symbols.size(); |
| Symbols = filter(std::move(Symbols), Loc); |
| bool Errors = Clang->hasDiagnostics() && |
| Clang->getDiagnostics().hasUncompilableErrorOccurred(); |
| log("Indexed {0} standard library{3}: {1} symbols, {2} filtered", |
| LangStandard::getLangStandardForKind(LangStd).getName(), Symbols.size(), |
| SymbolsBeforeFilter - Symbols.size(), |
| Errors ? " (incomplete due to errors)" : ""); |
| SPAN_ATTACH(Tracer, "symbols", int(Symbols.size())); |
| return Symbols; |
| } |
| |
| SymbolSlab indexStandardLibrary(std::unique_ptr<CompilerInvocation> Invocation, |
| const StdLibLocation &Loc, |
| const ThreadsafeFS &TFS) { |
| llvm::StringRef Header = getStdlibUmbrellaHeader(Invocation->getLangOpts()); |
| return indexStandardLibrary(Header, std::move(Invocation), Loc, TFS); |
| } |
| |
| bool StdLibSet::isBest(const LangOptions &LO) const { |
| return standardFromOpts(LO) >= |
| Best[langFromOpts(LO)].load(std::memory_order_acquire); |
| } |
| |
| std::optional<StdLibLocation> StdLibSet::add(const LangOptions &LO, |
| const HeaderSearch &HS) { |
| Lang L = langFromOpts(LO); |
| int OldVersion = Best[L].load(std::memory_order_acquire); |
| int NewVersion = standardFromOpts(LO); |
| dlog("Index stdlib? {0}", |
| LangStandard::getLangStandardForKind(standardFromOpts(LO)).getName()); |
| |
| if (!Config::current().Index.StandardLibrary) { |
| dlog("No: disabled in config"); |
| return std::nullopt; |
| } |
| |
| if (NewVersion <= OldVersion) { |
| dlog("No: have {0}, {1}>={2}", |
| LangStandard::getLangStandardForKind( |
| static_cast<LangStandard::Kind>(NewVersion)) |
| .getName(), |
| OldVersion, NewVersion); |
| return std::nullopt; |
| } |
| |
| // We'd like to index a standard library here if there is one. |
| // Check for the existence of <vector> on the search path. |
| // We could cache this, but we only get here repeatedly when there's no |
| // stdlib, and even then only once per preamble build. |
| llvm::StringLiteral ProbeHeader = mandatoryHeader(L); |
| llvm::SmallString<256> Path; // Scratch space. |
| llvm::SmallVector<std::string> SearchPaths; |
| auto RecordHeaderPath = [&](llvm::StringRef HeaderPath) { |
| llvm::StringRef DirPath = llvm::sys::path::parent_path(HeaderPath); |
| if (!HS.getFileMgr().getVirtualFileSystem().getRealPath(DirPath, Path)) |
| SearchPaths.emplace_back(Path); |
| }; |
| for (const auto &DL : |
| llvm::make_range(HS.search_dir_begin(), HS.search_dir_end())) { |
| switch (DL.getLookupType()) { |
| case DirectoryLookup::LT_NormalDir: { |
| Path = DL.getDirRef()->getName(); |
| llvm::sys::path::append(Path, ProbeHeader); |
| llvm::vfs::Status Stat; |
| if (!HS.getFileMgr().getNoncachedStatValue(Path, Stat) && |
| Stat.isRegularFile()) |
| RecordHeaderPath(Path); |
| break; |
| } |
| case DirectoryLookup::LT_Framework: |
| // stdlib can't be a framework (framework includes must have a slash) |
| continue; |
| case DirectoryLookup::LT_HeaderMap: |
| llvm::StringRef Target = |
| DL.getHeaderMap()->lookupFilename(ProbeHeader, Path); |
| if (!Target.empty()) |
| RecordHeaderPath(Target); |
| break; |
| } |
| } |
| if (SearchPaths.empty()) |
| return std::nullopt; |
| |
| dlog("Found standard library in {0}", llvm::join(SearchPaths, ", ")); |
| |
| while (!Best[L].compare_exchange_weak(OldVersion, NewVersion, |
| std::memory_order_acq_rel)) |
| if (OldVersion >= NewVersion) { |
| dlog("No: lost the race"); |
| return std::nullopt; // Another thread won the race while we were |
| // checking. |
| } |
| |
| dlog("Yes, index stdlib!"); |
| return StdLibLocation{std::move(SearchPaths)}; |
| } |
| |
| } // namespace clangd |
| } // namespace clang |