Diff 259335

clang-tools-extra/clangd/SourceCode.h

	Show First 20 Lines • Show All 210 Lines • ▼ Show 20 Lines

	/// Collects words from the source code.			/// Collects words from the source code.
	/// Unlike collectIdentifiers:			/// Unlike collectIdentifiers:
	/// - also finds text in comments:			/// - also finds text in comments:
	/// - splits text into words			/// - splits text into words
	/// - drops stopwords like "get" and "for"			/// - drops stopwords like "get" and "for"
	llvm::StringSet<> collectWords(llvm::StringRef Content);			llvm::StringSet<> collectWords(llvm::StringRef Content);

				// Something that looks like a word in the source code.
				// Could be a "real" token that's "live" in the AST, a spelled token consumed by
				// the preprocessor, or part of a spelled token (e.g. word in a comment).
				struct SpelledWord {
				  // (Spelling) location of the start of the word.
				  SourceLocation Location;
				  // The range of the word itself, excluding any quotes.
				  // This is a subrange of the file buffer, so it is only a view: it does not
				  // own the text.
				  llvm::StringRef Text;
				  // Whether this word is likely to refer to an identifier. True if:
				  // - the word is a spelled identifier token
				  // - Text is identifier-like (e.g. "foo_bar")
				  // - Text is surrounded by backticks (e.g. Foo in "// returns `Foo`")
				  bool LikelyIdentifier = false;
				  // Set if the word is contained in a token spelled in the file.
				  // (This should always be true, but comments aren't retained by TokenBuffer).
				  const syntax::Token *PartOfSpelledToken = nullptr;
				  // Set if the word is exactly a token spelled in the file.
				  const syntax::Token *SpelledToken = nullptr;
				  // Set if the word is a token spelled in the file, and that token survives
				  // preprocessing to emit an expanded token spelled the same way.
				  const syntax::Token *ExpandedToken = nullptr;

				  // Find the unique word that contains SpelledLoc or starts/ends there.
				  // Yields llvm::None when there is no such word.
				  static llvm::Optional<SpelledWord> touching(SourceLocation SpelledLoc,
				                                              const syntax::TokenBuffer &TB,
				                                              const LangOptions &LangOpts);
				};

	/// Heuristically determine namespaces visible at a point, without parsing Code.			/// Heuristically determine namespaces visible at a point, without parsing Code.
	/// This considers using-directives and enclosing namespace-declarations that			/// This considers using-directives and enclosing namespace-declarations that
	/// are visible (and not obfuscated) in the file itself (not headers).			/// are visible (and not obfuscated) in the file itself (not headers).
	/// Code should be truncated at the point of interest.			/// Code should be truncated at the point of interest.
	///			///
	/// The returned vector is always non-empty.			/// The returned vector is always non-empty.
	/// - The first element is the namespace that encloses the point: a declaration			/// - The first element is the namespace that encloses the point: a declaration
	/// near the point would be within this namespace.			/// near the point would be within this namespace.
	▲ Show 20 Lines • Show All 56 Lines • Show Last 20 Lines

clang-tools-extra/clangd/SourceCode.cpp

Show First 20 Lines • Show All 849 Lines • ▼ Show 20 Lines	case Separator:
break;		break;
}		}
}		}
Flush();		Flush();

return Result;		return Result;
}		}

		// Heuristically decides whether Word, found in free text such as a comment,
		// is likely to name an identifier.
		// Before and After are the text immediately preceding and following Word;
		// they are inspected for markup that hints at an identifier.
		static bool isLikelyIdentifier(llvm::StringRef Word, llvm::StringRef Before,
		                               llvm::StringRef After) {
		  // `foo` is an identifier.
		  if (Before.endswith("`") && After.startswith("`"))
		    return true;
		  // In foo::bar, both foo and bar are identifiers.
		  if (Before.endswith("::") || After.startswith("::"))
		    return true;
		  // Doxygen tags like \c foo indicate identifiers.
		  // This duplicates clang's doxygen parser, revisit if it gets complicated.
		  Before = Before.take_back(100); // Don't search too far back.
		  auto Pos = Before.find_last_of("\\@");
		  if (Pos != llvm::StringRef::npos) {
		    // Everything between the tag marker and Word, minus trailing spaces.
		    llvm::StringRef Tag = Before.substr(Pos + 1).rtrim(' ');
		    if (Tag == "p" || Tag == "c" || Tag == "class" || Tag == "tparam" ||
		        Tag == "param" || Tag == "param[in]" || Tag == "param[out]" ||
		        Tag == "param[in,out]" || Tag == "retval" || Tag == "throw" ||
		        Tag == "throws" || Tag == "link")
		      return true;
		  }

		  // Word contains underscore.
		  // This handles things like snake_case and MACRO_CASE.
		  if (Word.contains('_'))
		    return true;
		  // Word contains capital letter other than at beginning.
		  // This handles things like lowerCamel and UpperCamel.
		  // The check for also containing a lowercase letter is to rule out
		  // initialisms like "HTTP".
		  bool HasLower = Word.find_if(clang::isLowercase) != llvm::StringRef::npos;
		  bool HasUpper =
		      Word.substr(1).find_if(clang::isUppercase) != llvm::StringRef::npos;
		  if (HasLower && HasUpper)
		    return true;
		  // FIXME: consider mid-sentence Capitalization?
		  return false;
		}

		// Finds the word touching SpelledLoc.
		// If an identifier or keyword token touches the location, that token is the
		// word and no heuristics are applied. Otherwise the word is the maximal run
		// of identifier-body characters around the location in the file buffer.
		// Returns llvm::None if the buffer is unavailable or no such run exists.
		llvm::Optional<SpelledWord> SpelledWord::touching(SourceLocation SpelledLoc,
		                                                  const syntax::TokenBuffer &TB,
		                                                  const LangOptions &LangOpts) {
		  const auto &SM = TB.sourceManager();
		  auto Touching = syntax::spelledTokensTouching(SpelledLoc, TB);
		  for (const auto &T : Touching) {
		    // If the token is an identifier or a keyword, don't use any heuristics.
		    if (tok::isAnyIdentifier(T.kind()) || tok::getKeywordSpelling(T.kind())) {
		      SpelledWord Result;
		      Result.Location = T.location();
		      Result.Text = T.text(SM);
		      Result.LikelyIdentifier = tok::isAnyIdentifier(T.kind());
		      Result.PartOfSpelledToken = &T;
		      Result.SpelledToken = &T;
		      auto Expanded =
		          TB.expandedTokens(SM.getMacroArgExpandedLocation(T.location()));
		      // Only a single expanded token with the same spelling counts as "the
		      // same token surviving preprocessing".
		      if (Expanded.size() == 1 && Expanded.front().text(SM) == Result.Text)
		        Result.ExpandedToken = &Expanded.front();
		      return Result;
		    }
		  }

		  // No identifier/keyword token here: scan the raw buffer instead.
		  FileID File;
		  unsigned Offset;
		  std::tie(File, Offset) = SM.getDecomposedLoc(SpelledLoc);
		  bool Invalid = false;
		  llvm::StringRef Code = SM.getBufferData(File, &Invalid);
		  // Bail out rather than index past the end of the buffer below.
		  if (Invalid || Offset > Code.size())
		    return llvm::None;
		  // Grow [B, E) outwards over identifier-body characters.
		  unsigned B = Offset, E = Offset;
		  while (B > 0 && isIdentifierBody(Code[B - 1]))
		    --B;
		  while (E < Code.size() && isIdentifierBody(Code[E]))
		    ++E;
		  if (B == E)
		    return llvm::None;

		  SpelledWord Result;
		  Result.Location = SM.getComposedLoc(File, B);
		  Result.Text = Code.slice(B, E);
		  Result.LikelyIdentifier =
		      isLikelyIdentifier(Result.Text, Code.substr(0, B), Code.substr(E)) &&
		      // should not be a keyword
		      tok::isAnyIdentifier(
		          IdentifierTable(LangOpts).get(Result.Text).getTokenID());
		  // A touching token that starts at or before the word contains it.
		  for (const auto &T : Touching)
		    if (T.location() <= Result.Location)
		      Result.PartOfSpelledToken = &T;
		  return Result;
		}

llvm::Optional<DefinedMacro> locateMacroAt(const syntax::Token &SpelledTok,		llvm::Optional<DefinedMacro> locateMacroAt(const syntax::Token &SpelledTok,
Preprocessor &PP) {		Preprocessor &PP) {
SourceLocation Loc = SpelledTok.location();		SourceLocation Loc = SpelledTok.location();
assert(Loc.isFileID());		assert(Loc.isFileID());
const auto &SM = PP.getSourceManager();		const auto &SM = PP.getSourceManager();
IdentifierInfo *IdentifierInfo = PP.getIdentifierInfo(SpelledTok.text(SM));		IdentifierInfo *IdentifierInfo = PP.getIdentifierInfo(SpelledTok.text(SM));
if (!IdentifierInfo \|\| !IdentifierInfo->hadMacroDefinition())		if (!IdentifierInfo \|\| !IdentifierInfo->hadMacroDefinition())
return None;		return None;
▲ Show 20 Lines • Show All 143 Lines • Show Last 20 Lines

clang-tools-extra/clangd/XRefs.h

	Show All 10 Lines
	//===----------------------------------------------------------------------===//			//===----------------------------------------------------------------------===//

	#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_XREFS_H			#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_XREFS_H
	#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_XREFS_H			#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_XREFS_H

	#include "FormattedString.h"			#include "FormattedString.h"
	#include "Path.h"			#include "Path.h"
	#include "Protocol.h"			#include "Protocol.h"
				#include "SourceCode.h"
	#include "index/Index.h"			#include "index/Index.h"
	#include "index/SymbolLocation.h"			#include "index/SymbolLocation.h"
	#include "clang/AST/Type.h"			#include "clang/AST/Type.h"
	#include "clang/Format/Format.h"			#include "clang/Format/Format.h"
	#include "clang/Index/IndexSymbol.h"			#include "clang/Index/IndexSymbol.h"
	#include "llvm/ADT/Optional.h"			#include "llvm/ADT/Optional.h"
	#include "llvm/Support/raw_ostream.h"			#include "llvm/Support/raw_ostream.h"
	#include <vector>			#include <vector>

	namespace clang {			namespace clang {
				namespace syntax {
				class Token;
				class TokenBuffer;
				} // namespace syntax
	namespace clangd {			namespace clangd {
	class ParsedAST;			class ParsedAST;

	// Describes where a symbol is declared and defined (as far as clangd knows).			// Describes where a symbol is declared and defined (as far as clangd knows).
	// There are three cases:			// There are three cases:
	// - a declaration only, no definition is known (e.g. only header seen)			// - a declaration only, no definition is known (e.g. only header seen)
	// - a declaration and a distinct definition (e.g. function declared in header)			// - a declaration and a distinct definition (e.g. function declared in header)
	// - a declaration and an equal definition (e.g. inline function, or class)			// - a declaration and an equal definition (e.g. inline function, or class)
	// For some types of symbol, e.g. macros, definition == declaration always.			// For some types of symbol, e.g. macros, definition == declaration always.
	struct LocatedSymbol {			struct LocatedSymbol {
	// The (unqualified) name of the symbol.			// The (unqualified) name of the symbol.
	std::string Name;			std::string Name;
	// The canonical or best declaration: where most users find its interface.			// The canonical or best declaration: where most users find its interface.
	Location PreferredDeclaration;			Location PreferredDeclaration;
	// Where the symbol is defined, if known. May equal PreferredDeclaration.			// Where the symbol is defined, if known. May equal PreferredDeclaration.
	llvm::Optional<Location> Definition;			llvm::Optional<Location> Definition;
	};			};
	llvm::raw_ostream &operator<<(llvm::raw_ostream &, const LocatedSymbol &);			llvm::raw_ostream &operator<<(llvm::raw_ostream &, const LocatedSymbol &);
	/// Get definition of symbol at a specified \p Pos.			/// Get definition of symbol at a specified \p Pos.
	/// Multiple locations may be returned, corresponding to distinct symbols.			/// Multiple locations may be returned, corresponding to distinct symbols.
	std::vector<LocatedSymbol> locateSymbolAt(ParsedAST &AST, Position Pos,			std::vector<LocatedSymbol> locateSymbolAt(ParsedAST &AST, Position Pos,
	const SymbolIndex *Index = nullptr);			const SymbolIndex *Index = nullptr);

	// Tries to provide a textual fallback for locating a symbol referenced at			// Tries to provide a textual fallback for locating a symbol by looking up the
				[Inline review comment — nridge (unsubmitted, marked done)]: Do we want to bake this condition into the interface of this function, or would it be more appropriate to just tell the caller whether it's a real identifier token or not? In particular, given our plan for handling some dependent cases, the caller may want to do something along the lines of `if (notRealIdentifier || isDependent) { /* use the textual heuristic */ }`.
	// a location, by looking up the word under the cursor as a symbol name in the			// word under the cursor as a symbol name in the index.
	// index. The aim is to pick up references to symbols in contexts where			// The aim is to pick up references to symbols in contexts where
	// AST-based resolution does not work, such as comments, strings, and PP			// AST-based resolution does not work, such as comments, strings, and PP
	// disabled regions. The implementation takes a number of measures to avoid			// disabled regions.
	// false positives, such as looking for some signal that the word at the
	// given location is likely to be an identifier. The function does not
	// currently return results for locations that end up as real expanded
	// tokens, although this may be relaxed for e.g. dependent code in the future.
	// (This is for internal use by locateSymbolAt, and is exposed for testing).			// (This is for internal use by locateSymbolAt, and is exposed for testing).
	std::vector<LocatedSymbol>			std::vector<LocatedSymbol>
	locateSymbolNamedTextuallyAt(ParsedAST &AST, const SymbolIndex *Index,			locateSymbolTextually(const SpelledWord &Word, ParsedAST &AST,
	SourceLocation Loc,			const SymbolIndex *Index,
	const std::string &MainFilePath);			const std::string &MainFilePath);

				// Try to find a proximate occurrence of `Word` as an identifier, which can be
				// used to resolve it.
				// (This is for internal use by locateSymbolAt, and is exposed for testing).
				const syntax::Token *findNearbyIdentifier(const SpelledWord &Word,
				const syntax::TokenBuffer &TB);

	/// Get all document links			/// Get all document links
	std::vector<DocumentLink> getDocumentLinks(ParsedAST &AST);			std::vector<DocumentLink> getDocumentLinks(ParsedAST &AST);

	/// Returns highlights for all usages of a symbol at \p Pos.			/// Returns highlights for all usages of a symbol at \p Pos.
	std::vector<DocumentHighlight> findDocumentHighlights(ParsedAST &AST,			std::vector<DocumentHighlight> findDocumentHighlights(ParsedAST &AST,
	Position Pos);			Position Pos);

	struct ReferencesResult {			struct ReferencesResult {
	Show All 33 Lines

clang-tools-extra/clangd/XRefs.cpp

Show All 28 Lines
#include "clang/AST/DeclTemplate.h"		#include "clang/AST/DeclTemplate.h"
#include "clang/AST/ExprCXX.h"		#include "clang/AST/ExprCXX.h"
#include "clang/AST/Type.h"		#include "clang/AST/Type.h"
#include "clang/Basic/CharInfo.h"		#include "clang/Basic/CharInfo.h"
#include "clang/Basic/LLVM.h"		#include "clang/Basic/LLVM.h"
#include "clang/Basic/LangOptions.h"		#include "clang/Basic/LangOptions.h"
#include "clang/Basic/SourceLocation.h"		#include "clang/Basic/SourceLocation.h"
#include "clang/Basic/SourceManager.h"		#include "clang/Basic/SourceManager.h"
		#include "clang/Basic/TokenKinds.h"
#include "clang/Index/IndexDataConsumer.h"		#include "clang/Index/IndexDataConsumer.h"
#include "clang/Index/IndexSymbol.h"		#include "clang/Index/IndexSymbol.h"
#include "clang/Index/IndexingAction.h"		#include "clang/Index/IndexingAction.h"
#include "clang/Index/IndexingOptions.h"		#include "clang/Index/IndexingOptions.h"
#include "clang/Index/USRGeneration.h"		#include "clang/Index/USRGeneration.h"
#include "clang/Tooling/Syntax/Tokens.h"		#include "clang/Tooling/Syntax/Tokens.h"
#include "llvm/ADT/ArrayRef.h"		#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/None.h"		#include "llvm/ADT/None.h"
#include "llvm/ADT/STLExtras.h"		#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"		#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/StringExtras.h"		#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"		#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Casting.h"		#include "llvm/Support/Casting.h"
#include "llvm/Support/Error.h"		#include "llvm/Support/Error.h"
		#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Path.h"		#include "llvm/Support/Path.h"
#include "llvm/Support/raw_ostream.h"		#include "llvm/Support/raw_ostream.h"

namespace clang {		namespace clang {
namespace clangd {		namespace clangd {
namespace {		namespace {

// Returns the single definition of the entity declared by D, if visible.		// Returns the single definition of the entity declared by D, if visible.
▲ Show 20 Lines • Show All 251 Lines • ▼ Show 20 Lines	Index->lookup(QueryRequest, [&](const Symbol &Sym) {
R.PreferredDeclaration = *Loc;		R.PreferredDeclaration = *Loc;
}		}
});		});
}		}

return Result;		return Result;
}		}

llvm::StringRef wordTouching(llvm::StringRef Code, unsigned Offset) {		bool tokenSpelledAt(SourceLocation SpellingLoc, const syntax::TokenBuffer &TB) {
unsigned B = Offset, E = Offset;		auto ExpandedTokens = TB.expandedTokens(
while (B > 0 && isIdentifierBody(Code[B - 1]))		TB.sourceManager().getMacroArgExpandedLocation(SpellingLoc));
--B;		return !ExpandedTokens.empty();
while (E < Code.size() && isIdentifierBody(Code[E]))
++E;
return Code.slice(B, E);
}		}

bool isLikelyToBeIdentifier(StringRef Word) {		llvm::StringRef sourcePrefix(SourceLocation Loc, const SourceManager &SM) {
// Word contains underscore.		auto D = SM.getDecomposedLoc(Loc);
// This handles things like snake_case and MACRO_CASE.		bool Invalid = false;
if (Word.contains('_')) {		llvm::StringRef Buf = SM.getBufferData(D.first, &Invalid);
return true;		if (Invalid \|\| D.second > Buf.size())
}		return "";
// Word contains capital letter other than at beginning.		return Buf.substr(0, D.second);
// This handles things like lowerCamel and UpperCamel.
// The check for also containing a lowercase letter is to rule out
// initialisms like "HTTP".
bool HasLower = Word.find_if(clang::isLowercase) != StringRef::npos;
bool HasUpper = Word.substr(1).find_if(clang::isUppercase) != StringRef::npos;
if (HasLower && HasUpper) {
return true;
}
// FIXME: There are other signals we could listen for.
// Some of these require inspecting the surroundings of the word as well.
// - mid-sentence Capitalization
// - markup like quotes / backticks / brackets / "\p"
// - word has a qualifier (foo::bar)
return false;
}

bool tokenSurvivedPreprocessing(SourceLocation Loc,
const syntax::TokenBuffer &TB) {
auto WordExpandedTokens =
TB.expandedTokens(TB.sourceManager().getMacroArgExpandedLocation(Loc));
return !WordExpandedTokens.empty();
}		}

} // namespace		} // namespace

std::vector<LocatedSymbol>		std::vector<LocatedSymbol>
locateSymbolNamedTextuallyAt(ParsedAST &AST, const SymbolIndex *Index,		locateSymbolTextually(const SpelledWord &Word, ParsedAST &AST,
SourceLocation Loc,		const SymbolIndex *Index,
		[Inline review comment — nridge (unsubmitted, marked done)]: nit: I think separate names for the `FileID` and the position would read better.
const std::string &MainFilePath) {		const std::string &MainFilePath) {
const auto &SM = AST.getSourceManager();		// Don't use heuristics if this is a real identifier, or not an identifier.
		if (Word.ExpandedToken \|\| !Word.LikelyIdentifier \|\| !Index)
// Get the raw word at the specified location.
unsigned Pos;
FileID File;
std::tie(File, Pos) = SM.getDecomposedLoc(Loc);
llvm::StringRef Code = SM.getBufferData(File);
llvm::StringRef Word = wordTouching(Code, Pos);
if (Word.empty())
return {};
unsigned WordOffset = Word.data() - Code.data();
SourceLocation WordStart = SM.getComposedLoc(File, WordOffset);

// Attempt to determine the kind of token that contains the word,
// and bail if it's a string literal. Note that we cannot always
// determine the token kind (e.g. comments, for which we do want
// to activate, are not retained by TokenBuffer).
for (syntax::Token T :
syntax::spelledTokensTouching(WordStart, AST.getTokens())) {
if (T.range(AST.getSourceManager()).touches(WordOffset + Word.size())) {
if (isStringLiteral(T.kind()))
return {};
}
}

// Do not consider tokens that survived preprocessing.
// We are erring on the safe side here, as a user may expect to get
// accurate (as opposed to textual-heuristic) results for such tokens.
// FIXME: Relax this for dependent code.
if (tokenSurvivedPreprocessing(WordStart, AST.getTokens()))
return {};		return {};
		// We don't want to handle words in string literals. It'd be nice to whitelist
// Additionally filter for signals that the word is likely to be an		// comments instead, but they're not retained in TokenBuffer.
// identifier. This avoids triggering on e.g. random words in a comment.		if (Word.PartOfSpelledToken &&
if (!isLikelyToBeIdentifier(Word))		isStringLiteral(Word.PartOfSpelledToken->kind()))
return {};		return {};

		const auto &SM = AST.getSourceManager();
// Look up the selected word in the index.		// Look up the selected word in the index.
FuzzyFindRequest Req;		FuzzyFindRequest Req;
Req.Query = Word.str();		Req.Query = Word.Text.str();
Req.ProximityPaths = {MainFilePath};		Req.ProximityPaths = {MainFilePath};
Req.Scopes = visibleNamespaces(Code.take_front(Pos), AST.getLangOpts());		// Find the namespaces to query by lexing the file.
		Req.Scopes =
		visibleNamespaces(sourcePrefix(Word.Location, SM), AST.getLangOpts());
// FIXME: For extra strictness, consider AnyScope=false.		// FIXME: For extra strictness, consider AnyScope=false.
Req.AnyScope = true;		Req.AnyScope = true;
// We limit the results to 3 further below. This limit is to avoid fetching		// We limit the results to 3 further below. This limit is to avoid fetching
// too much data, while still likely having enough for 3 results to remain		// too much data, while still likely having enough for 3 results to remain
// after additional filtering.		// after additional filtering.
Req.Limit = 10;		Req.Limit = 10;
bool TooMany = false;		bool TooMany = false;
using ScoredLocatedSymbol = std::pair<float, LocatedSymbol>;		using ScoredLocatedSymbol = std::pair<float, LocatedSymbol>;
std::vector<ScoredLocatedSymbol> ScoredResults;		std::vector<ScoredLocatedSymbol> ScoredResults;
Index->fuzzyFind(Req, [&](const Symbol &Sym) {		Index->fuzzyFind(Req, [&](const Symbol &Sym) {
// Only consider exact name matches, including case.		// Only consider exact name matches, including case.
// This is to avoid too many false positives.		// This is to avoid too many false positives.
// We could relax this in the future (e.g. to allow for typos) if we make		// We could relax this in the future (e.g. to allow for typos) if we make
// the query more accurate by other means.		// the query more accurate by other means.
		[Inline review comment — nridge (unsubmitted, marked done)]: nit: space after `if`.
if (Sym.Name != Word)		if (Sym.Name != Word.Text)
return;		return;

// Exclude constructor results. They have the same name as the class,		// Exclude constructor results. They have the same name as the class,
// but we don't have enough context to prefer them over the class.		// but we don't have enough context to prefer them over the class.
if (Sym.SymInfo.Kind == index::SymbolKind::Constructor)		if (Sym.SymInfo.Kind == index::SymbolKind::Constructor)
return;		return;

auto MaybeDeclLoc =		auto MaybeDeclLoc =
▲ Show 20 Lines • Show All 48 Lines • ▼ Show 20 Lines	llvm::sort(ScoredResults,
return A.first > B.first;		return A.first > B.first;
});		});
std::vector<LocatedSymbol> Results;		std::vector<LocatedSymbol> Results;
for (auto &Res : std::move(ScoredResults))		for (auto &Res : std::move(ScoredResults))
Results.push_back(std::move(Res.second));		Results.push_back(std::move(Res.second));
return Results;		return Results;
}		}

		// Looks for an identifier token spelled exactly like Word, near Word in the
		// same file, that can be used to resolve it.
		// Returns nullptr if Word is itself a real expanded token, if it lies inside
		// a string literal, or if no acceptable candidate is close enough.
		const syntax::Token *findNearbyIdentifier(const SpelledWord &Word,
		                                          const syntax::TokenBuffer &TB) {
		  // Don't use heuristics if this is a real identifier.
		  // Unlikely identifiers are OK if they were used as identifiers nearby.
		  if (Word.ExpandedToken)
		    return nullptr;
		  // We don't want to handle words in string literals. It'd be nice to whitelist
		  // comments instead, but they're not retained in TokenBuffer.
		  if (Word.PartOfSpelledToken &&
		      isStringLiteral(Word.PartOfSpelledToken->kind()))
		    return nullptr;

		  const SourceManager &SM = TB.sourceManager();
		  // We prefer the closest possible token, line-wise. Backwards is penalized.
		  // Ties are implicitly broken by traversal order (first-one-wins).
		  auto File = SM.getFileID(Word.Location);
		  unsigned WordLine = SM.getSpellingLineNumber(Word.Location);
		  auto Cost = [&](SourceLocation Loc) -> unsigned {
		    assert(SM.getFileID(Loc) == File && "spelled token in wrong file?");
		    unsigned Line = SM.getSpellingLineNumber(Loc);
		    if (Line > WordLine)
		      return 1 + llvm::Log2_64(Line - WordLine);
		    if (Line < WordLine)
		      return 2 + llvm::Log2_64(WordLine - Line);
		    return 0;
		  };
		  const syntax::Token *BestTok = nullptr;
		  // Search bounds are based on word length: 2^N lines forward.
		  unsigned BestCost = Word.Text.size() + 1;

		  // Updates BestTok and BestCost if Tok is a good candidate.
		  // Returns true only when Tok matches the word but its cost is no better
		  // than the current best; the caller uses that as the signal to stop.
		  auto Consider = [&](const syntax::Token &Tok) {
		    if (!(Tok.kind() == tok::identifier && Tok.text(SM) == Word.Text))
		      return false;
		    // No point guessing the same location we started with.
		    if (Tok.location() == Word.Location)
		      return false;
		    // We've done cheap checks, compute cost so we can break the caller's loop.
		    unsigned TokCost = Cost(Tok.location());
		    if (TokCost >= BestCost)
		      return true; // causes the outer loop to break.
		    // Allow locations that might be part of the AST, and macros (even if empty)
		    // but not things like disabled preprocessor sections.
		    if (!(tokenSpelledAt(Tok.location(), TB) || TB.expansionStartingAt(&Tok)))
		      return false;
		    // We already verified this token is an improvement.
		    BestCost = TokCost;
		    BestTok = &Tok;
		    return false;
		  };
		  auto SpelledTokens = TB.spelledTokens(File);
		  // Find where the word occurred in the token stream, to search forward & back.
		  // partition_point needs the predicate to hold for a prefix of the range:
		  // tokens are in ascending location order, so "before the word" is that
		  // prefix and I ends up at the first token at or after the word.
		  auto *I = llvm::partition_point(SpelledTokens, [&](const syntax::Token &T) {
		    assert(SM.getFileID(T.location()) == SM.getFileID(Word.Location));
		    return T.location() < Word.Location; // Comparison OK: same file.
		  });
		  // Search for matches after the cursor.
		  for (const syntax::Token &Tok : llvm::makeArrayRef(I, SpelledTokens.end()))
		    if (Consider(Tok))
		      break; // costs of later tokens are greater...
		  // Search for matches before the cursor.
		  for (const syntax::Token &Tok :
		       llvm::reverse(llvm::makeArrayRef(SpelledTokens.begin(), I)))
		    if (Consider(Tok))
		      break;

		  if (BestTok)
		    vlog(
		        "Word {0} under cursor {1} isn't a token (after PP), trying nearby {2}",
		        Word.Text, Word.Location.printToString(SM),
		        BestTok->location().printToString(SM));

		  return BestTok;
		}

std::vector<LocatedSymbol> locateSymbolAt(ParsedAST &AST, Position Pos,		std::vector<LocatedSymbol> locateSymbolAt(ParsedAST &AST, Position Pos,
const SymbolIndex *Index) {		const SymbolIndex *Index) {
const auto &SM = AST.getSourceManager();		const auto &SM = AST.getSourceManager();
auto MainFilePath =		auto MainFilePath =
getCanonicalPath(SM.getFileEntryForID(SM.getMainFileID()), SM);		getCanonicalPath(SM.getFileEntryForID(SM.getMainFileID()), SM);
if (!MainFilePath) {		if (!MainFilePath) {
elog("Failed to get a path for the main file, so no references");		elog("Failed to get a path for the main file, so no references");
return {};		return {};
Show All 19 Lines	if (auto Macro =
// expansion.)		// expansion.)
return {*std::move(Macro)};		return {*std::move(Macro)};

auto ASTResults =		auto ASTResults =
locateASTReferent(CurLoc, TouchedIdentifier, AST, MainFilePath, Index);		locateASTReferent(CurLoc, TouchedIdentifier, AST, MainFilePath, Index);
if (!ASTResults.empty())		if (!ASTResults.empty())
return ASTResults;		return ASTResults;

return locateSymbolNamedTextuallyAt(AST, Index, CurLoc, MainFilePath);		// If the cursor can't be resolved directly, try fallback strategies.
		auto Word =
		SpelledWord::touching(*CurLoc, AST.getTokens(), AST.getLangOpts());
		if (Word) {
		// Is the same word nearby a real identifier that might refer to something?
		if (const syntax::Token *NearbyIdent =
		findNearbyIdentifier(*Word, AST.getTokens())) {
		if (auto Macro = locateMacroReferent(NearbyIdent, AST, MainFilePath))
		return {*std::move(Macro)};
		ASTResults = locateASTReferent(NearbyIdent->location(), NearbyIdent, AST,
		*MainFilePath, Index);
		if (!ASTResults.empty())
		return ASTResults;
		}
		// No nearby word, or it didn't refer to anything either. Try the index.
		auto TextualResults =
		locateSymbolTextually(Word, AST, Index, MainFilePath);
		if (!TextualResults.empty())
		return TextualResults;
		}

		return {};
}		}

std::vector<DocumentLink> getDocumentLinks(ParsedAST &AST) {		std::vector<DocumentLink> getDocumentLinks(ParsedAST &AST) {
const auto &SM = AST.getSourceManager();		const auto &SM = AST.getSourceManager();
auto MainFilePath =		auto MainFilePath =
getCanonicalPath(SM.getFileEntryForID(SM.getMainFileID()), SM);		getCanonicalPath(SM.getFileEntryForID(SM.getMainFileID()), SM);
if (!MainFilePath) {		if (!MainFilePath) {
elog("Failed to get a path for the main file, so no links");		elog("Failed to get a path for the main file, so no links");
▲ Show 20 Lines • Show All 567 Lines • Show Last 20 Lines

clang-tools-extra/clangd/unittests/SourceCodeTests.cpp

//===-- SourceCodeTests.cpp ------------------------------------- C++ --===//		//===-- SourceCodeTests.cpp ------------------------------------- C++ --===//
//		//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.		// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.		// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception		// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//		//
//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//
#include "Annotations.h"		#include "Annotations.h"
#include "Context.h"		#include "Context.h"
#include "Protocol.h"		#include "Protocol.h"
#include "SourceCode.h"		#include "SourceCode.h"
#include "TestTU.h"		#include "TestTU.h"
#include "clang/Basic/LangOptions.h"		#include "clang/Basic/LangOptions.h"
#include "clang/Basic/SourceLocation.h"		#include "clang/Basic/SourceLocation.h"
		#include "clang/Basic/TokenKinds.h"
#include "clang/Format/Format.h"		#include "clang/Format/Format.h"
#include "llvm/Support/Error.h"		#include "llvm/Support/Error.h"
#include "llvm/Support/raw_os_ostream.h"		#include "llvm/Support/raw_os_ostream.h"
#include "llvm/Testing/Support/Annotations.h"		#include "llvm/Testing/Support/Annotations.h"
#include "llvm/Testing/Support/Error.h"		#include "llvm/Testing/Support/Error.h"
#include "gmock/gmock.h"		#include "gmock/gmock.h"
#include "gtest/gtest.h"		#include "gtest/gtest.h"
#include <tuple>		#include <tuple>
▲ Show 20 Lines • Show All 300 Lines • ▼ Show 20 Lines	TEST(SourceCodeTests, CollectWords) {
)cpp");		)cpp");
std::set<StringRef> ActualWords(Words.keys().begin(), Words.keys().end());		std::set<StringRef> ActualWords(Words.keys().begin(), Words.keys().end());
std::set<StringRef> ExpectedWords = {"define", "fizz", "buzz", "this",		std::set<StringRef> ExpectedWords = {"define", "fizz", "buzz", "this",
"comment", "string", "some", "text",		"comment", "string", "some", "text",
"return", "magic", "word"};		"return", "magic", "word"};
EXPECT_EQ(ActualWords, ExpectedWords);		EXPECT_EQ(ActualWords, ExpectedWords);
}		}

		// Fixture for tests of SpelledWord::touching().
		// Test inputs are annotated source snippets: `^` marks the queried point
		// and an optional `[[...]]` range marks the text the found word must have.
		class SpelledWordsTest : public ::testing::Test {
		  // AST of the most recently parsed snippet. Held as a member (rather than a
		  // local) presumably so the buffers and tokens that a returned SpelledWord
		  // refers to stay alive while the test body inspects it.
		  llvm::Optional<ParsedAST> AST;

		  // Parses Text and looks up the word touching the annotated point.
		  // If Text has a `[[...]]` range, also checks the word's text against it.
		  // Returns None when no word touches the point.
		  llvm::Optional<SpelledWord> tryWord(const char *Text) {
		    llvm::Annotations A(Text);
		    auto TU = TestTU::withCode(A.code());
		    AST = TU.build();
		    auto SW = SpelledWord::touching(
		        AST->getSourceManager().getComposedLoc(
		            AST->getSourceManager().getMainFileID(), A.point()),
		        AST->getTokens(), AST->getLangOpts());
		    if (A.ranges().size()) {
		      llvm::StringRef Want = A.code().slice(A.range().Begin, A.range().End);
		      // Only dereference SW when a word was actually found: an annotated
		      // range with no word must fail the test, not crash the test binary.
		      if (SW)
		        EXPECT_EQ(Want, SW->Text) << Text;
		      else
		        ADD_FAILURE() << "No word touching point, but a range was expected: "
		                      << Text;
		    }
		    return SW;
		  }

		protected:
		  // Expects a word at the annotated point and returns it.
		  // On failure, reports it and returns a default-constructed SpelledWord so
		  // the caller can keep running.
		  SpelledWord word(const char *Text) {
		    auto Result = tryWord(Text);
		    EXPECT_TRUE(Result) << Text;
		    return Result.getValueOr(SpelledWord());
		  }

		  // Expects that no word touches the annotated point.
		  void noWord(const char *Text) { EXPECT_FALSE(tryWord(Text)) << Text; }
		};

		// Checks which point positions count as touching a word inside a comment.
		// `^` marks the queried point; `[[...]]` is the text the word must have.
		TEST_F(SpelledWordsTest, HeuristicBoundaries) {
		  // Point at the start, in the middle, or at the end of "foo" (also when
		  // "+bar" follows immediately): a word is expected.
		  const char *Touching[] = {
		      "// [[^foo]] ",
		      "// [[f^oo]] ",
		      "// [[foo^]] ",
		      "// [[foo^]]+bar ",
		  };
		  for (const char *Case : Touching)
		    word(Case);

		  // Point separated from "foo" by a space on either side: no word expected.
		  const char *NotTouching[] = {
		      "//^ foo ",
		      "// foo ^",
		  };
		  for (const char *Case : NotTouching)
		    noWord(Case);
		}

		// Checks the SpelledWord::LikelyIdentifier flag for words found in comments.
		// `^` marks the queried point; `[[...]]`, where present, is the text the
		// word must have.
		TEST_F(SpelledWordsTest, LikelyIdentifier) {
		// A plain lowercase word is not flagged.
		EXPECT_FALSE(word("// ^foo ").LikelyIdentifier);
		// Underscore-separated and mixed-case spellings are flagged.
		EXPECT_TRUE(word("// [[^foo_bar]] ").LikelyIdentifier);
		EXPECT_TRUE(word("// [[^fooBar]] ").LikelyIdentifier);
		// An all-uppercase word is not flagged.
		EXPECT_FALSE(word("// H^TTP ").LikelyIdentifier);
		// A word following a `\p` or `@param[in]` marker is flagged.
		EXPECT_TRUE(word("// \\p [[^foo]] ").LikelyIdentifier);
		EXPECT_TRUE(word("// @param[in] [[^foo]] ").LikelyIdentifier);
		// A word wrapped in backticks is flagged.
		EXPECT_TRUE(word("// `[[f^oo]]` ").LikelyIdentifier);
		// A word adjacent to `::` (on either side) is flagged.
		EXPECT_TRUE(word("// bar::[[f^oo]] ").LikelyIdentifier);
		nridgeUnsubmitted Done Reply Inline Actions Maybe test the initialism thing with `EXPECT_FALSE(word("// [[H^TTP]] ").LikelyIdentifier);` nridge: Maybe test the initialism thing with `EXPECT_FALSE(word("// [[H^TTP]] ").LikelyIdentifier);`
		EXPECT_TRUE(word("// [[f^oo]]::bar ").LikelyIdentifier);
		}

		// A word inside a comment is expected to have none of the three token
		// pointers set.
		TEST_F(SpelledWordsTest, Comment) {
		  const SpelledWord InComment = word("// [[^foo]]");
		  EXPECT_TRUE(InComment.PartOfSpelledToken == nullptr);
		  EXPECT_TRUE(InComment.SpelledToken == nullptr);
		  EXPECT_TRUE(InComment.ExpandedToken == nullptr);
		}

		// A word inside a string literal is part of the string_literal token, but
		// is neither a spelled token nor an expanded token itself.
		TEST_F(SpelledWordsTest, PartOfString) {
		  const SpelledWord InLiteral = word(R"( auto str = "foo [[^bar]] baz"; )");
		  // Stop here if unset: the next check dereferences the pointer.
		  ASSERT_TRUE(InLiteral.PartOfSpelledToken != nullptr);
		  const auto EnclosingKind = InLiteral.PartOfSpelledToken->kind();
		  EXPECT_EQ(EnclosingKind, tok::string_literal);
		  EXPECT_TRUE(InLiteral.SpelledToken == nullptr);
		  EXPECT_TRUE(InLiteral.ExpandedToken == nullptr);
		}

		// An identifier inside an `#if 0` region is expected to be a spelled
		// identifier token with no expanded counterpart.
		TEST_F(SpelledWordsTest, DisabledSection) {
		  const SpelledWord Skipped = word(R"cpp(
		#if 0
		foo [[^bar]] baz
		#endif
		)cpp");
		  // Stop here if unset: the next check dereferences the pointer.
		  ASSERT_TRUE(Skipped.SpelledToken != nullptr);
		  const auto SpelledKind = Skipped.SpelledToken->kind();
		  EXPECT_EQ(SpelledKind, tok::identifier);
		  // The word is exactly one token, so both pointers must name the same one.
		  EXPECT_EQ(Skipped.SpelledToken, Skipped.PartOfSpelledToken);
		  EXPECT_TRUE(Skipped.ExpandedToken == nullptr);
		}

		// Checks the token pointers for words written as macro arguments and for
		// words that name a macro.
		TEST_F(SpelledWordsTest, Macros) {
		  {
		    // An identifier passed as a macro argument: expected to have both a
		    // spelled identifier token and an expanded identifier token.
		    const SpelledWord Arg = word(R"cpp(
		#define ID(X) X
		ID(int [[^i]]);
		)cpp");
		    ASSERT_TRUE(Arg.SpelledToken != nullptr);
		    EXPECT_EQ(Arg.SpelledToken->kind(), tok::identifier);
		    EXPECT_EQ(Arg.SpelledToken, Arg.PartOfSpelledToken);
		    ASSERT_TRUE(Arg.ExpandedToken != nullptr);
		    EXPECT_EQ(Arg.ExpandedToken->kind(), tok::identifier);
		  }

		  {
		    // The name of an object-like macro: it is spelled in the file, but no
		    // expanded token is expected for it.
		    const SpelledWord Name = word(R"cpp(
		#define OBJECT Expansion;
		int [[^OBJECT]];
		)cpp");
		    EXPECT_TRUE(Name.SpelledToken != nullptr);
		    EXPECT_TRUE(Name.ExpandedToken == nullptr)
		        << "Expanded token is spelled differently";
		  }
		}

TEST(SourceCodeTests, VisibleNamespaces) {		TEST(SourceCodeTests, VisibleNamespaces) {
std::vector<std::pair<const char *, std::vector<std::string>>> Cases = {		std::vector<std::pair<const char *, std::vector<std::string>>> Cases = {
{		{
R"cpp(		R"cpp(
// Using directive resolved against enclosing namespaces.		// Using directive resolved against enclosing namespaces.
using namespace foo;		using namespace foo;
namespace ns {		namespace ns {
using namespace bar;		using namespace bar;
▲ Show 20 Lines • Show All 345 Lines • Show Last 20 Lines

clang-tools-extra/clangd/unittests/XRefsTests.cpp

Show First 20 Lines • Show All 679 Lines • ▼ Show 20 Lines	for (const char *Test : Tests) {
llvm::Optional<Range> WantDecl;		llvm::Optional<Range> WantDecl;
if (!T.ranges().empty())		if (!T.ranges().empty())
WantDecl = T.range();		WantDecl = T.range();

auto TU = TestTU::withCode(T.code());		auto TU = TestTU::withCode(T.code());

auto AST = TU.build();		auto AST = TU.build();
auto Index = TU.index();		auto Index = TU.index();
auto Results = locateSymbolNamedTextuallyAt(		auto Word = SpelledWord::touching(
AST, Index.get(),
cantFail(sourceLocationInMainFile(AST.getSourceManager(), T.point())),		cantFail(sourceLocationInMainFile(AST.getSourceManager(), T.point())),
testPath(TU.Filename));		AST.getTokens(), AST.getLangOpts());
		if (!Word) {
		ADD_FAILURE() << "No word touching point!" << Test;
		continue;
		}
		auto Results =
		locateSymbolTextually(*Word, AST, Index.get(), testPath(TU.Filename));

if (!WantDecl) {		if (!WantDecl) {
EXPECT_THAT(Results, IsEmpty()) << Test;		EXPECT_THAT(Results, IsEmpty()) << Test;
} else {		} else {
ASSERT_THAT(Results, ::testing::SizeIs(1)) << Test;		ASSERT_THAT(Results, ::testing::SizeIs(1)) << Test;
EXPECT_EQ(Results[0].PreferredDeclaration.range, *WantDecl) << Test;		EXPECT_EQ(Results[0].PreferredDeclaration.range, *WantDecl) << Test;
}		}
}		}
▲ Show 20 Lines • Show All 83 Lines • ▼ Show 20 Lines	auto T = Annotations(R"cpp(
};		};
// Will call u^niqueMethodName() on t.		// Will call u^niqueMethodName() on t.
template <typename T>		template <typename T>
void f(T t);		void f(T t);
)cpp");		)cpp");
auto TU = TestTU::withCode(T.code());		auto TU = TestTU::withCode(T.code());
auto AST = TU.build();		auto AST = TU.build();
auto Index = TU.index();		auto Index = TU.index();
auto Results = locateSymbolNamedTextuallyAt(		auto Word = SpelledWord::touching(
AST, Index.get(),
cantFail(sourceLocationInMainFile(AST.getSourceManager(), T.point())),		cantFail(sourceLocationInMainFile(AST.getSourceManager(), T.point())),
testPath(TU.Filename));		AST.getTokens(), AST.getLangOpts());
		ASSERT_TRUE(Word);
		auto Results =
		locateSymbolTextually(*Word, AST, Index.get(), testPath(TU.Filename));
EXPECT_THAT(Results,		EXPECT_THAT(Results,
UnorderedElementsAre(Sym("uniqueMethodName", T.range("FooLoc")),		UnorderedElementsAre(Sym("uniqueMethodName", T.range("FooLoc")),
Sym("uniqueMethodName", T.range("BarLoc"))));		Sym("uniqueMethodName", T.range("BarLoc"))));
}		}

TEST(LocateSymbol, TemplateTypedefs) {		TEST(LocateSymbol, TemplateTypedefs) {
auto T = Annotations(R"cpp(		auto T = Annotations(R"cpp(
template <class T> struct function {};		template <class T> struct function {};
▲ Show 20 Lines • Show All 177 Lines • ▼ Show 20 Lines	TEST(LocateSymbol, WithPreamble) {
Server.addDocument(FooCpp, FooWithoutHeader.code(), "null",		Server.addDocument(FooCpp, FooWithoutHeader.code(), "null",
WantDiagnostics::Yes);		WantDiagnostics::Yes);
// Use the AST being built in above request.		// Use the AST being built in above request.
EXPECT_THAT(		EXPECT_THAT(
cantFail(runLocateSymbolAt(Server, FooCpp, FooWithoutHeader.point())),		cantFail(runLocateSymbolAt(Server, FooCpp, FooWithoutHeader.point())),
ElementsAre(Sym("foo", FooWithoutHeader.range())));		ElementsAre(Sym("foo", FooWithoutHeader.range())));
}		}

		// End-to-end check: locateSymbolAt() on a word in a comment resolves to the
		// parameter with the same name declared on the following line.
		TEST(LocateSymbol, NearbyTokenSmoke) {
		  Annotations Code(R"cpp(
		// prints e^rr and crashes
		void die(const char* [[err]]);
		)cpp");
		  auto TU = TestTU::withCode(Code.code());
		  auto AST = TU.build();
		  // We don't pass an index, so can't hit index-based fallback.
		  auto Located = locateSymbolAt(AST, Code.point());
		  EXPECT_THAT(Located, ElementsAre(Sym("err", Code.range())));
		}

		// Table-driven test of findNearbyIdentifier().
		// In each case `^` marks the queried point. If a `[[...]]` range is present
		// it is the token that must be returned; with no range, no token may be
		// returned.
		TEST(LocateSymbol, NearbyIdentifier) {
		  const char *Tests[] = {
		nridgeUnsubmitted Done Reply Inline Actions Tangentially related, but what do you think about the issue I raised in this mailing list thread about testcase style? nridge: Tangentially related, but what do you think about the issue I raised in [this mailing list…
		sammccallAuthorUnsubmitted Done Reply Inline Actions Posted a reply on the thread. TL;DR: I hate that too the style brings several benefits I don't know what would be better that's available but we could probably build something sammccall: Posted a reply on the thread. TL;DR: - I hate that too - the style brings several benefits…
		      R"cpp(
		// regular identifiers (won't trigger)
		int hello;
		int y = he^llo;
		)cpp",
		      R"cpp(
		// disabled preprocessor sections
		int [[hello]];
		#if 0
		int y = ^hello;
		#endif
		)cpp",
		      R"cpp(
		// comments
		// he^llo, world
		int [[hello]];
		)cpp",
		      R"cpp(
		// not triggered by string literals
		int hello;
		const char* greeting = "h^ello, world";
		nridgeUnsubmitted Done Reply Inline Actions What's the rationale for supporting string literals for the nearby-ident heuristic, but not the index heuristic? nridge: What's the rationale for supporting string literals for the nearby-ident heuristic, but not the…
		)cpp",

		      R"cpp(
		// can refer to macro invocations
		nridgeUnsubmitted Done Reply Inline Actions (Did you mean to write a test case where the macro invocation expands to nothing?) nridge: (Did you mean to write a test case where the macro invocation expands to nothing?)
		#define INT int
		[[INT]] x;
		// I^NT
		)cpp",

		      R"cpp(
		// can refer to macro invocations (even if they expand to nothing)
		nridgeUnsubmitted Done Reply Inline Actions // (taking into account the penalty for going backwards) nridge: // (taking into account the penalty for going backwards)
		#define EMPTY
		[[EMPTY]] int x;
		// E^MPTY
		)cpp",

		      R"cpp(
		// prefer nearest occurrence, backwards is worse than forwards
		int hello;
		int x = hello;
		// h^ello
		int y = [[hello]];
		int z = hello;
		)cpp",

		      R"cpp(
		// short identifiers find near results
		int [[hi]];
		// h^i
		)cpp",
		      R"cpp(
		// short identifiers don't find far results
		int hi;



		// h^i
		)cpp",
		nridgeUnsubmitted Done Reply Inline Actions nit: space after `` nridge:* nit: space after `*`
		  };
		  for (const char *Test : Tests) {
		    Annotations T(Test);
		    auto AST = TestTU::withCode(T.code()).build();
		    const auto &SM = AST.getSourceManager();
		    // Stays unset when no nearby identifier is found.
		    llvm::Optional<Range> Nearby;
		    auto Word =
		nridgeUnsubmitted Done Reply Inline Actions `EXPECT_EQ`? nridge: `EXPECT_EQ`?
		        SpelledWord::touching(cantFail(sourceLocationInMainFile(SM, T.point())),
		                              AST.getTokens(), AST.getLangOpts());
		    if (!Word) {
		      // Every case places the point on a word; report and move on to the
		      // next case rather than dereferencing an empty Optional below.
		      ADD_FAILURE() << "No word at point! " << Test;
		      continue;
		    }
		    // Word is an Optional<SpelledWord>: pass the contained word, as the other
		    // SpelledWord::touching() callers in this file do with `*Word`.
		    if (const auto *Tok = findNearbyIdentifier(*Word, AST.getTokens()))
		      Nearby = halfOpenToRange(SM, CharSourceRange::getCharRange(
		                                       Tok->location(), Tok->endLocation()));
		    if (T.ranges().empty())
		      EXPECT_THAT(Nearby, Eq(llvm::None)) << Test;
		    else
		      EXPECT_EQ(Nearby, T.range()) << Test;
		  }
		}

TEST(FindReferences, WithinAST) {		TEST(FindReferences, WithinAST) {
const char *Tests[] = {		const char *Tests[] = {
R"cpp(// Local variable		R"cpp(// Local variable
int main() {		int main() {
int [[foo]];		int [[foo]];
[[^foo]] = 2;		[[^foo]] = 2;
int test1 = [[foo]];		int test1 = [[foo]];
}		}
▲ Show 20 Lines • Show All 421 Lines • Show Last 20 Lines

This is an archive of the discontinued LLVM Phabricator instance.

[clangd] go-to-def on names in comments etc that are used nearby.
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 259335

clang-tools-extra/clangd/SourceCode.h

clang-tools-extra/clangd/SourceCode.cpp

clang-tools-extra/clangd/XRefs.h

clang-tools-extra/clangd/XRefs.cpp

clang-tools-extra/clangd/unittests/SourceCodeTests.cpp

clang-tools-extra/clangd/unittests/XRefsTests.cpp

This is an archive of the discontinued LLVM Phabricator instance.

[clangd] go-to-def on names in comments etc that are used nearby. Closed, Public

Details

Diff Detail

Event Timeline

Revision Contents

Diff 259335

clang-tools-extra/clangd/SourceCode.h

clang-tools-extra/clangd/SourceCode.cpp

clang-tools-extra/clangd/XRefs.h

clang-tools-extra/clangd/XRefs.cpp

clang-tools-extra/clangd/unittests/SourceCodeTests.cpp

clang-tools-extra/clangd/unittests/XRefsTests.cpp

[clangd] go-to-def on names in comments etc that are used nearby.
ClosedPublic