Diff 248128

clang-tools-extra/clangd/ParsedAST.cpp

Show First 20 Lines • Show All 108 Lines • ▼ Show 20 Lines
// So we replay the non-transitive #includes that appear in the main-file.		// So we replay the non-transitive #includes that appear in the main-file.
// It would be nice to replay other events (macro definitions, ifdefs etc) but		// It would be nice to replay other events (macro definitions, ifdefs etc) but
// this addresses the most common cases fairly cheaply.		// this addresses the most common cases fairly cheaply.
class ReplayPreamble : private PPCallbacks {		class ReplayPreamble : private PPCallbacks {
public:		public:
// Attach preprocessor hooks such that preamble events will be injected at		// Attach preprocessor hooks such that preamble events will be injected at
// the appropriate time.		// the appropriate time.
// Events will be delivered to the currently registered PP callbacks.		// Events will be delivered to the currently registered PP callbacks.
static void attach(const IncludeStructure &Includes,		static void attach(const IncludeStructure &Includes, CompilerInstance &Clang,
CompilerInstance &Clang) {		const PreambleBounds &PB) {
auto &PP = Clang.getPreprocessor();		auto &PP = Clang.getPreprocessor();
auto *ExistingCallbacks = PP.getPPCallbacks();		auto *ExistingCallbacks = PP.getPPCallbacks();
// No need to replay events if nobody is listening.		// No need to replay events if nobody is listening.
if (!ExistingCallbacks)		if (!ExistingCallbacks)
return;		return;
PP.addPPCallbacks(std::unique_ptr<PPCallbacks>(		PP.addPPCallbacks(std::unique_ptr<PPCallbacks>(new ReplayPreamble(
new ReplayPreamble(Includes, ExistingCallbacks,		Includes, ExistingCallbacks, Clang.getSourceManager(), PP,
Clang.getSourceManager(), PP, Clang.getLangOpts())));		Clang.getLangOpts(), PB)));
// We're relying on the fact that addPPCallbacks keeps the old PPCallbacks		// We're relying on the fact that addPPCallbacks keeps the old PPCallbacks
// around, creating a chaining wrapper. Guard against other implementations.		// around, creating a chaining wrapper. Guard against other implementations.
assert(PP.getPPCallbacks() != ExistingCallbacks &&		assert(PP.getPPCallbacks() != ExistingCallbacks &&
"Expected chaining implementation");		"Expected chaining implementation");
}		}

private:		private:
ReplayPreamble(const IncludeStructure &Includes, PPCallbacks *Delegate,		ReplayPreamble(const IncludeStructure &Includes, PPCallbacks *Delegate,
const SourceManager &SM, Preprocessor &PP,		const SourceManager &SM, Preprocessor &PP,
const LangOptions &LangOpts)		const LangOptions &LangOpts, const PreambleBounds &PB)
: Includes(Includes), Delegate(Delegate), SM(SM), PP(PP),		: Includes(Includes), Delegate(Delegate), SM(SM), PP(PP) {
		sammccallUnsubmitted Done Reply Inline Actions tokenizing the whole file an extra time on every AST build seems a bit sad - this is considerably more lexing than we were doing before. Probably doesn't matter? We could trim this to the preamble bounds I guess. Or even compute it once when the preamble is built, since we assume all the bytes are the same? I guess SourceLocations are the problem... we could just translate offsets into the new SM, but that gets messy. On the other hand, assuming the preamble isn't going to change at all seems like an assumption not long for this world. On the first hand again, maybe we'll have to revisit looots of stuff (go to definition and everything) once that assumption breaks anyway. sammccall: tokenizing the whole file an extra time on every AST build seems a bit sad - this is…
		kadircetAuthorUnsubmitted Done Reply Inline Actions Implemented a way to partially tokenize a file in D74962. "On the other hand, assuming the preamble isn't going to change at all seems like an assumption not long for this world." It should be okay for ReplayPreamble, as only clang-tidy checkers depend on this logic and we are not planning to emit diagnostics with stale preambles. kadircet: Implemented a way to partially tokenize a file in D74962. > On the other hand, assuming the…
LangOpts(LangOpts) {}		// Only tokenize the preamble section of the main file, as we are not
		// interested in the rest of the tokens.
		MainFileTokens = syntax::tokenize(
		syntax::FileRange(SM.getMainFileID(), 0, PB.Size), SM, LangOpts);
		}

// In a normal compile, the preamble traverses the following structure:		// In a normal compile, the preamble traverses the following structure:
//		//
// mainfile.cpp		// mainfile.cpp
// <built-in>		// <built-in>
// ... macro definitions like __cplusplus ...		// ... macro definitions like __cplusplus ...
// <command-line>		// <command-line>
// ... macro definitions for args like -Dfoo=bar ...		// ... macro definitions for args like -Dfoo=bar ...
Show All 15 Lines	private:

void replay() {		void replay() {
for (const auto &Inc : Includes.MainFileIncludes) {		for (const auto &Inc : Includes.MainFileIncludes) {
const FileEntry *File = nullptr;		const FileEntry *File = nullptr;
if (Inc.Resolved != "")		if (Inc.Resolved != "")
if (auto FE = SM.getFileManager().getFile(Inc.Resolved))		if (auto FE = SM.getFileManager().getFile(Inc.Resolved))
File = *FE;		File = *FE;

		// Re-lex the #include directive to find its interesting parts.
		auto HashLoc = SM.getComposedLoc(SM.getMainFileID(), Inc.HashOffset);
		auto HashTok = llvm::partition_point(MainFileTokens,
		sammccallUnsubmitted Done Reply Inline Actions why raw encoding? sammccall: why raw encoding?
		[&HashLoc](const syntax::Token &T) {
		return T.location() < HashLoc;
		sammccallUnsubmitted Done Reply Inline Actions this looks like a linear search for each #include sammccall: this looks like a linear search for each #include
		kadircetAuthorUnsubmitted Done Reply Inline Actions made it logarithmic instead, we can also make it linear in total if we decide to rely on the fact that `MainFileIncludes` are sorted. I believe it is currently true but never promised by the include collector. kadircet: made it logarithmic instead, we can also make it linear in total if we decide to rely on the…
		});
		assert(HashTok != MainFileTokens.end() && HashTok->kind() == tok::hash);

		auto IncludeTok = std::next(HashTok);
		sammccallUnsubmitted Done Reply Inline Actions nit: IncludeTok sammccall: nit: IncludeTok
		assert(IncludeTok != MainFileTokens.end());

		auto FileTok = std::next(IncludeTok);
		assert(FileTok != MainFileTokens.end());

		// Create a fake import/include token, none of the callers seem to care
		// about clang::Token::Flags.
		sammccallUnsubmitted Done Reply Inline Actions Not clear what "imitate the PP logic" means. We construct a fake 'import'/'include' token... nobody cares about clang::Token::Flags. sammccall: Not clear what "imitate the PP logic" means. We construct a fake 'import'/'include' token...
		kadircetAuthorUnsubmitted Done Reply Inline Actions it was referring to the fact that we were performing the `PP.LookupIdentifierInfo` call to set kind etc. kadircet: it was referring to the fact that we were performing the `PP.LookupIdentifierInfo` call to set…
		Token SynthesizedIncludeTok;
		SynthesizedIncludeTok.startToken();
		SynthesizedIncludeTok.setLocation(IncludeTok->location());
		SynthesizedIncludeTok.setLength(IncludeTok->length());
		SynthesizedIncludeTok.setKind(tok::raw_identifier);
		SynthesizedIncludeTok.setRawIdentifierData(IncludeTok->text(SM).data());
		PP.LookUpIdentifierInfo(SynthesizedIncludeTok);

		// Same here, create a fake one for Filename, including angles or quotes.
		Token SynthesizedFilenameTok;
		SynthesizedFilenameTok.startToken();
		SynthesizedFilenameTok.setLocation(FileTok->location());
		// Note that we can't make use of FileTok->length/text in here as in the
		// case of angled includes this will contain tok::less instead of
		// filename. Whereas Inc.Written contains the full header name including
		// quotes/angles.
		SynthesizedFilenameTok.setLength(Inc.Written.length());
		SynthesizedFilenameTok.setKind(tok::header_name);
		SynthesizedFilenameTok.setLiteralData(Inc.Written.data());

llvm::StringRef WrittenFilename =		llvm::StringRef WrittenFilename =
llvm::StringRef(Inc.Written).drop_front().drop_back();		llvm::StringRef(Inc.Written).drop_front().drop_back();
bool Angled = llvm::StringRef(Inc.Written).startswith("<");		Delegate->InclusionDirective(HashTok->location(), SynthesizedIncludeTok,
		WrittenFilename, Inc.Written.front() == '<',
// Re-lex the #include directive to find its interesting parts.		FileTok->range(SM).toCharRange(SM), File,
llvm::StringRef Src = SM.getBufferData(SM.getMainFileID());		"SearchPath", "RelPath",
Lexer RawLexer(SM.getLocForStartOfFile(SM.getMainFileID()), LangOpts,		/*Imported=*/nullptr, Inc.FileKind);
Src.begin(), Src.begin() + Inc.HashOffset, Src.end());
Token HashTok, IncludeTok, FilenameTok;
RawLexer.LexFromRawLexer(HashTok);
assert(HashTok.getKind() == tok::hash);
RawLexer.setParsingPreprocessorDirective(true);
RawLexer.LexFromRawLexer(IncludeTok);
IdentifierInfo *II = PP.getIdentifierInfo(IncludeTok.getRawIdentifier());
IncludeTok.setIdentifierInfo(II);
IncludeTok.setKind(II->getTokenID());
RawLexer.LexIncludeFilename(FilenameTok);

Delegate->InclusionDirective(
HashTok.getLocation(), IncludeTok, WrittenFilename, Angled,
CharSourceRange::getCharRange(FilenameTok.getLocation(),
FilenameTok.getEndLoc()),
File, "SearchPath", "RelPath", /*Imported=*/nullptr, Inc.FileKind);
if (File)		if (File)
// FIXME: Use correctly named FileEntryRef.		// FIXME: Use correctly named FileEntryRef.
Delegate->FileSkipped(FileEntryRef(File->getName(), *File), FilenameTok,		Delegate->FileSkipped(FileEntryRef(File->getName(), *File),
Inc.FileKind);		SynthesizedFilenameTok, Inc.FileKind);
else {		else {
llvm::SmallString<1> UnusedRecovery;		llvm::SmallString<1> UnusedRecovery;
Delegate->FileNotFound(WrittenFilename, UnusedRecovery);		Delegate->FileNotFound(WrittenFilename, UnusedRecovery);
}		}
}		}
}		}

const IncludeStructure &Includes;		const IncludeStructure &Includes;
PPCallbacks *Delegate;		PPCallbacks *Delegate;
const SourceManager &SM;		const SourceManager &SM;
Preprocessor &PP;		Preprocessor &PP;
const LangOptions &LangOpts;		std::vector<syntax::Token> MainFileTokens;
};		};

} // namespace		} // namespace

void dumpAST(ParsedAST &AST, llvm::raw_ostream &OS) {		void dumpAST(ParsedAST &AST, llvm::raw_ostream &OS) {
AST.getASTContext().getTranslationUnitDecl()->dump(OS, true);		AST.getASTContext().getTranslationUnitDecl()->dump(OS, true);
}		}

▲ Show 20 Lines • Show All 115 Lines • ▼ Show 20 Lines	if (Opts.SuggestMissingIncludes && Index && !BuildDir.getError()) {
Clang->setExternalSemaSource(FixIncludes->unresolvedNameRecorder());		Clang->setExternalSemaSource(FixIncludes->unresolvedNameRecorder());
}		}

// Copy over the includes from the preamble, then combine with the		// Copy over the includes from the preamble, then combine with the
// non-preamble includes below.		// non-preamble includes below.
auto Includes = Preamble ? Preamble->Includes : IncludeStructure{};		auto Includes = Preamble ? Preamble->Includes : IncludeStructure{};
// Replay the preamble includes so that clang-tidy checks can see them.		// Replay the preamble includes so that clang-tidy checks can see them.
if (Preamble)		if (Preamble)
ReplayPreamble::attach(Includes, *Clang);		ReplayPreamble::attach(Includes, *Clang, Preamble->Preamble.getBounds());
// Important: collectIncludeStructure is registered after ReplayPreamble!		// Important: collectIncludeStructure is registered after ReplayPreamble!
// Otherwise we would collect the replayed includes again...		// Otherwise we would collect the replayed includes again...
// (We can't just use the replayed includes, they don't have Resolved path).		// (We can't just use the replayed includes, they don't have Resolved path).
Clang->getPreprocessor().addPPCallbacks(		Clang->getPreprocessor().addPPCallbacks(
collectIncludeStructureCallback(Clang->getSourceManager(), &Includes));		collectIncludeStructureCallback(Clang->getSourceManager(), &Includes));
// Copy over the macros in the preamble region of the main file, and combine		// Copy over the macros in the preamble region of the main file, and combine
// with non-preamble macros below.		// with non-preamble macros below.
MainFileMacros Macros;		MainFileMacros Macros;
▲ Show 20 Lines • Show All 184 Lines • Show Last 20 Lines

clang-tools-extra/clangd/unittests/ParsedASTTests.cpp

//===-- ParsedASTTests.cpp ------------------------------------------------===//		//===-- ParsedASTTests.cpp ------------------------------------------------===//
//		//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.		// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.		// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception		// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//		//
//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//
//		//
// These tests cover clangd's logic to build a TU, which generally uses the APIs		// These tests cover clangd's logic to build a TU, which generally uses the APIs
// in ParsedAST and Preamble, via the TestTU helper.		// in ParsedAST and Preamble, via the TestTU helper.
//		//
//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//

		#include "../../clang-tidy/ClangTidyModule.h"
		#include "../../clang-tidy/ClangTidyModuleRegistry.h"
#include "AST.h"		#include "AST.h"
#include "Annotations.h"		#include "Annotations.h"
#include "Compiler.h"		#include "Compiler.h"
#include "Diagnostics.h"		#include "Diagnostics.h"
#include "ParsedAST.h"		#include "ParsedAST.h"
#include "SourceCode.h"		#include "SourceCode.h"
#include "TestFS.h"		#include "TestFS.h"
#include "TestTU.h"		#include "TestTU.h"
#include "clang/AST/DeclTemplate.h"		#include "clang/AST/DeclTemplate.h"
		#include "clang/Basic/SourceLocation.h"
		#include "clang/Basic/SourceManager.h"
#include "clang/Basic/TokenKinds.h"		#include "clang/Basic/TokenKinds.h"
		#include "clang/Lex/PPCallbacks.h"
		#include "clang/Lex/Token.h"
#include "clang/Tooling/Syntax/Tokens.h"		#include "clang/Tooling/Syntax/Tokens.h"
		#include "llvm/ADT/StringRef.h"
#include "llvm/Support/ScopedPrinter.h"		#include "llvm/Support/ScopedPrinter.h"
#include "gmock/gmock-matchers.h"		#include "gmock/gmock-matchers.h"
#include "gmock/gmock.h"		#include "gmock/gmock.h"
#include "gtest/gtest.h"		#include "gtest/gtest.h"

namespace clang {		namespace clang {
namespace clangd {		namespace clangd {
namespace {		namespace {
Show All 33 Lines	if (const auto *Args = FD->getTemplateSpecializationArgs()) {
return ArgName == "<" + SpecializationArgs + ">";		return ArgName == "<" + SpecializationArgs + ">";
}		}
}		}
if (const NamedDecl *ND = dyn_cast<NamedDecl>(arg))		if (const NamedDecl *ND = dyn_cast<NamedDecl>(arg))
return printTemplateSpecializationArgs(*ND) == ArgName;		return printTemplateSpecializationArgs(*ND) == ArgName;
return false;		return false;
}		}

		MATCHER_P(RangeIs, R, "") {
		return arg.beginOffset() == R.Begin && arg.endOffset() == R.End;
		}

TEST(ParsedASTTest, TopLevelDecls) {		TEST(ParsedASTTest, TopLevelDecls) {
TestTU TU;		TestTU TU;
TU.HeaderCode = R"(		TU.HeaderCode = R"(
int header1();		int header1();
int header2;		int header2;
)";		)";
TU.Code = "int main();";		TU.Code = "int main();";
auto AST = TU.build();		auto AST = TU.build();
▲ Show 20 Lines • Show All 209 Lines • ▼ Show 20 Lines	for (const auto &R : SIDToRefs.second)
MacroExpansionPositions.push_back(R.start);		MacroExpansionPositions.push_back(R.start);
}		}
for (const auto &R : AST.getMacros().UnknownMacros)		for (const auto &R : AST.getMacros().UnknownMacros)
MacroExpansionPositions.push_back(R.start);		MacroExpansionPositions.push_back(R.start);
EXPECT_THAT(MacroExpansionPositions,		EXPECT_THAT(MacroExpansionPositions,
testing::UnorderedElementsAreArray(TestCase.points()));		testing::UnorderedElementsAreArray(TestCase.points()));
}		}

		TEST(ParsedASTTest, ReplayPreambleForTidyCheckers) {
		struct Inclusion {
		Inclusion(const SourceManager &SM, SourceLocation HashLoc,
		const Token &IncludeTok, llvm::StringRef FileName, bool IsAngled,
		CharSourceRange FilenameRange)
		: HashOffset(SM.getDecomposedLoc(HashLoc).second), IncTok(IncludeTok),
		IncDirective(IncludeTok.getIdentifierInfo()->getName()),
		FileNameOffset(SM.getDecomposedLoc(FilenameRange.getBegin()).second),
		FileName(FileName), IsAngled(IsAngled) {}
		size_t HashOffset;
		syntax::Token IncTok;
		llvm::StringRef IncDirective;
		size_t FileNameOffset;
		llvm::StringRef FileName;
		bool IsAngled;
		};
		static std::vector<Inclusion> Includes;
		static std::vector<syntax::Token> SkippedFiles;
		struct ReplayPreamblePPCallback : public PPCallbacks {
		const SourceManager &SM;
		explicit ReplayPreamblePPCallback(const SourceManager &SM) : SM(SM) {}

		void InclusionDirective(SourceLocation HashLoc, const Token &IncludeTok,
		StringRef FileName, bool IsAngled,
		CharSourceRange FilenameRange, const FileEntry *,
		StringRef, StringRef, const Module *,
		SrcMgr::CharacteristicKind) override {
		Includes.emplace_back(SM, HashLoc, IncludeTok, FileName, IsAngled,
		FilenameRange);
		}

		void FileSkipped(const FileEntryRef &, const Token &FilenameTok,
		SrcMgr::CharacteristicKind) override {
		SkippedFiles.emplace_back(FilenameTok);
		}
		};
		struct ReplayPreambleCheck : public tidy::ClangTidyCheck {
		ReplayPreambleCheck(StringRef Name, tidy::ClangTidyContext *Context)
		: ClangTidyCheck(Name, Context) {}
		void registerPPCallbacks(const SourceManager &SM, Preprocessor *PP,
		Preprocessor *ModuleExpanderPP) override {
		PP->addPPCallbacks(::std::make_unique<ReplayPreamblePPCallback>(SM));
		}
		};
		struct ReplayPreambleModule : public tidy::ClangTidyModule {
		void
		addCheckFactories(tidy::ClangTidyCheckFactories &CheckFactories) override {
		CheckFactories.registerCheck<ReplayPreambleCheck>(
		"replay-preamble-check");
		}
		};

		static tidy::ClangTidyModuleRegistry::Add<ReplayPreambleModule> X(
		"replay-preamble-module", "");
		TestTU TU;
		// This check records inclusion directives replayed by clangd.
		TU.ClangTidyChecks = "replay-preamble-check";
		llvm::Annotations Test(R"cpp(
		$hash^#$include[[import]] $filebegin^"$filerange[[bar.h]]"
		$hash^#$include[[include_next]] $filebegin^"$filerange[[baz.h]]"
		$hash^#$include[[include]] $filebegin^<$filerange[[a.h]]>)cpp");
		llvm::StringRef Code = Test.code();
		TU.Code = Code.str();
		TU.AdditionalFiles["bar.h"] = "";
		TU.AdditionalFiles["baz.h"] = "";
		TU.AdditionalFiles["a.h"] = "";
		TU.ExtraArgs = {"-isystem."};

		const auto &AST = TU.build();
		const auto &SM = AST.getSourceManager();

		auto HashLocs = Test.points("hash");
		ASSERT_EQ(HashLocs.size(), Includes.size());
		auto IncludeRanges = Test.ranges("include");
		ASSERT_EQ(IncludeRanges.size(), Includes.size());
		auto FileBeginLocs = Test.points("filebegin");
		ASSERT_EQ(FileBeginLocs.size(), Includes.size());
		auto FileRanges = Test.ranges("filerange");
		ASSERT_EQ(FileRanges.size(), Includes.size());
		sammccallUnsubmitted Done Reply Inline Actions I think it would be clearer to have parallel named point/range lists rather than doing index math. So the annotated code would be pretty verbose like: `$hash^#$include[[import]] $filerange^"$file[[bar.h]]"...` But I think the setup/asserts would be clearer. sammccall: I think it would be clearer to have parallel named point/range lists rather than doing index…

		ASSERT_EQ(SkippedFiles.size(), Includes.size());
		for (size_t I = 0; I < Includes.size(); ++I) {
		const auto &Inc = Includes[I];

		EXPECT_EQ(Inc.HashOffset, HashLocs[I]);

		auto IncRange = IncludeRanges[I];
		EXPECT_THAT(Inc.IncTok.range(SM), RangeIs(IncRange));
		EXPECT_EQ(Inc.IncTok.kind(), tok::identifier);
		EXPECT_EQ(Inc.IncDirective,
		Code.substr(IncRange.Begin, IncRange.End - IncRange.Begin));

		EXPECT_EQ(Inc.FileNameOffset, FileBeginLocs[I]);
		EXPECT_EQ(Inc.IsAngled, Code[FileBeginLocs[I]] == '<');

		auto FileRange = FileRanges[I];
		EXPECT_EQ(Inc.FileName,
		Code.substr(FileRange.Begin, FileRange.End - FileRange.Begin));

		EXPECT_EQ(SM.getDecomposedLoc(SkippedFiles[I].location()).second,
		Inc.FileNameOffset);
		// This also contains quotes/angles so increment the range by one from both
		// sides.
		EXPECT_EQ(
		SkippedFiles[I].text(SM),
		Code.substr(FileRange.Begin - 1, FileRange.End - FileRange.Begin + 2));
		EXPECT_EQ(SkippedFiles[I].kind(), tok::header_name);
		}
		}

} // namespace		} // namespace
} // namespace clangd		} // namespace clangd
} // namespace clang		} // namespace clang

This is an archive of the discontinued LLVM Phabricator instance.

[clangd] Make use of syntax tokens in ReplayPreamble
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 248128

clang-tools-extra/clangd/ParsedAST.cpp

clang-tools-extra/clangd/unittests/ParsedASTTests.cpp

This is an archive of the discontinued LLVM Phabricator instance.

[clangd] Make use of syntax tokens in ReplayPreamble
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 248128

clang-tools-extra/clangd/ParsedAST.cpp

clang-tools-extra/clangd/unittests/ParsedASTTests.cpp

[clangd] Make use of syntax tokens in ReplayPreamble
ClosedPublic