diff --git a/clang/include/clang/ExtractAPI/FrontendActions.h b/clang/include/clang/ExtractAPI/FrontendActions.h --- a/clang/include/clang/ExtractAPI/FrontendActions.h +++ b/clang/include/clang/ExtractAPI/FrontendActions.h @@ -39,6 +39,9 @@ /// files. std::unique_ptr Buffer; + /// The input files originally provided on the command line. + std::vector KnownInputFiles; + /// Prepare to execute the action on the given CompilerInstance. /// /// This is called before executing the action on any inputs. This generates a diff --git a/clang/lib/ExtractAPI/ExtractAPIConsumer.cpp b/clang/lib/ExtractAPI/ExtractAPIConsumer.cpp --- a/clang/lib/ExtractAPI/ExtractAPIConsumer.cpp +++ b/clang/lib/ExtractAPI/ExtractAPIConsumer.cpp @@ -19,6 +19,7 @@ #include "clang/AST/ParentMapContext.h" #include "clang/AST/RawCommentList.h" #include "clang/AST/RecursiveASTVisitor.h" +#include "clang/Basic/SourceLocation.h" #include "clang/Basic/TargetInfo.h" #include "clang/ExtractAPI/API.h" #include "clang/ExtractAPI/AvailabilityInfo.h" @@ -30,23 +31,88 @@ #include "clang/Frontend/FrontendOptions.h" #include "clang/Lex/MacroInfo.h" #include "clang/Lex/PPCallbacks.h" +#include "clang/Lex/Preprocessor.h" #include "clang/Lex/PreprocessorOptions.h" +#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/raw_ostream.h" +#include +#include using namespace clang; using namespace extractapi; namespace { +struct LocationFileChecker { + bool isLocationInKnownFile(SourceLocation Loc) { + // If the loc refers to a macro expansion we need to first get the file + // location of the expansion. 
+ auto FileLoc = SM.getFileLoc(Loc); + FileID FID = SM.getFileID(FileLoc); + if (FID.isInvalid()) + return false; + + const auto *File = SM.getFileEntryForID(FID); + if (!File) + return false; + + if (KnownFileEntries.count(File)) + return true; + + // Check if we have looked up this particular file entry previously instead + // of querying the preprocessor for external sources. + if (UnknownFileEntries.count(File)) + return false; + + // If file was not found, search by how the header was included. This is + // primarily to resolve headers found via headermaps, as they remap + // locations. + const auto *FileInfo = PP.getHeaderSearchInfo().getExistingFileInfo(File); + if (!FileInfo || !FileInfo->IsValid) + return false; + + StringRef FileName = File->getName(); + + if (!KnownIncludes.count(FileName)) { + // Record that the file is not a known include to avoid future string + // searches for the same file. + UnknownFileEntries.insert(File); + return false; + } + + KnownFileEntries.insert(File); + return true; + } + + LocationFileChecker(const SourceManager &SM, const Preprocessor &PP, + const std::vector &KnownFiles) + : SM(SM), PP(PP) { + for (const auto &KnownFilePath : KnownFiles) { + if (auto FileEntry = SM.getFileManager().getFile(KnownFilePath)) + KnownFileEntries.insert(*FileEntry); + KnownIncludes.insert(KnownFilePath); + } + } + +private: + const SourceManager &SM; + const Preprocessor &PP; + llvm::DenseSet KnownFileEntries; + llvm::StringSet<> KnownIncludes; + // Memoize unknown file entries so we don't have to check for external sources + // all the time. + llvm::DenseSet UnknownFileEntries; +}; + /// The RecursiveASTVisitor to traverse symbol declarations and collect API /// information. 
class ExtractAPIVisitor : public RecursiveASTVisitor { public: - ExtractAPIVisitor(ASTContext &Context, APISet &API) - : Context(Context), API(API) {} + ExtractAPIVisitor(ASTContext &Context, LocationFileChecker &LCF, APISet &API) + : Context(Context), API(API), LCF(LCF) {} const APISet &getAPI() const { return API; } @@ -68,6 +134,9 @@ Decl->getTemplateSpecializationKind() == TSK_Undeclared) return true; + if (!LCF.isLocationInKnownFile(Decl->getLocation())) + return true; + // Collect symbol information. StringRef Name = Decl->getName(); StringRef USR = API.recordUSR(Decl); @@ -125,6 +194,9 @@ return true; } + if (!LCF.isLocationInKnownFile(Decl->getLocation())) + return true; + // Collect symbol information. StringRef Name = Decl->getName(); StringRef USR = API.recordUSR(Decl); @@ -159,6 +231,9 @@ if (!Decl->isThisDeclarationADefinition()) return true; + if (!LCF.isLocationInKnownFile(Decl->getLocation())) + return true; + // Collect symbol information. StringRef Name = Decl->getName(); StringRef USR = API.recordUSR(Decl); @@ -194,6 +269,9 @@ if (isa(Decl)) return true; + if (!LCF.isLocationInKnownFile(Decl->getLocation())) + return true; + // Collect symbol information. StringRef Name = Decl->getName(); StringRef USR = API.recordUSR(Decl); @@ -225,6 +303,9 @@ if (!Decl->isThisDeclarationADefinition()) return true; + if (!LCF.isLocationInKnownFile(Decl->getLocation())) + return true; + // Collect symbol information. StringRef Name = Decl->getName(); StringRef USR = API.recordUSR(Decl); @@ -269,6 +350,9 @@ if (!Decl->isThisDeclarationADefinition()) return true; + if (!LCF.isLocationInKnownFile(Decl->getLocation())) + return true; + // Collect symbol information. 
StringRef Name = Decl->getName(); StringRef USR = API.recordUSR(Decl); @@ -494,12 +578,14 @@ ASTContext &Context; APISet &API; + LocationFileChecker &LCF; }; class ExtractAPIConsumer : public ASTConsumer { public: - ExtractAPIConsumer(ASTContext &Context, APISet &API) - : Visitor(Context, API) {} + ExtractAPIConsumer(ASTContext &Context, + std::unique_ptr LCF, APISet &API) + : Visitor(Context, *LCF, API), LCF(std::move(LCF)) {} void HandleTranslationUnit(ASTContext &Context) override { // Use ExtractAPIVisitor to traverse symbol declarations in the context. @@ -508,11 +594,13 @@ private: ExtractAPIVisitor Visitor; + std::unique_ptr LCF; }; class MacroCallback : public PPCallbacks { public: - MacroCallback(const SourceManager &SM, APISet &API) : SM(SM), API(API) {} + MacroCallback(const SourceManager &SM, LocationFileChecker &LCF, APISet &API) + : SM(SM), LCF(LCF), API(API) {} void MacroDefined(const Token &MacroNameToken, const MacroDirective *MD) override { @@ -552,6 +640,9 @@ if (PM.MD->getMacroInfo()->isUsedForHeaderGuard()) continue; + if (!LCF.isLocationInKnownFile(PM.MacroNameToken.getLocation())) + continue; + StringRef Name = PM.MacroNameToken.getIdentifierInfo()->getName(); PresumedLoc Loc = SM.getPresumedLoc(PM.MacroNameToken.getLocation()); StringRef USR = @@ -576,6 +667,7 @@ }; const SourceManager &SM; + LocationFileChecker &LCF; APISet &API; llvm::SmallVector PendingMacros; }; @@ -596,11 +688,15 @@ CI.getTarget().getTriple(), CI.getFrontendOpts().Inputs.back().getKind().getLanguage()); + auto LCF = std::make_unique( + CI.getSourceManager(), CI.getPreprocessor(), KnownInputFiles); + // Register preprocessor callbacks that will add macro definitions to API. 
CI.getPreprocessor().addPPCallbacks( - std::make_unique(CI.getSourceManager(), *API)); + std::make_unique(CI.getSourceManager(), *LCF, *API)); - return std::make_unique(CI.getASTContext(), *API); + return std::make_unique(CI.getASTContext(), + std::move(LCF), *API); } bool ExtractAPIAction::PrepareToExecuteAction(CompilerInstance &CI) { @@ -620,6 +716,8 @@ HeaderContents += " \""; HeaderContents += FIF.getFile(); HeaderContents += "\"\n"; + + KnownInputFiles.emplace_back(FIF.getFile()); } Buffer = llvm::MemoryBuffer::getMemBufferCopy(HeaderContents, diff --git a/clang/test/ExtractAPI/known_files_only.c b/clang/test/ExtractAPI/known_files_only.c new file mode 100644 --- /dev/null +++ b/clang/test/ExtractAPI/known_files_only.c @@ -0,0 +1,499 @@ +// RUN: rm -rf %t +// RUN: split-file %s %t +// RUN: sed -e "s@INPUT_DIR@%/t@g" %t/reference.output.json.in >> \ +// RUN: %t/reference.output.json +// RUN: %clang -extract-api --product-name=GlobalRecord -target arm64-apple-macosx \ +// RUN: %t/input1.h %t/input2.h %t/input3.h -o %t/output.json | FileCheck -allow-empty %s + +// Generator version is not consistent across test runs, normalize it. +// RUN: sed -e "s@\"generator\": \".*\"@\"generator\": \"?\"@g" \ +// RUN: %t/output.json >> %t/output-normalized.json +// RUN: diff %t/reference.output.json %t/output-normalized.json + +// CHECK-NOT: error: +// CHECK-NOT: warning: + +//--- input1.h +int num; + +// Let's make sure we aren't pulling in symbols from complex.h +#include +double complex build_complex(double real, double imaginary); + +//--- input2.h +/** + * \brief Add two numbers. + * \param [in] x A number. + * \param [in] y Another number. + * \param [out] res The result of x + y. + */ +void add(const int x, const int y, int *res); + +//--- input3.h +char unavailable __attribute__((unavailable)); + +//--- reference.output.json.in +{ + "metadata": { + "formatVersion": { + "major": 0, + "minor": 5, + "patch": 3 + }, + "generator": "?" 
+ }, + "module": { + "name": "GlobalRecord", + "platform": { + "architecture": "arm64", + "operatingSystem": { + "minimumVersion": { + "major": 11, + "minor": 0, + "patch": 0 + }, + "name": "macosx" + }, + "vendor": "apple" + } + }, + "relationhips": [], + "symbols": [ + { + "declarationFragments": [ + { + "kind": "typeIdentifier", + "preciseIdentifier": "c:I", + "spelling": "int" + }, + { + "kind": "text", + "spelling": " " + }, + { + "kind": "identifier", + "spelling": "num" + } + ], + "identifier": { + "interfaceLanguage": "c", + "precise": "c:@num" + }, + "kind": { + "displayName": "Global Variable", + "identifier": "c.var" + }, + "location": { + "character": 5, + "line": 1, + "uri": "file://INPUT_DIR/input1.h" + }, + "names": { + "subHeading": [ + { + "kind": "identifier", + "spelling": "num" + } + ], + "title": "num" + } + }, + { + "declarationFragments": [ + { + "kind": "typeIdentifier", + "preciseIdentifier": "c: