diff --git a/clang/examples/CMakeLists.txt b/clang/examples/CMakeLists.txt --- a/clang/examples/CMakeLists.txt +++ b/clang/examples/CMakeLists.txt @@ -5,6 +5,7 @@ add_subdirectory(clang-interpreter) add_subdirectory(PrintFunctionNames) +add_subdirectory(PrintAttributeTokens) add_subdirectory(AnnotateFunctions) add_subdirectory(Attribute) add_subdirectory(CallSuperAttribute) diff --git a/clang/examples/PrintAttributeTokens/CMakeLists.txt b/clang/examples/PrintAttributeTokens/CMakeLists.txt new file mode 100644 --- /dev/null +++ b/clang/examples/PrintAttributeTokens/CMakeLists.txt @@ -0,0 +1,23 @@ +# If we don't need RTTI or EH, there's no reason to export anything +# from the plugin. +if( NOT MSVC ) # MSVC mangles symbols differently, and + # PrintAttributeTokens.export contains C++ symbols. + if( NOT LLVM_REQUIRES_RTTI ) + if( NOT LLVM_REQUIRES_EH ) + set(LLVM_EXPORTED_SYMBOL_FILE ${CMAKE_CURRENT_SOURCE_DIR}/PrintAttributeTokens.exports) + endif() + endif() +endif() + +add_llvm_library(PrintAttributeTokens MODULE PrintAttributeTokens.cpp PLUGIN_TOOL clang) + +if(LLVM_ENABLE_PLUGINS AND (WIN32 OR CYGWIN)) + set(LLVM_LINK_COMPONENTS + Support + ) + clang_target_link_libraries(PrintAttributeTokens PRIVATE + clangAST + clangBasic + clangFrontend + ) +endif()
diff --git a/clang/examples/PrintAttributeTokens/PrintAttributeTokens.cpp b/clang/examples/PrintAttributeTokens/PrintAttributeTokens.cpp
new file mode 100644
--- /dev/null
+++ b/clang/examples/PrintAttributeTokens/PrintAttributeTokens.cpp
@@ -0,0 +1,83 @@
+//===- PrintAttributeTokens.cpp -------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Example clang plugin which prints all argument tokens from within a
+// [[print_tokens(...)]] attribute.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Lex/Token.h"
+#include "clang/Sema/ParsedAttr.h"
+#include "clang/Sema/Sema.h"
+#include "clang/Sema/SemaDiagnostic.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace clang;
+
+namespace {
+
+/// Attribute handler for [[print_tokens(...)]] and
+/// [[plugin::print_tokens(...)]]. It writes the declaration the attribute is
+/// attached to, followed by one line per recorded argument token, to outs().
+struct PrintTokensAttrInfo : public ParsedAttrInfo {
+  PrintTokensAttrInfo() {
+    // C++-style [[print_tokens]] and [[plugin::print_tokens]] supported.
+    static constexpr Spelling S[] = {
+        {ParsedAttr::AS_CXX11, "print_tokens"},
+        {ParsedAttr::AS_CXX11, "plugin::print_tokens"}};
+    Spellings = S;
+  }
+
+  /// Dump \p D and then every token recorded in \p Attr.
+  ///
+  /// Each token line starts with the token kind name; literals and
+  /// identifiers are followed by a tab and their spelling.
+  ///
+  /// \returns AttributeApplied unconditionally.
+  AttrHandling handleDeclAttribute(Sema &S, Decl *D,
+                                   const ParsedAttr &Attr) const override {
+    llvm::raw_ostream &OS = llvm::outs();
+    OS << "PrintAttributeTokens -----------------------\n";
+    D->dump(OS);
+    OS << "\n";
+
+    const unsigned NumTokens = Attr.getNumTokens();
+    if (NumTokens == 0) {
+      OS << "\n";
+    } else {
+      const Token *Tokens = Attr.getTokens();
+      for (unsigned I = 0; I != NumTokens; ++I) {
+        const Token &Tok = Tokens[I];
+        OS << Tok.getName();
+
+        if (Tok.isLiteral()) {
+          OS << "\t";
+          // getLiteralData() points into the source buffer and is not
+          // terminated at the end of the token, so the length must be passed
+          // explicitly. It may also be null when the spelling is unavailable.
+          if (const char *Data = Tok.getLiteralData())
+            OS << StringRef(Data, Tok.getLength());
+        }
+
+        if (Tok.isAnyIdentifier())
+          OS << "\t" << Tok.getIdentifierInfo()->getName();
+
+        OS << "\n";
+      }
+    }
+
+    OS << "\n--------------------------------------------\n";
+
+    return AttributeApplied;
+  }
+};
+
+} // namespace
+
+static ParsedAttrInfoRegistry::Add<PrintTokensAttrInfo>
+    X("print_tokens", "print tokens inside the attribute arguments");
diff --git a/clang/examples/PrintAttributeTokens/PrintAttributeTokens.exports b/clang/examples/PrintAttributeTokens/PrintAttributeTokens.exports new file mode 100644 diff --git a/clang/examples/PrintAttributeTokens/README.txt 
b/clang/examples/PrintAttributeTokens/README.txt
new file mode 100644
--- /dev/null
+++ b/clang/examples/PrintAttributeTokens/README.txt
@@ -0,0 +1,14 @@
+This is a simple example demonstrating how to use clang's facility for
+registering custom attribute processors using a plugin. The plugin prints the
+argument tokens of every [[print_tokens(...)]] or [[plugin::print_tokens(...)]]
+attribute attached to a declaration.
+
+Build the plugin by building the `PrintAttributeTokens` CMake target.
+
+Once the plugin is built, you can run it on a C++ file using:
+--
+Linux:
+$ clang -cc1 -load <build-dir>/lib/PrintAttributeTokens.so -fsyntax-only some-input-file.cpp
+
+Mac:
+$ clang -cc1 -load <build-dir>/lib/PrintAttributeTokens.dylib -fsyntax-only some-input-file.cpp
diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h --- a/clang/include/clang/Lex/Preprocessor.h +++ b/clang/include/clang/Lex/Preprocessor.h @@ -131,6 +131,7 @@ friend class VariadicMacroScopeGuard; llvm::unique_function OnToken; + llvm::unique_function OnRecordedToken; std::shared_ptr PPOpts; DiagnosticsEngine *Diags; LangOptions &LangOpts; @@ -1041,10 +1042,34 @@ /// Register a function that would be called on each token in the final /// expanded token stream. /// This also reports annotation tokens produced by the parser. + /// + /// Note that the callback is not invoked for tokens re-emitted from the + /// backtrack cache. To receive callbacks for *any* tokens returned from + /// Lex(), use setTokenRecorder() instead. void setTokenWatcher(llvm::unique_function F) { OnToken = std::move(F); } + /// True if an active token watcher function has been registered with + /// setTokenWatcher(). + bool hasTokenWatcher() const { return bool(OnToken); } + + /// Register a function that would be called on each token returned by Lex(). + /// This also reports annotation tokens produced by the parser. + /// + /// Note that the callback is invoked for tokens re-emitted from the backtrack + /// cache. 
Depending on how the Lexer is used between calls to + /// setTokenRecorder(), the callback may be called multiple times for the + /// same token. To avoid this and only receive fresh tokens from the + /// underlying lexer, use setTokenWatcher() instead. + void setTokenRecorder(llvm::unique_function F) { + OnRecordedToken = std::move(F); + } + + /// True if an active token recorder function has been registered with + /// setTokenRecorder(). + bool hasTokenRecorder() const { return bool(OnRecordedToken); } + void setPreprocessToken(bool Preprocess) { PreprocessToken = Preprocess; } bool isMacroDefined(StringRef Id) { diff --git a/clang/include/clang/Sema/ParsedAttr.h b/clang/include/clang/Sema/ParsedAttr.h --- a/clang/include/clang/Sema/ParsedAttr.h +++ b/clang/include/clang/Sema/ParsedAttr.h @@ -18,6 +18,7 @@ #include "clang/Basic/AttributeCommonInfo.h" #include "clang/Basic/Diagnostic.h" #include "clang/Basic/SourceLocation.h" +#include "clang/Lex/Token.h" #include "clang/Sema/Ownership.h" #include "llvm/ADT/PointerUnion.h" #include "llvm/ADT/SmallVector.h" @@ -201,12 +202,14 @@ /// class ParsedAttr final : public AttributeCommonInfo, - private llvm::TrailingObjects< - ParsedAttr, ArgsUnion, detail::AvailabilityData, - detail::TypeTagForDatatypeData, ParsedType, detail::PropertyData> { + private llvm::TrailingObjects { friend TrailingObjects; size_t numTrailingObjects(OverloadToken) const { return NumArgs; } + size_t numTrailingObjects(OverloadToken) const { return NumTokens; } size_t numTrailingObjects(OverloadToken) const { return IsAvailability; } @@ -264,6 +267,10 @@ /// availability attribute. SourceLocation UnavailableLoc; + /// The number of tokens within the argument. + /// The tokens themselves are stored after the object. 
+ unsigned NumTokens; + const Expr *MessageExpr; const ParsedAttrInfo &Info; @@ -273,6 +280,9 @@ return getTrailingObjects(); } + Token *getTokensBuffer() { return getTrailingObjects(); } + const Token *getTokensBuffer() const { return getTrailingObjects(); } + detail::AvailabilityData *getAvailabilityData() { return getTrailingObjects(); } @@ -288,16 +298,19 @@ ParsedAttr(IdentifierInfo *attrName, SourceRange attrRange, IdentifierInfo *scopeName, SourceLocation scopeLoc, ArgsUnion *args, unsigned numArgs, Syntax syntaxUsed, - SourceLocation ellipsisLoc) + Token *tokens = nullptr, unsigned numTokens = 0, + SourceLocation ellipsisLoc = SourceLocation()) : AttributeCommonInfo(attrName, scopeName, attrRange, scopeLoc, syntaxUsed), EllipsisLoc(ellipsisLoc), NumArgs(numArgs), Invalid(false), UsedAsTypeAttr(false), IsAvailability(false), IsTypeTagForDatatype(false), IsProperty(false), HasParsedType(false), HasProcessingCache(false), IsPragmaClangAttribute(false), - Info(ParsedAttrInfo::get(*this)) { + NumTokens(numTokens), Info(ParsedAttrInfo::get(*this)) { if (numArgs) memcpy(getArgsBuffer(), args, numArgs * sizeof(ArgsUnion)); + if (numTokens) + memcpy(getTokensBuffer(), tokens, numTokens * sizeof(Token)); } /// Constructor for availability attributes. 
@@ -313,7 +326,7 @@ NumArgs(1), Invalid(false), UsedAsTypeAttr(false), IsAvailability(true), IsTypeTagForDatatype(false), IsProperty(false), HasParsedType(false), HasProcessingCache(false), IsPragmaClangAttribute(false), - UnavailableLoc(unavailable), MessageExpr(messageExpr), + UnavailableLoc(unavailable), NumTokens(0), MessageExpr(messageExpr), Info(ParsedAttrInfo::get(*this)) { ArgsUnion PVal(Parm); memcpy(getArgsBuffer(), &PVal, sizeof(ArgsUnion)); @@ -331,7 +344,8 @@ NumArgs(3), Invalid(false), UsedAsTypeAttr(false), IsAvailability(false), IsTypeTagForDatatype(false), IsProperty(false), HasParsedType(false), HasProcessingCache(false), - IsPragmaClangAttribute(false), Info(ParsedAttrInfo::get(*this)) { + IsPragmaClangAttribute(false), NumTokens(0), + Info(ParsedAttrInfo::get(*this)) { ArgsUnion *Args = getArgsBuffer(); Args[0] = Parm1; Args[1] = Parm2; @@ -348,7 +362,8 @@ NumArgs(1), Invalid(false), UsedAsTypeAttr(false), IsAvailability(false), IsTypeTagForDatatype(true), IsProperty(false), HasParsedType(false), HasProcessingCache(false), - IsPragmaClangAttribute(false), Info(ParsedAttrInfo::get(*this)) { + IsPragmaClangAttribute(false), NumTokens(0), + Info(ParsedAttrInfo::get(*this)) { ArgsUnion PVal(ArgKind); memcpy(getArgsBuffer(), &PVal, sizeof(ArgsUnion)); detail::TypeTagForDatatypeData &ExtraData = getTypeTagForDatatypeDataSlot(); @@ -366,7 +381,8 @@ NumArgs(0), Invalid(false), UsedAsTypeAttr(false), IsAvailability(false), IsTypeTagForDatatype(false), IsProperty(false), HasParsedType(true), HasProcessingCache(false), - IsPragmaClangAttribute(false), Info(ParsedAttrInfo::get(*this)) { + IsPragmaClangAttribute(false), NumTokens(0), + Info(ParsedAttrInfo::get(*this)) { new (&getTypeBuffer()) ParsedType(typeArg); } @@ -380,7 +396,8 @@ NumArgs(0), Invalid(false), UsedAsTypeAttr(false), IsAvailability(false), IsTypeTagForDatatype(false), IsProperty(true), HasParsedType(false), HasProcessingCache(false), - IsPragmaClangAttribute(false), 
Info(ParsedAttrInfo::get(*this)) { + IsPragmaClangAttribute(false), NumTokens(0), + Info(ParsedAttrInfo::get(*this)) { new (&getPropertyDataBuffer()) detail::PropertyData(getterId, setterId); } @@ -459,12 +476,22 @@ /// getNumArgs - Return the number of actual arguments to this attribute. unsigned getNumArgs() const { return NumArgs; } + /// getNumTokens - Return the number of tokens recorded by this attribute. + unsigned getNumTokens() const { return NumTokens; } + /// getArg - Return the specified argument. ArgsUnion getArg(unsigned Arg) const { assert(Arg < NumArgs && "Arg access out of range!"); return getArgsBuffer()[Arg]; } + /// getTokens - Return the array of Tokens. + const Token *getTokens() const { + if (NumTokens == 0) + return nullptr; + return getTokensBuffer(); + } + bool isArgExpr(unsigned Arg) const { return Arg < NumArgs && getArg(Arg).is(); } @@ -653,18 +680,15 @@ class AttributeFactory { public: enum { - AvailabilityAllocSize = - ParsedAttr::totalSizeToAlloc(1, 1, 0, 0, 0), - TypeTagForDatatypeAllocSize = - ParsedAttr::totalSizeToAlloc(1, 0, 1, 0, 0), - PropertyAllocSize = - ParsedAttr::totalSizeToAlloc(0, 0, 0, 0, 1), + AvailabilityAllocSize = ParsedAttr::totalSizeToAlloc< + ArgsUnion, detail::AvailabilityData, detail::TypeTagForDatatypeData, + ParsedType, detail::PropertyData, Token>(1, 1, 0, 0, 0, 0), + TypeTagForDatatypeAllocSize = ParsedAttr::totalSizeToAlloc< + ArgsUnion, detail::AvailabilityData, detail::TypeTagForDatatypeData, + ParsedType, detail::PropertyData, Token>(1, 0, 1, 0, 0, 0), + PropertyAllocSize = ParsedAttr::totalSizeToAlloc< + ArgsUnion, detail::AvailabilityData, detail::TypeTagForDatatypeData, + ParsedType, detail::PropertyData, Token>(0, 0, 0, 0, 1, 0), }; private: @@ -754,20 +778,17 @@ ParsedAttr *create(IdentifierInfo *attrName, SourceRange attrRange, IdentifierInfo *scopeName, SourceLocation scopeLoc, ArgsUnion *args, unsigned numArgs, - ParsedAttr::Syntax syntax, + ParsedAttr::Syntax syntax, Token *tokens = nullptr, + 
unsigned numTokens = 0, SourceLocation ellipsisLoc = SourceLocation()) { - size_t temp = - ParsedAttr::totalSizeToAlloc(numArgs, 0, 0, 0, 0); - (void)temp; void *memory = allocate( ParsedAttr::totalSizeToAlloc(numArgs, 0, 0, 0, - 0)); - return add(new (memory) ParsedAttr(attrName, attrRange, scopeName, scopeLoc, - args, numArgs, syntax, ellipsisLoc)); + detail::PropertyData, Token>( + numArgs, 0, 0, 0, 0, numTokens)); + return add(new (memory) + ParsedAttr(attrName, attrRange, scopeName, scopeLoc, args, + numArgs, syntax, tokens, numTokens, ellipsisLoc)); } ParsedAttr *create(IdentifierInfo *attrName, SourceRange attrRange, @@ -791,7 +812,8 @@ void *memory = allocate( ParsedAttr::totalSizeToAlloc(3, 0, 0, 0, 0)); + detail::PropertyData, Token>(3, 0, 0, 0, 0, + 0)); return add(new (memory) ParsedAttr(attrName, attrRange, scopeName, scopeLoc, Param1, Param2, Param3, syntax)); } @@ -816,7 +838,8 @@ void *memory = allocate( ParsedAttr::totalSizeToAlloc(0, 0, 0, 1, 0)); + detail::PropertyData, Token>(0, 0, 0, 1, 0, + 0)); return add(new (memory) ParsedAttr(attrName, attrRange, scopeName, scopeLoc, typeArg, syntaxUsed)); } @@ -957,10 +980,12 @@ ParsedAttr *addNew(IdentifierInfo *attrName, SourceRange attrRange, IdentifierInfo *scopeName, SourceLocation scopeLoc, ArgsUnion *args, unsigned numArgs, - ParsedAttr::Syntax syntax, + ParsedAttr::Syntax syntax, Token *tokens = nullptr, + unsigned numTokens = 0, SourceLocation ellipsisLoc = SourceLocation()) { - ParsedAttr *attr = pool.create(attrName, attrRange, scopeName, scopeLoc, - args, numArgs, syntax, ellipsisLoc); + ParsedAttr *attr = + pool.create(attrName, attrRange, scopeName, scopeLoc, args, numArgs, + syntax, tokens, numTokens, ellipsisLoc); addAtEnd(attr); return attr; } diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp --- a/clang/lib/Lex/Preprocessor.cpp +++ b/clang/lib/Lex/Preprocessor.cpp @@ -970,6 +970,9 @@ if (OnToken) OnToken(Result); } + + if (OnRecordedToken) + 
OnRecordedToken(Result); } /// Lex a header-name token (including one formed from header-name-tokens if diff --git a/clang/lib/Parse/ParseDecl.cpp b/clang/lib/Parse/ParseDecl.cpp --- a/clang/lib/Parse/ParseDecl.cpp +++ b/clang/lib/Parse/ParseDecl.cpp @@ -2828,7 +2828,7 @@ ArgsVector ArgExprs; ArgExprs.push_back(ArgExpr.get()); Attrs.addNew(KWName, KWLoc, nullptr, KWLoc, ArgExprs.data(), 1, - ParsedAttr::AS_Keyword, EllipsisLoc); + ParsedAttr::AS_Keyword, nullptr, 0, EllipsisLoc); } ExprResult Parser::ParseExtIntegerArgument() {
diff --git a/clang/lib/Parse/ParseDeclCXX.cpp b/clang/lib/Parse/ParseDeclCXX.cpp
--- a/clang/lib/Parse/ParseDeclCXX.cpp
+++ b/clang/lib/Parse/ParseDeclCXX.cpp
@@ -4096,13 +4096,44 @@
       LO.CPlusPlus ? ParsedAttr::AS_CXX11 : ParsedAttr::AS_C2x;
 
   // If the attribute isn't known, we will not attempt to parse any
-  // arguments.
+  // arguments. Instead, we just record the tokens and add the attribute
+  // directly. The recording happens here because this is the only place
+  // where user-defined (via plugins) attributes are parsed, and thus
+  // they care about the token stream directly.
   if (!hasAttribute(LO.CPlusPlus ? AttrSyntax::CXX : AttrSyntax::C, ScopeName,
                     AttrName, getTargetInfo(), getLangOpts())) {
-    // Eat the left paren, then skip to the ending right paren.
+    // Begin recording session.
+    SmallVector<Token, 16> RecordedTokens;
+    assert(!PP.hasTokenRecorder());
+    PP.setTokenRecorder(
+        [&RecordedTokens](const Token &Tok) { RecordedTokens.push_back(Tok); });
+
+    // Eat the left paren.
     ConsumeParen();
-    SkipUntil(tok::r_paren);
-    return false;
+
+    // Skip to the ending right paren. This fails if the input ends first.
+    const bool FoundRParen = SkipUntil(tok::r_paren);
+
+    // End recording session.
+    PP.setTokenRecorder(nullptr);
+
+    // The recorder sees every token returned by Lex(), so a successful skip
+    // leaves two trailing tokens that are not attribute arguments: the
+    // closing r_paren and the token lexed when that r_paren was consumed.
+    // Without both of them the subtraction below would wrap around, so give
+    // up and return false, as this path did before tokens were recorded.
+    if (!FoundRParen || RecordedTokens.size() < 2)
+      return false;
+
+    Attrs.addNew(
+        AttrName,
+        SourceRange(ScopeLoc.isValid() ? ScopeLoc : AttrNameLoc, AttrNameLoc),
+        ScopeName, ScopeLoc, nullptr, 0,
+        getLangOpts().CPlusPlus ? ParsedAttr::AS_CXX11 : ParsedAttr::AS_C2x,
+        RecordedTokens.data(),
+        static_cast<unsigned>(RecordedTokens.size() - 2));
+
+    return true;
   }
 
   if (ScopeName && (ScopeName->isStr("gnu") || ScopeName->isStr("__gnu__"))) {
diff --git a/clang/lib/Sema/ParsedAttr.cpp b/clang/lib/Sema/ParsedAttr.cpp --- a/clang/lib/Sema/ParsedAttr.cpp +++ b/clang/lib/Sema/ParsedAttr.cpp @@ -45,10 +45,11 @@ else if (HasParsedType) return totalSizeToAlloc(0, 0, 0, 1, 0); + detail::PropertyData, Token>(0, 0, 0, 1, 0, 0); return totalSizeToAlloc(NumArgs, 0, 0, 0, 0); + detail::PropertyData, Token>(NumArgs, 0, 0, 0, 0, + NumTokens); } AttributeFactory::AttributeFactory() { diff --git a/clang/test/Frontend/plugin-print-attr-tokens.cpp b/clang/test/Frontend/plugin-print-attr-tokens.cpp new file mode 100644 --- /dev/null +++ b/clang/test/Frontend/plugin-print-attr-tokens.cpp @@ -0,0 +1,12 @@ +// RUN: %clang -cc1 -load %llvmshlibdir/PrintAttributeTokens%pluginext -fsyntax-only -ast-dump -verify %s +// REQUIRES: plugins, examples + +// expected-no-diagnostics +[[print_tokens( + the, mitochondria, + , Of::The($cell))]] void +fn1a() {} +[[plugin::print_tokens("a string")]] void fn1b() {} +[[plugin::print_tokens()]] void fn1c() {} +[[plugin::print_tokens(some_ident)]] void fn1d() {} +[[plugin::print_tokens(int)]] void fn1e() {}