Index: include/clang/Lex/TokenLexer.h =================================================================== --- include/clang/Lex/TokenLexer.h +++ include/clang/Lex/TokenLexer.h @@ -170,6 +170,12 @@ /// return preexpanded tokens from Tokens. void ExpandFunctionArguments(); + /// Expand a single argument of a function-like macro, so that + /// ExpandFunctionArguments can use it to quickly return preexpanded tokens + /// from Tokens. Returns true if the argument expanded to anything. + bool ExpandSingleFunctionArgument(const Token &CurTok, int ArgNo, + SmallVectorImpl<Token> &ResultToks); + /// HandleMicrosoftCommentPaste - In microsoft compatibility mode, /##/ pastes /// together to form a comment that comments out everything in the current /// macro, other active macros, and anything left on the current physical Index: lib/Lex/TokenLexer.cpp =================================================================== --- lib/Lex/TokenLexer.cpp +++ lib/Lex/TokenLexer.cpp @@ -262,52 +262,7 @@ // argument and substitute the expanded tokens into the result. This is // C99 6.10.3.1p1. if (!PasteBefore && !PasteAfter) { - const Token *ResultArgToks; - - // Only preexpand the argument if it could possibly need it. This - // avoids some work in common cases. - const Token *ArgTok = ActualArgs->getUnexpArgument(ArgNo); - if (ActualArgs->ArgNeedsPreexpansion(ArgTok, PP)) - ResultArgToks = &ActualArgs->getPreExpArgument(ArgNo, Macro, PP)[0]; - else - ResultArgToks = ArgTok; // Use non-preexpanded tokens. - - // If the arg token expanded into anything, append it. - if (ResultArgToks->isNot(tok::eof)) { - size_t FirstResult = ResultToks.size(); - unsigned NumToks = MacroArgs::getArgLength(ResultArgToks); - ResultToks.append(ResultArgToks, ResultArgToks+NumToks); - - // In Microsoft-compatibility mode, we follow MSVC's preprocessing - // behavior by not considering single commas from nested macro - // expansions as argument separators.
Set a flag on the token so we can - // test for this later when the macro expansion is processed. - if (PP.getLangOpts().MSVCCompat && NumToks == 1 && - ResultToks.back().is(tok::comma)) - ResultToks.back().setFlag(Token::IgnoredComma); - - // If the '##' came from expanding an argument, turn it into 'unknown' - // to avoid pasting. - for (Token &Tok : llvm::make_range(ResultToks.begin() + FirstResult, - ResultToks.end())) { - if (Tok.is(tok::hashhash)) - Tok.setKind(tok::unknown); - } - - if(ExpandLocStart.isValid()) { - updateLocForMacroArgTokens(CurTok.getLocation(), - ResultToks.begin()+FirstResult, - ResultToks.end()); - } - - // If any tokens were substituted from the argument, the whitespace - // before the first token should match the whitespace of the arg - // identifier. - ResultToks[FirstResult].setFlagValue(Token::LeadingSpace, - NextTokGetsSpace); - ResultToks[FirstResult].setFlagValue(Token::StartOfLine, false); - NextTokGetsSpace = false; - } + ExpandSingleFunctionArgument(CurTok, ArgNo, ResultToks); continue; } @@ -328,19 +283,44 @@ PP.Diag(ResultToks.pop_back_val().getLocation(), diag::ext_paste_comma); } - ResultToks.append(ArgToks, ArgToks+NumToks); + // MSVC expands arguments to ## anyway. + if (PP.getLangOpts().MSVCCompat) { + if (!ExpandSingleFunctionArgument(CurTok, ArgNo, ResultToks)) { + // If the token doesn't exist, replace it with an empty string so that + // token pasting will work.
+ Token BlankTok; + BlankTok.startToken(); + BlankTok.setKind(tok::string_literal); + PP.CreateString("", BlankTok); + + if (!ResultToks.empty()) { + ResultToks.back().setFlagValue(Token::LeadingSpace, + NextTokGetsSpace); + ResultToks.back().setFlagValue(Token::StartOfLine, false); + NextTokGetsSpace = false; + } + + ResultToks.push_back(BlankTok); + if (ExpandLocStart.isValid()) + updateLocForMacroArgTokens(CurTok.getLocation(), + ResultToks.end() - 1, ResultToks.end()); + } + } else { + ResultToks.append(ArgToks, ArgToks + NumToks); - // If the '##' came from expanding an argument, turn it into 'unknown' - // to avoid pasting. - for (Token &Tok : llvm::make_range(ResultToks.end() - NumToks, - ResultToks.end())) { - if (Tok.is(tok::hashhash)) - Tok.setKind(tok::unknown); - } + // If the '##' came from expanding an argument, turn it into 'unknown' + // to avoid pasting. + for (Token &Tok : + llvm::make_range(ResultToks.end() - NumToks, ResultToks.end())) { + if (Tok.is(tok::hashhash)) + Tok.setKind(tok::unknown); + } - if (ExpandLocStart.isValid()) { - updateLocForMacroArgTokens(CurTok.getLocation(), - ResultToks.end()-NumToks, ResultToks.end()); + if (ExpandLocStart.isValid()) { + updateLocForMacroArgTokens(CurTok.getLocation(), + ResultToks.end() - NumToks, + ResultToks.end()); + } } // If this token (the macro argument) was supposed to get leading @@ -404,6 +384,60 @@ } } +/// Expand a single argument of a function-like macro, so that +/// ExpandFunctionArguments can use it to quickly return preexpanded tokens +/// from Tokens. Returns true if the argument expanded to anything. +bool TokenLexer::ExpandSingleFunctionArgument( + const Token &CurTok, int ArgNo, SmallVectorImpl<Token> &ResultToks) { + const Token *UnexpArgs = ActualArgs->getUnexpArgument(ArgNo); + const Token *ExpArgs = nullptr; + + // Only preexpand the argument if it could possibly need it. This + // avoids some work in common cases.
+ if (ActualArgs->ArgNeedsPreexpansion(UnexpArgs, PP)) + ExpArgs = &ActualArgs->getPreExpArgument(ArgNo, Macro, PP)[0]; + else + ExpArgs = UnexpArgs; // Use non-preexpanded tokens. + + // If the arg token expanded into anything, append it. + if (ExpArgs->isNot(tok::eof)) { + size_t FirstResult = ResultToks.size(); + unsigned NumToks = MacroArgs::getArgLength(ExpArgs); + ResultToks.append(ExpArgs, ExpArgs + NumToks); + + // In Microsoft-compatibility mode, we follow MSVC's preprocessing + // behavior by not considering single commas from nested macro + // expansions as argument separators. Set a flag on the token so we can + // test for this later when the macro expansion is processed. + if (PP.getLangOpts().MSVCCompat && NumToks == 1 && + ResultToks.back().is(tok::comma)) + ResultToks.back().setFlag(Token::IgnoredComma); + + // If the '##' came from expanding an argument, turn it into 'unknown' + // to avoid pasting. + for (Token &Tok : + llvm::make_range(ResultToks.begin() + FirstResult, ResultToks.end())) { + if (Tok.is(tok::hashhash)) + Tok.setKind(tok::unknown); + } + + if (ExpandLocStart.isValid()) { + updateLocForMacroArgTokens(CurTok.getLocation(), + ResultToks.begin() + FirstResult, + ResultToks.end()); + } + + // If any tokens were substituted from the argument, the whitespace + // before the first token should match the whitespace of the arg + // identifier. + ResultToks[FirstResult].setFlagValue(Token::LeadingSpace, NextTokGetsSpace); + ResultToks[FirstResult].setFlagValue(Token::StartOfLine, false); + NextTokGetsSpace = false; + return true; + } + return false; +} + /// \brief Checks if two tokens form wide string literal. 
static bool isWideStringLiteralFromMacro(const Token &FirstTok, const Token &SecondTok) { Index: test/Preprocessor/microsoft-ext.c =================================================================== --- test/Preprocessor/microsoft-ext.c +++ test/Preprocessor/microsoft-ext.c @@ -52,8 +52,8 @@ #undef COMMA_ELIDER #undef macro -// When expanding stringized __VA_ARGS__, MSVC expands omitted __VA_ARGS__ as nothing, -// and empty __VA_ARGS__ as the empty-string. +// When expanding stringized __VA_ARGS__, MSVC expands omitted __VA_ARGS__ as +// nothing and empty __VA_ARGS__ as the empty-string. #define M(x, ...) #__VA_ARGS__ Many is M(a,b,c)BAR // CHECK: Many is "b,c"BAR @@ -63,3 +63,30 @@ // CHECK: Omitted is BAR #undef M +// MSVC Preprocessor expands arguments to the ## operator. +#define M(a, ...) a ## __VA_ARGS__ +#define TWO 2 +TWELVE: M(1,TWO) +// CHECK: TWELVE: 12 +TWELVE_A: M(1, TWO) +// CHECK: TWELVE_A: 12 +TWENTYONE: M(TWO,1) +// CHECK: TWENTYONE: 21 +TWENTYONE_A: M(TWO, 1) +// CHECK: TWENTYONE_A: 21 +#undef TWO +#undef M + +// Also validate the situation where "TWO" is empty. +#define M(a, ...) a ## __VA_ARGS__ +#define TWO +TWELVE_EMPTY:M(1,TWO) +// CHECK: TWELVE_EMPTY:1 +TWELVE_EMPTY_A: M(1,TWO) +// CHECK: TWELVE_EMPTY_A: 1 +TWENTYONE_EMPTY:M(TWO, 1) +// CHECK: TWENTYONE_EMPTY:1 +TWENTYONE_EMPTY_A: M(TWO, 1) +// CHECK: TWENTYONE_EMPTY_A: 1 +#undef TWO +#undef M