diff --git a/clang/docs/ClangCommandLineReference.rst b/clang/docs/ClangCommandLineReference.rst --- a/clang/docs/ClangCommandLineReference.rst +++ b/clang/docs/ClangCommandLineReference.rst @@ -2475,6 +2475,16 @@ Use #line in preprocessed output +.. option:: -fminimize-whitespace, -fno-minimize-whitespace + +Ignore the whitespace from the input file when emitting preprocessor +output. It will only contain whitespace when necessary, e.g. to keep two +minus signs from merging into a decrement operator. Useful with the +-P option to normalize whitespace such that two files with only formatting +changes are equal. + +Only valid with -E on C-like inputs and incompatible with -traditional-cpp. + .. option:: -fvalidate-ast-input-files-content Compute and store the hash of input files used to build an AST. Files with mismatching mtime's are considered valid if both contents is identical diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td --- a/clang/include/clang/Basic/DiagnosticDriverKinds.td +++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td @@ -129,6 +129,8 @@ "invalid -Xopenmp-target argument: '%0', options requiring arguments are unsupported">; def err_drv_argument_only_allowed_with : Error< "invalid argument '%0' only allowed with '%1'">; +def err_drv_minws_unsupported_input_type : Error< + "'-fminimize-whitespace' invalid for input of type %0">; def err_drv_amdgpu_ieee_without_no_honor_nans : Error< "invalid argument '-mno-amdgpu-ieee' only allowed with relaxed NaN handling">; def err_drv_argument_not_allowed_with : Error< diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -1799,6 +1799,9 @@ defm use_line_directives : BoolFOption<"use-line-directives", PreprocessorOutputOpts<"UseLineDirectives">, DefaultFalse, PosFlag, NegFlag>; +defm minimize_whitespace : 
BoolFOption<"minimize-whitespace", + PreprocessorOutputOpts<"MinimizeWhitespace">, DefaultFalse, + PosFlag, NegFlag>; def ffreestanding : Flag<["-"], "ffreestanding">, Group, Flags<[CC1Option]>, HelpText<"Assert that the compilation takes place in a freestanding environment">, diff --git a/clang/include/clang/Driver/Types.h b/clang/include/clang/Driver/Types.h --- a/clang/include/clang/Driver/Types.h +++ b/clang/include/clang/Driver/Types.h @@ -66,6 +66,14 @@ /// isAcceptedByClang - Can clang handle this input type. bool isAcceptedByClang(ID Id); + /// isDerivedFromC - Is the input derived from C. + /// + /// That is, does the lexer follow the rules of + /// TokenConcatenation::AvoidConcat. If this is the case, the preprocessor may + /// add and remove whitespace between tokens. Used to determine whether the + /// input can be processed by -fminimize-whitespace. + bool isDerivedFromC(ID Id); + /// isCXX - Is this a "C++" input (C++ and Obj-C++ sources and headers). bool isCXX(ID Id); diff --git a/clang/include/clang/Frontend/PreprocessorOutputOptions.h b/clang/include/clang/Frontend/PreprocessorOutputOptions.h --- a/clang/include/clang/Frontend/PreprocessorOutputOptions.h +++ b/clang/include/clang/Frontend/PreprocessorOutputOptions.h @@ -24,6 +24,7 @@ unsigned ShowIncludeDirectives : 1; ///< Print includes, imports etc. within preprocessed output. unsigned RewriteIncludes : 1; ///< Preprocess include directives only. unsigned RewriteImports : 1; ///< Include contents of transitively-imported modules. + unsigned MinimizeWhitespace : 1; ///< Ignore whitespace from input. 
public: PreprocessorOutputOptions() { @@ -36,6 +37,7 @@ ShowIncludeDirectives = 0; RewriteIncludes = 0; RewriteImports = 0; + MinimizeWhitespace = 0; } }; diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -52,8 +52,9 @@ using namespace llvm::opt; static void CheckPreprocessingOptions(const Driver &D, const ArgList &Args) { - if (Arg *A = - Args.getLastArg(clang::driver::options::OPT_C, options::OPT_CC)) { + if (Arg *A = Args.getLastArg(clang::driver::options::OPT_C, options::OPT_CC, + options::OPT_fminimize_whitespace, + options::OPT_fno_minimize_whitespace)) { if (!Args.hasArg(options::OPT_E) && !Args.hasArg(options::OPT__SLASH_P) && !Args.hasArg(options::OPT__SLASH_EP) && !D.CCCIsCPP()) { D.Diag(clang::diag::err_drv_argument_only_allowed_with) @@ -6067,6 +6068,16 @@ options::OPT_fno_use_line_directives, false)) CmdArgs.push_back("-fuse-line-directives"); + // -fno-minimize-whitespace is default. + if (Args.hasFlag(options::OPT_fminimize_whitespace, + options::OPT_fno_minimize_whitespace, false)) { + types::ID InputType = Inputs[0].getType(); + if (!isDerivedFromC(InputType)) + D.Diag(diag::err_drv_minws_unsupported_input_type) + << types::getTypeName(InputType); + CmdArgs.push_back("-fminimize-whitespace"); + } + // -fms-extensions=0 is default. 
if (Args.hasFlag(options::OPT_fms_extensions, options::OPT_fno_ms_extensions, IsWindowsMSVC)) diff --git a/clang/lib/Driver/Types.cpp b/clang/lib/Driver/Types.cpp --- a/clang/lib/Driver/Types.cpp +++ b/clang/lib/Driver/Types.cpp @@ -147,6 +147,45 @@ } } +bool types::isDerivedFromC(ID Id) { + switch (Id) { + default: + return false; + + case TY_PP_C: + case TY_C: + case TY_CL: + case TY_CLCXX: + case TY_PP_CUDA: + case TY_CUDA: + case TY_CUDA_DEVICE: + case TY_PP_HIP: + case TY_HIP: + case TY_HIP_DEVICE: + case TY_PP_ObjC: + case TY_PP_ObjC_Alias: + case TY_ObjC: + case TY_PP_CXX: + case TY_CXX: + case TY_PP_ObjCXX: + case TY_PP_ObjCXX_Alias: + case TY_ObjCXX: + case TY_RenderScript: + case TY_PP_CHeader: + case TY_CHeader: + case TY_CLHeader: + case TY_PP_ObjCHeader: + case TY_ObjCHeader: + case TY_PP_CXXHeader: + case TY_CXXHeader: + case TY_PP_ObjCXXHeader: + case TY_ObjCXXHeader: + case TY_CXXModule: + case TY_PP_CXXModule: + return true; + } +} + bool types::isObjC(ID Id) { switch (Id) { default: diff --git a/clang/lib/Frontend/PrintPreprocessedOutput.cpp b/clang/lib/Frontend/PrintPreprocessedOutput.cpp --- a/clang/lib/Frontend/PrintPreprocessedOutput.cpp +++ b/clang/lib/Frontend/PrintPreprocessedOutput.cpp @@ -95,14 +95,20 @@ bool DumpIncludeDirectives; bool UseLineDirectives; bool IsFirstFileEntered; + bool MinimizeWhitespace; + + Token PrevTok; + Token PrevPrevTok; + public: PrintPPOutputPPCallbacks(Preprocessor &pp, raw_ostream &os, bool lineMarkers, bool defines, bool DumpIncludeDirectives, - bool UseLineDirectives) + bool UseLineDirectives, bool MinimizeWhitespace) : PP(pp), SM(PP.getSourceManager()), ConcatInfo(PP), OS(os), DisableLineMarkers(lineMarkers), DumpDefines(defines), DumpIncludeDirectives(DumpIncludeDirectives), - UseLineDirectives(UseLineDirectives) { + UseLineDirectives(UseLineDirectives), + MinimizeWhitespace(MinimizeWhitespace) { CurLine = 0; CurFilename += ""; EmittedTokensOnThisLine = false; @@ -110,8 +116,13 @@ FileType = 
SrcMgr::C_User; Initialized = false; IsFirstFileEntered = false; + + PrevTok.startToken(); + PrevPrevTok.startToken(); } + bool isMinimizeWhitespace() const { return MinimizeWhitespace; } + void setEmittedTokensOnThisLine() { EmittedTokensOnThisLine = true; } bool hasEmittedTokensOnThisLine() const { return EmittedTokensOnThisLine; } @@ -120,7 +131,12 @@ return EmittedDirectiveOnThisLine; } - bool startNewLineIfNeeded(bool ShouldUpdateCurrentLine = true); + /// Ensure that the output stream position is at the beginning of a new line + /// and insert a newline if it is not. It is intended to ensure that + /// directives that do not originate from the input source (such as #line) + /// are emitted in the first column. To insert newlines that represent the + /// input, use MoveToLine(/*...*/, /*RequireStartOfLine=*/true). + void startNewLineIfNeeded(); void FileChanged(SourceLocation Loc, FileChangeReason Reason, SrcMgr::CharacteristicKind FileType, @@ -148,18 +164,45 @@ void PragmaAssumeNonNullBegin(SourceLocation Loc) override; void PragmaAssumeNonNullEnd(SourceLocation Loc) override; - bool HandleFirstTokOnLine(Token &Tok); + /// Insert whitespace before emitting the next token. + /// + /// @param Tok Next token to be emitted. + /// @param RequireSpace Ensure at least one whitespace is emitted. Useful + /// if non-tokens have been emitted to the stream. + /// @param RequireSameLine Never emit newlines. Useful when semantics depend + /// on being on the same line, such as directives. + void HandleWhitespaceBeforeTok(const Token &Tok, bool RequireSpace, + bool RequireSameLine); /// Move to the line of the provided source location. This will - /// return true if the output stream required adjustment or if - /// the requested location is on the first line. - bool MoveToLine(SourceLocation Loc) { + /// return true if a newline was inserted or if + /// the requested location is the first token on the first line. 
+ /// In these cases the next output will be the first column on the line and + /// make it possible to insert indentation. The newline was inserted + /// implicitly when at the beginning of the file. + /// + /// @param Tok Token where to move to. + /// @param RequireStartOfLine Whether the next line depends on being in the + /// first column, such as a directive. + /// + /// @return Whether column adjustments are necessary. + bool MoveToLine(const Token &Tok, bool RequireStartOfLine) { + PresumedLoc PLoc = SM.getPresumedLoc(Tok.getLocation()); + if (PLoc.isInvalid()) + return false; + bool IsFirstInFile = Tok.isAtStartOfLine() && PLoc.getLine() == 1; + return MoveToLine(PLoc.getLine(), RequireStartOfLine) || IsFirstInFile; + } + + /// Move to the line of the provided source location. Returns true if a new + /// line was inserted. + bool MoveToLine(SourceLocation Loc, bool RequireStartOfLine) { PresumedLoc PLoc = SM.getPresumedLoc(Loc); if (PLoc.isInvalid()) return false; - return MoveToLine(PLoc.getLine()) || (PLoc.getLine() == 1); + return MoveToLine(PLoc.getLine(), RequireStartOfLine); } - bool MoveToLine(unsigned LineNo); + bool MoveToLine(unsigned LineNo, bool RequireStartOfLine); bool AvoidConcat(const Token &PrevPrevTok, const Token &PrevTok, const Token &Tok) { @@ -187,7 +230,7 @@ void PrintPPOutputPPCallbacks::WriteLineInfo(unsigned LineNo, const char *Extra, unsigned ExtraLen) { - startNewLineIfNeeded(/*ShouldUpdateCurrentLine=*/false); + startNewLineIfNeeded(); // Emit #line directives or GNU line markers depending on what mode we're in. if (UseLineDirectives) { @@ -214,43 +257,57 @@ /// object. We can do this by emitting some number of \n's, or be emitting a /// #line directive. This returns false if already at the specified line, true /// if some newlines were emitted. 
-bool PrintPPOutputPPCallbacks::MoveToLine(unsigned LineNo) { +bool PrintPPOutputPPCallbacks::MoveToLine(unsigned LineNo, + bool RequireStartOfLine) { + // If it is required to start a new line or finish the current, insert + // vertical whitespace now and take it into account when moving to the + // expected line. + bool StartedNewLine = false; + if ((RequireStartOfLine && EmittedTokensOnThisLine) || + EmittedDirectiveOnThisLine) { + OS << '\n'; + StartedNewLine = true; + CurLine += 1; + EmittedTokensOnThisLine = false; + EmittedDirectiveOnThisLine = false; + } + // If this line is "close enough" to the original line, just print newlines, // otherwise print a #line directive. - if (LineNo-CurLine <= 8) { - if (LineNo-CurLine == 1) - OS << '\n'; - else if (LineNo == CurLine) - return false; // Spelling line moved, but expansion line didn't. - else { - const char *NewLines = "\n\n\n\n\n\n\n\n"; - OS.write(NewLines, LineNo-CurLine); - } + if (CurLine == LineNo) { + // Nothing to do if we are already on the correct line. + } else if (!StartedNewLine && (!MinimizeWhitespace || !DisableLineMarkers) && + LineNo - CurLine == 1) { + // Printing a single line has priority over printing a #line directive, even + // when minimizing whitespace which otherwise would print #line directives + // for every single line. + OS << '\n'; + StartedNewLine = true; + } else if (!MinimizeWhitespace && LineNo - CurLine <= 8) { + const char *NewLines = "\n\n\n\n\n\n\n\n"; + OS.write(NewLines, LineNo - CurLine); + StartedNewLine = true; } else if (!DisableLineMarkers) { // Emit a #line or line marker. WriteLineInfo(LineNo, nullptr, 0); - } else { - // Okay, we're in -P mode, which turns off line markers. However, we still - // need to emit a newline between tokens on different lines. 
- startNewLineIfNeeded(/*ShouldUpdateCurrentLine=*/false); + StartedNewLine = true; + } + + if (StartedNewLine) { + EmittedTokensOnThisLine = false; + EmittedDirectiveOnThisLine = false; } CurLine = LineNo; - return true; + return StartedNewLine; } -bool -PrintPPOutputPPCallbacks::startNewLineIfNeeded(bool ShouldUpdateCurrentLine) { +void PrintPPOutputPPCallbacks::startNewLineIfNeeded() { if (EmittedTokensOnThisLine || EmittedDirectiveOnThisLine) { OS << '\n'; EmittedTokensOnThisLine = false; EmittedDirectiveOnThisLine = false; - if (ShouldUpdateCurrentLine) - ++CurLine; - return true; } - - return false; } /// FileChanged - Whenever the preprocessor enters or exits a #include file @@ -273,7 +330,7 @@ if (Reason == PPCallbacks::EnterFile) { SourceLocation IncludeLoc = UserLoc.getIncludeLoc(); if (IncludeLoc.isValid()) - MoveToLine(IncludeLoc); + MoveToLine(IncludeLoc, /*RequireStartOfLine=*/false); } else if (Reason == PPCallbacks::SystemHeaderPragma) { // GCC emits the # directive for this directive on the line AFTER the // directive and emits a bunch of spaces that aren't needed. This is because @@ -290,7 +347,8 @@ FileType = NewFileType; if (DisableLineMarkers) { - startNewLineIfNeeded(/*ShouldUpdateCurrentLine=*/false); + if (!MinimizeWhitespace) + startNewLineIfNeeded(); return; } @@ -336,15 +394,13 @@ // In -dI mode, dump #include directives prior to dumping their content or // interpretation. if (DumpIncludeDirectives) { - startNewLineIfNeeded(); - MoveToLine(HashLoc); + MoveToLine(HashLoc, /*RequireStartOfLine=*/true); const std::string TokenText = PP.getSpelling(IncludeTok); assert(!TokenText.empty()); OS << "#" << TokenText << " " << (IsAngled ? '<' : '"') << FileName << (IsAngled ? '>' : '"') << " /* clang -E -dI */"; setEmittedDirectiveOnThisLine(); - startNewLineIfNeeded(); } // When preprocessing, turn implicit imports into module import pragmas. 
@@ -353,17 +409,13 @@ case tok::pp_include: case tok::pp_import: case tok::pp_include_next: - startNewLineIfNeeded(); - MoveToLine(HashLoc); + MoveToLine(HashLoc, /*RequireStartOfLine=*/true); OS << "#pragma clang module import " << Imported->getFullModuleName(true) << " /* clang -E: implicit import for " << "#" << PP.getSpelling(IncludeTok) << " " << (IsAngled ? '<' : '"') << FileName << (IsAngled ? '>' : '"') << " */"; - // Since we want a newline after the pragma, but not a #, start a - // new line immediately. - EmittedTokensOnThisLine = true; - startNewLineIfNeeded(); + setEmittedDirectiveOnThisLine(); break; case tok::pp___include_macros: @@ -398,11 +450,11 @@ /// Ident - Handle #ident directives when read by the preprocessor. /// void PrintPPOutputPPCallbacks::Ident(SourceLocation Loc, StringRef S) { - MoveToLine(Loc); + MoveToLine(Loc, /*RequireStartOfLine=*/true); OS.write("#ident ", strlen("#ident ")); OS.write(S.begin(), S.size()); - EmittedTokensOnThisLine = true; + setEmittedTokensOnThisLine(); } /// MacroDefined - This hook is called whenever a macro definition is seen. @@ -414,7 +466,7 @@ // Ignore __FILE__ etc. MI->isBuiltinMacro()) return; - MoveToLine(MI->getDefinitionLoc()); + MoveToLine(MI->getDefinitionLoc(), /*RequireStartOfLine=*/true); PrintMacroDefinition(*MacroNameTok.getIdentifierInfo(), *MI, PP, OS); setEmittedDirectiveOnThisLine(); } @@ -425,7 +477,7 @@ // Only print out macro definitions in -dD mode. 
if (!DumpDefines) return; - MoveToLine(MacroNameTok.getLocation()); + MoveToLine(MacroNameTok.getLocation(), /*RequireStartOfLine=*/true); OS << "#undef " << MacroNameTok.getIdentifierInfo()->getName(); setEmittedDirectiveOnThisLine(); } @@ -446,8 +498,7 @@ StringRef Namespace, PragmaMessageKind Kind, StringRef Str) { - startNewLineIfNeeded(); - MoveToLine(Loc); + MoveToLine(Loc, /*RequireStartOfLine=*/true); OS << "#pragma "; if (!Namespace.empty()) OS << Namespace << ' '; @@ -472,8 +523,7 @@ void PrintPPOutputPPCallbacks::PragmaDebug(SourceLocation Loc, StringRef DebugType) { - startNewLineIfNeeded(); - MoveToLine(Loc); + MoveToLine(Loc, /*RequireStartOfLine=*/true); OS << "#pragma clang __debug "; OS << DebugType; @@ -483,16 +533,14 @@ void PrintPPOutputPPCallbacks:: PragmaDiagnosticPush(SourceLocation Loc, StringRef Namespace) { - startNewLineIfNeeded(); - MoveToLine(Loc); + MoveToLine(Loc, /*RequireStartOfLine=*/true); OS << "#pragma " << Namespace << " diagnostic push"; setEmittedDirectiveOnThisLine(); } void PrintPPOutputPPCallbacks:: PragmaDiagnosticPop(SourceLocation Loc, StringRef Namespace) { - startNewLineIfNeeded(); - MoveToLine(Loc); + MoveToLine(Loc, /*RequireStartOfLine=*/true); OS << "#pragma " << Namespace << " diagnostic pop"; setEmittedDirectiveOnThisLine(); } @@ -501,8 +549,7 @@ StringRef Namespace, diag::Severity Map, StringRef Str) { - startNewLineIfNeeded(); - MoveToLine(Loc); + MoveToLine(Loc, /*RequireStartOfLine=*/true); OS << "#pragma " << Namespace << " diagnostic "; switch (Map) { case diag::Severity::Remark: @@ -528,8 +575,7 @@ void PrintPPOutputPPCallbacks::PragmaWarning(SourceLocation Loc, StringRef WarningSpec, ArrayRef Ids) { - startNewLineIfNeeded(); - MoveToLine(Loc); + MoveToLine(Loc, /*RequireStartOfLine=*/true); OS << "#pragma warning(" << WarningSpec << ':'; for (ArrayRef::iterator I = Ids.begin(), E = Ids.end(); I != E; ++I) OS << ' ' << *I; @@ -539,8 +585,7 @@ void PrintPPOutputPPCallbacks::PragmaWarningPush(SourceLocation 
Loc, int Level) { - startNewLineIfNeeded(); - MoveToLine(Loc); + MoveToLine(Loc, /*RequireStartOfLine=*/true); OS << "#pragma warning(push"; if (Level >= 0) OS << ", " << Level; @@ -549,16 +594,14 @@ } void PrintPPOutputPPCallbacks::PragmaWarningPop(SourceLocation Loc) { - startNewLineIfNeeded(); - MoveToLine(Loc); + MoveToLine(Loc, /*RequireStartOfLine=*/true); OS << "#pragma warning(pop)"; setEmittedDirectiveOnThisLine(); } void PrintPPOutputPPCallbacks::PragmaExecCharsetPush(SourceLocation Loc, StringRef Str) { - startNewLineIfNeeded(); - MoveToLine(Loc); + MoveToLine(Loc, /*RequireStartOfLine=*/true); OS << "#pragma character_execution_set(push"; if (!Str.empty()) OS << ", " << Str; @@ -567,64 +610,80 @@ } void PrintPPOutputPPCallbacks::PragmaExecCharsetPop(SourceLocation Loc) { - startNewLineIfNeeded(); - MoveToLine(Loc); + MoveToLine(Loc, /*RequireStartOfLine=*/true); OS << "#pragma character_execution_set(pop)"; setEmittedDirectiveOnThisLine(); } void PrintPPOutputPPCallbacks:: PragmaAssumeNonNullBegin(SourceLocation Loc) { - startNewLineIfNeeded(); - MoveToLine(Loc); + MoveToLine(Loc, /*RequireStartOfLine=*/true); OS << "#pragma clang assume_nonnull begin"; setEmittedDirectiveOnThisLine(); } void PrintPPOutputPPCallbacks:: PragmaAssumeNonNullEnd(SourceLocation Loc) { - startNewLineIfNeeded(); - MoveToLine(Loc); + MoveToLine(Loc, /*RequireStartOfLine=*/true); OS << "#pragma clang assume_nonnull end"; setEmittedDirectiveOnThisLine(); } -/// HandleFirstTokOnLine - When emitting a preprocessed file in -E mode, this -/// is called for the first token on each new line. If this really is the start -/// of a new logical line, handle it and return true, otherwise return false. -/// This may not be the start of a logical line because the "start of line" -/// marker is set for spelling lines, not expansion ones. 
-bool PrintPPOutputPPCallbacks::HandleFirstTokOnLine(Token &Tok) { - // Figure out what line we went to and insert the appropriate number of - // newline characters. - if (!MoveToLine(Tok.getLocation())) - return false; - - // Print out space characters so that the first token on a line is - // indented for easy reading. - unsigned ColNo = SM.getExpansionColumnNumber(Tok.getLocation()); - - // The first token on a line can have a column number of 1, yet still expect - // leading white space, if a macro expansion in column 1 starts with an empty - // macro argument, or an empty nested macro expansion. In this case, move the - // token to column 2. - if (ColNo == 1 && Tok.hasLeadingSpace()) - ColNo = 2; - - // This hack prevents stuff like: - // #define HASH # - // HASH define foo bar - // From having the # character end up at column 1, which makes it so it - // is not handled as a #define next time through the preprocessor if in - // -fpreprocessed mode. - if (ColNo <= 1 && Tok.is(tok::hash)) - OS << ' '; +void PrintPPOutputPPCallbacks::HandleWhitespaceBeforeTok(const Token &Tok, + bool RequireSpace, + bool RequireSameLine) { + // These tokens are not expanded to anything and don't need whitespace before + // them. + if (Tok.is(tok::eof) || + (Tok.isAnnotation() && !Tok.is(tok::annot_header_unit) && + !Tok.is(tok::annot_module_begin) && !Tok.is(tok::annot_module_end))) + return; - // Otherwise, indent the appropriate number of spaces. - for (; ColNo > 1; --ColNo) - OS << ' '; + if (!RequireSameLine && MoveToLine(Tok, /*RequireStartOfLine=*/false)) { + if (MinimizeWhitespace) { + // Avoid interpreting hash as a directive under -fpreprocessed. + if (Tok.is(tok::hash)) + OS << ' '; + } else { + // Print out space characters so that the first token on a line is + // indented for easy reading. 
+ unsigned ColNo = SM.getExpansionColumnNumber(Tok.getLocation()); + + // The first token on a line can have a column number of 1, yet still + // expect leading white space, if a macro expansion in column 1 starts + // with an empty macro argument, or an empty nested macro expansion. In + // this case, move the token to column 2. + if (ColNo == 1 && Tok.hasLeadingSpace()) + ColNo = 2; + + // This hack prevents stuff like: + // #define HASH # + // HASH define foo bar + // From having the # character end up at column 1, which makes it so it + // is not handled as a #define next time through the preprocessor if in + // -fpreprocessed mode. + if (ColNo <= 1 && Tok.is(tok::hash)) + OS << ' '; + + // Otherwise, indent the appropriate number of spaces. + for (; ColNo > 1; --ColNo) + OS << ' '; + } + } else { + // Insert whitespace between the previous and next token if either + // - The caller requires it + // - The input had whitespace between them and we are not in + // whitespace-minimization mode + // - The whitespace is necessary to keep the tokens apart and there is not + // already a newline between them + if (RequireSpace || (!MinimizeWhitespace && Tok.hasLeadingSpace()) || + ((EmittedTokensOnThisLine || EmittedDirectiveOnThisLine) && + AvoidConcat(PrevPrevTok, PrevTok, Tok))) + OS << ' '; + } - return true; + PrevPrevTok = PrevTok; + PrevTok = Tok; } void PrintPPOutputPPCallbacks::HandleNewlinesInToken(const char *TokStr, @@ -668,9 +727,9 @@ Token &PragmaTok) override { // Figure out what line we went to and insert the appropriate number of // newline characters. - Callbacks->startNewLineIfNeeded(); - Callbacks->MoveToLine(PragmaTok.getLocation()); + Callbacks->MoveToLine(PragmaTok.getLocation(), /*RequireStartOfLine=*/true); Callbacks->OS.write(Prefix, strlen(Prefix)); + Callbacks->setEmittedTokensOnThisLine(); if (ShouldExpandTokens) { // The first token does not have expanded macros. 
Expand them, if @@ -682,21 +741,16 @@ /*IsReinject=*/false); PP.Lex(PragmaTok); } - Token PrevToken; - Token PrevPrevToken; - PrevToken.startToken(); - PrevPrevToken.startToken(); // Read and print all of the pragma tokens. + bool IsFirst = true; while (PragmaTok.isNot(tok::eod)) { - if (PragmaTok.hasLeadingSpace() || - Callbacks->AvoidConcat(PrevPrevToken, PrevToken, PragmaTok)) - Callbacks->OS << ' '; + Callbacks->HandleWhitespaceBeforeTok(PragmaTok, /*RequireSpace=*/IsFirst, + /*RequireSameLine=*/true); + IsFirst = false; std::string TokSpell = PP.getSpelling(PragmaTok); Callbacks->OS.write(&TokSpell[0], TokSpell.size()); - - PrevPrevToken = PrevToken; - PrevToken = PragmaTok; + Callbacks->setEmittedTokensOnThisLine(); if (ShouldExpandTokens) PP.Lex(PragmaTok); @@ -715,44 +769,41 @@ bool DropComments = PP.getLangOpts().TraditionalCPP && !PP.getCommentRetentionState(); + bool IsStartOfLine = false; char Buffer[256]; - Token PrevPrevTok, PrevTok; - PrevPrevTok.startToken(); - PrevTok.startToken(); while (1) { - if (Callbacks->hasEmittedDirectiveOnThisLine()) { - Callbacks->startNewLineIfNeeded(); - Callbacks->MoveToLine(Tok.getLocation()); - } - - // If this token is at the start of a line, emit newlines if needed. - if (Tok.isAtStartOfLine() && Callbacks->HandleFirstTokOnLine(Tok)) { - // done. - } else if (Tok.hasLeadingSpace() || - // If we haven't emitted a token on this line yet, PrevTok isn't - // useful to look at and no concatenation could happen anyway. - (Callbacks->hasEmittedTokensOnThisLine() && - // Don't print "-" next to "-", it would form "--". - Callbacks->AvoidConcat(PrevPrevTok, PrevTok, Tok))) { - OS << ' '; - } + // Two lines joined with line continuation ('\' as last character on the + // line) must be emitted as one line even though Tok.getLine() returns two + // different values. In this situation Tok.isAtStartOfLine() is false even + // though it may be the first token on the lexical line. 
When + // dropping/skipping a token that is at the start of a line, propagate the + // start-of-line-ness to the next token to not append it to the previous + // line. + IsStartOfLine = IsStartOfLine || Tok.isAtStartOfLine(); + + Callbacks->HandleWhitespaceBeforeTok(Tok, /*RequireSpace=*/false, + /*RequireSameLine=*/!IsStartOfLine); if (DropComments && Tok.is(tok::comment)) { // Skip comments. Normally the preprocessor does not generate // tok::comment nodes at all when not keeping comments, but under // -traditional-cpp the lexer keeps /all/ whitespace, including comments. - SourceLocation StartLoc = Tok.getLocation(); - Callbacks->MoveToLine(StartLoc.getLocWithOffset(Tok.getLength())); + PP.Lex(Tok); + continue; } else if (Tok.is(tok::eod)) { // Don't print end of directive tokens, since they are typically newlines // that mess up our line tracking. These come from unknown pre-processor // directives or hash-prefixed comments in standalone assembly files. PP.Lex(Tok); + // FIXME: The token on the next line after #include should have + // Tok.isAtStartOfLine() set. + IsStartOfLine = true; continue; } else if (Tok.is(tok::annot_module_include)) { // PrintPPOutputPPCallbacks::InclusionDirective handles producing // appropriate output here. Ignore this token entirely. PP.Lex(Tok); + IsStartOfLine = true; continue; } else if (Tok.is(tok::annot_module_begin)) { // FIXME: We retrieve this token after the FileChanged callback, and @@ -764,11 +815,13 @@ Callbacks->BeginModule( reinterpret_cast(Tok.getAnnotationValue())); PP.Lex(Tok); + IsStartOfLine = true; continue; } else if (Tok.is(tok::annot_module_end)) { Callbacks->EndModule( reinterpret_cast(Tok.getAnnotationValue())); PP.Lex(Tok); + IsStartOfLine = true; continue; } else if (Tok.is(tok::annot_header_unit)) { // This is a header-name that has been (effectively) converted into a @@ -796,8 +849,17 @@ // Tokens that can contain embedded newlines need to adjust our current // line number. 
+ // FIXME: The token may end with a newline in which case + // setEmittedDirectiveOnThisLine/setEmittedTokensOnThisLine afterwards is + // wrong. if (Tok.getKind() == tok::comment || Tok.getKind() == tok::unknown) Callbacks->HandleNewlinesInToken(TokPtr, Len); + if (Tok.is(tok::comment) && Len >= 2 && TokPtr[0] == '/' && + TokPtr[1] == '/') { + // It's a line comment; + // Ensure that we don't concatenate anything behind it. + Callbacks->setEmittedDirectiveOnThisLine(); + } } else { std::string S = PP.getSpelling(Tok); OS.write(S.data(), S.size()); @@ -806,13 +868,17 @@ // line number. if (Tok.getKind() == tok::comment || Tok.getKind() == tok::unknown) Callbacks->HandleNewlinesInToken(S.data(), S.size()); + if (Tok.is(tok::comment) && S.size() >= 2 && S[0] == '/' && S[1] == '/') { + // It's a line comment; + // Ensure that we don't concatenate anything behind it. + Callbacks->setEmittedDirectiveOnThisLine(); + } } Callbacks->setEmittedTokensOnThisLine(); + IsStartOfLine = false; if (Tok.is(tok::eof)) break; - PrevPrevTok = PrevTok; - PrevTok = Tok; PP.Lex(Tok); } } @@ -870,7 +936,8 @@ PrintPPOutputPPCallbacks *Callbacks = new PrintPPOutputPPCallbacks( PP, *OS, !Opts.ShowLineMarkers, Opts.ShowMacros, - Opts.ShowIncludeDirectives, Opts.UseLineDirectives); + Opts.ShowIncludeDirectives, Opts.UseLineDirectives, + Opts.MinimizeWhitespace); // Expand macros in pragmas with -fms-extensions. The assumption is that // the majority of pragmas in such a file will be Microsoft pragmas. diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp --- a/clang/lib/Lex/Preprocessor.cpp +++ b/clang/lib/Lex/Preprocessor.cpp @@ -716,6 +716,12 @@ } // Update the token info (identifier info and appropriate token kind). + // FIXME: the raw_identifier may contain leading whitespace which is removed + // from the cleaned identifier token. The SourceLocation should be updated to + // refer to the non-whitespace character. 
For instance, the text "\\\nB" (a + // line continuation before 'B') is parsed as a single tok::raw_identifier and + // is cleaned to tok::identifier "B". After cleaning the token's length is + // still 3 and the SourceLocation refers to the location of the backslash. Identifier.setIdentifierInfo(II); if (getLangOpts().MSVCCompat && II->isCPlusPlusOperatorKeyword() && getSourceManager().isInSystemHeader(Identifier.getLocation())) diff --git a/clang/test/Preprocessor/comment_save.c b/clang/test/Preprocessor/comment_save.c --- a/clang/test/Preprocessor/comment_save.c +++ b/clang/test/Preprocessor/comment_save.c @@ -1,4 +1,5 @@ // RUN: %clang_cc1 -E -C %s | FileCheck -strict-whitespace %s +// RUN: %clang_cc1 -E -C -fminimize-whitespace %s | FileCheck -strict-whitespace %s // foo // CHECK: // foo diff --git a/clang/test/Preprocessor/first-line-indent.c b/clang/test/Preprocessor/first-line-indent.c --- a/clang/test/Preprocessor/first-line-indent.c +++ b/clang/test/Preprocessor/first-line-indent.c @@ -1,7 +1,14 @@ foo // RUN: %clang_cc1 -E %s | FileCheck -strict-whitespace %s +// RUN: %clang_cc1 -E -fminimize-whitespace %s | FileCheck -strict-whitespace %s --check-prefix=MINCOL +// RUN: %clang_cc1 -E -fminimize-whitespace -P %s | FileCheck -strict-whitespace %s --check-prefix=MINWS bar // CHECK: {{^ }}foo // CHECK: {{^ }}bar +// MINCOL: {{^}}foo +// MINCOL: {{^}}bar + +// MINWS: {{^}}foo bar + diff --git a/clang/test/Preprocessor/hash_line.c b/clang/test/Preprocessor/hash_line.c --- a/clang/test/Preprocessor/hash_line.c +++ b/clang/test/Preprocessor/hash_line.c @@ -4,6 +4,10 @@ // CHECK-NEXT: {{^ #$}} // CHECK-NEXT: {{^2$}} // CHECK-NEXT: {{^ #$}} + +// RUN: %clang_cc1 -E -P -fminimize-whitespace %s | FileCheck --strict-whitespace %s --check-prefix=MINWS +// MINWS: {{^}}1#2#{{$}} + #define EMPTY #define IDENTITY(X) X 1 diff --git a/clang/test/Preprocessor/line-directive-output-mincol.c b/clang/test/Preprocessor/line-directive-output-mincol.c new file mode 100644 --- 
/dev/null +++ b/clang/test/Preprocessor/line-directive-output-mincol.c @@ -0,0 +1,11 @@ +// RUN: %clang_cc1 -E -fminimize-whitespace %s 2>&1 | FileCheck %s -strict-whitespace + +// CHECK: # 6 "{{.*}}line-directive-output-mincol.c" +// CHECK-NEXT: int x; +// CHECK-NEXT: int y; +int x; +int y; +// CHECK-NEXT: # 10 "{{.*}}line-directive-output-mincol.c" +// CHECK-NEXT: int z; +int z; + diff --git a/clang/test/Preprocessor/line-directive-output.c b/clang/test/Preprocessor/line-directive-output.c --- a/clang/test/Preprocessor/line-directive-output.c +++ b/clang/test/Preprocessor/line-directive-output.c @@ -1,4 +1,5 @@ // RUN: %clang_cc1 -E %s 2>&1 | FileCheck %s -strict-whitespace +// RUN: %clang_cc1 -E -fminimize-whitespace %s 2>&1 | FileCheck %s -strict-whitespace // PR6101 int a; // CHECK: # 1 "{{.*}}line-directive-output.c" diff --git a/clang/test/Preprocessor/macro_space.c b/clang/test/Preprocessor/macro_space.c --- a/clang/test/Preprocessor/macro_space.c +++ b/clang/test/Preprocessor/macro_space.c @@ -1,4 +1,5 @@ // RUN: %clang_cc1 -E %s | FileCheck --strict-whitespace %s +// RUN: %clang_cc1 -E -P -fminimize-whitespace %s | FileCheck --strict-whitespace %s --check-prefix=MINWS #define FOO1() #define FOO2(x)x @@ -13,24 +14,32 @@ TEST(FOO1,) // CHECK: FOO1 <> < > <> <> < > <> < > < > +// MINWS: FOO1<><><><><><><><> TEST(FOO2,) // CHECK: FOO2 <> < > <> <> < > <> < > < > +// MINWS-SAME: FOO2<><><><><><><><> TEST(FOO3,) // CHECK: FOO3 <> < > <> <> < > <> < > < > +// MINWS-SAME: FOO3<><><><><><><><> TEST(FOO4,) // CHECK: FOO4 < > < > < > < > < > < > < > < > +// MINWS-SAME: FOO4<><><><><><><><> TEST(FOO5,) // CHECK: FOO5 < > < > < > < > < > < > < > < > +// MINWS-SAME: FOO5<><><><><><><><> TEST(FOO6,) // CHECK: FOO6 <[]> < []> <[]> <[]> <[] > <[]> <[] > < []> +// MINWS-SAME: FOO6<[]><[]><[]><[]><[]><[]><[]><[]> TEST(FOO7,) // CHECK: FOO7 <[ ]> < [ ]> <[ ]> <[ ]> <[ ] > <[ ]> <[ ] > < [ ]> +// MINWS-SAME: FOO7<[]><[]><[]><[]><[]><[]><[]><[]> TEST(FOO8,) // CHECK: FOO8 <[ 
]> < [ ]> <[ ]> <[ ]> <[ ] > <[ ]> <[ ] > < [ ]> +// MINWS-SAME: FOO8<[]><[]><[]><[]><[]><[]><[]><[]> diff --git a/clang/test/Preprocessor/minimize-whitespace-messages.c b/clang/test/Preprocessor/minimize-whitespace-messages.c new file mode 100644 --- /dev/null +++ b/clang/test/Preprocessor/minimize-whitespace-messages.c @@ -0,0 +1,8 @@ +// RUN: not %clang -c -fminimize-whitespace %s 2>&1 | FileCheck %s --check-prefix=ON +// ON: error: invalid argument '-fminimize-whitespace' only allowed with '-E' + +// RUN: not %clang -c -fno-minimize-whitespace %s 2>&1 | FileCheck %s --check-prefix=OFF +// OFF: error: invalid argument '-fno-minimize-whitespace' only allowed with '-E' + +// RUN: not %clang -E -fminimize-whitespace -x assembler-with-cpp %s 2>&1 | FileCheck %s --check-prefix=ASM +// ASM: error: '-fminimize-whitespace' invalid for input of type assembler-with-cpp diff --git a/clang/test/Preprocessor/minimize-whitespace.c b/clang/test/Preprocessor/minimize-whitespace.c new file mode 100644 --- /dev/null +++ b/clang/test/Preprocessor/minimize-whitespace.c @@ -0,0 +1,55 @@ +// RUN: %clang_cc1 -fminimize-whitespace -E %s 2>&1 | FileCheck %s --strict-whitespace --check-prefix=MINCOL +// RUN: %clang_cc1 -fminimize-whitespace -E -C %s 2>&1 | FileCheck %s --strict-whitespace --check-prefix=MINCCOL +// RUN: %clang_cc1 -fminimize-whitespace -E -P %s 2>&1 | FileCheck %s --strict-whitespace --check-prefix=MINWS +// RUN: %clang_cc1 -fminimize-whitespace -E -C -P %s 2>&1 | FileCheck %s --strict-whitespace --check-prefix=MINCWS + +#define NOT_OMP omp something +#define HASH # + + int a; /* span-comment */ + int b ; // line-comment + _Pragma ( "omp barrier" ) x // more line-comments + #pragma omp nothing // another comment +HASH pragma NOT_OMP + int e; // again a line + int \ +f ; + + +// MINCOL: {{^}}# 9 "{{.*}}minimize-whitespace.c"{{$}} +// MINCOL: {{^}}int a;{{$}} +// MINCOL-NEXT: {{^}}int b;{{$}} +// MINCOL-NEXT: {{^}}#pragma omp barrier{{$}} +// MINCOL-NEXT: # 11 
"{{.*}}minimize-whitespace.c" +// MINCOL-NEXT: {{^}}x{{$}} +// MINCOL-NEXT: {{^}}#pragma omp nothing{{$}} +// MINCOL-NEXT: {{^ }}#pragma omp something{{$}} +// MINCOL-NEXT: {{^}}int e;{{$}} +// MINCOL-NEXT: {{^}}int f;{{$}} + +// FIXME: Comments after pragmas disappear, even without -fminimize-whitespace +// MINCCOL: {{^}}# 9 "{{.*}}minimize-whitespace.c"{{$}} +// MINCCOL: {{^}}int a;/* span-comment */{{$}} +// MINCCOL-NEXT: {{^}}int b;// line-comment{{$}} +// MINCCOL-NEXT: {{^}}#pragma omp barrier{{$}} +// MINCCOL-NEXT: # 11 "{{.*}}minimize-whitespace.c" +// MINCCOL-NEXT: {{^}}x// more line-comments{{$}} +// MINCCOL-NEXT: {{^}}#pragma omp nothing{{$}} +// MINCCOL-NEXT: {{^ }}#pragma omp something{{$}} +// MINCCOL-NEXT: {{^}}int e;// again a line{{$}} +// MINCCOL-NEXT: {{^}}int f;{{$}} + +// MINWS: {{^}}int a;int b;{{$}} +// MINWS-NEXT: {{^}}#pragma omp barrier{{$}} +// MINWS-NEXT: {{^}}x{{$}} +// MINWS-NEXT: {{^}}#pragma omp nothing{{$}} +// MINWS-NEXT: {{^ }}#pragma omp something int e;int f;{{$}} + +// FIXME: Comments after pragmas disappear, even without -fminimize-whitespace +// MINCWS: {{^}}int a;/* span-comment */int b;// line-comment{{$}} +// MINCWS-NEXT: {{^}}#pragma omp barrier{{$}} +// MINCWS-NEXT: {{^}}x// more line-comments{{$}} +// MINCWS-NEXT: {{^}}#pragma omp nothing{{$}} +// MINCWS-NEXT: {{^ }}#pragma omp something int e;// again a line{{$}} +// MINCWS-NEXT: {{^}}int f; + diff --git a/clang/test/Preprocessor/print_line_include.c b/clang/test/Preprocessor/print_line_include.c --- a/clang/test/Preprocessor/print_line_include.c +++ b/clang/test/Preprocessor/print_line_include.c @@ -2,5 +2,8 @@ // CHECK: int x; // CHECK-NEXT: int x; +// RUN: %clang_cc1 -E -P -fminimize-whitespace %s | FileCheck %s --check-prefix=MINWS --strict-whitespace +// MINWS: {{^}}int x;int x;{{$}} + #include "print_line_include.h" #include "print_line_include.h" diff --git a/clang/test/Preprocessor/stringize_space.c b/clang/test/Preprocessor/stringize_space.c --- 
a/clang/test/Preprocessor/stringize_space.c +++ b/clang/test/Preprocessor/stringize_space.c @@ -1,16 +1,18 @@ // RUN: %clang_cc1 -E %s | FileCheck --strict-whitespace %s +// RUN: %clang_cc1 -E -P -fminimize-whitespace %s | FileCheck --strict-whitespace %s --check-prefix=MINWS #define A(b) -#b , - #b , -# b , - # b A() // CHECK: {{^}}-"" , - "" , -"" , - ""{{$}} - +// MINWS: {{^}}-"",-"",-"",-"" #define t(x) #x t(a c) // CHECK: {{^}}"a c"{{$}} +// MINWS-SAME: "a c" #define str(x) #x #define f(x) str(-x) @@ -18,6 +20,7 @@ 1) // CHECK: {{^}}"-1" +// MINWS-SAME: "-1" #define paste(a,b) str(a