Index: include/clang/Frontend/PrecompiledPreamble.h =================================================================== --- include/clang/Frontend/PrecompiledPreamble.h +++ include/clang/Frontend/PrecompiledPreamble.h @@ -36,21 +36,6 @@ class DeclGroupRef; class PCHContainerOperations; -/// A size of the preamble and a flag required by -/// PreprocessorOptions::PrecompiledPreambleBytes. -struct PreambleBounds { - PreambleBounds(unsigned Size, bool PreambleEndsAtStartOfLine) - : Size(Size), PreambleEndsAtStartOfLine(PreambleEndsAtStartOfLine) {} - - /// \brief Size of the preamble in bytes. - unsigned Size; - /// \brief Whether the preamble ends at the start of a new line. - /// - /// Used to inform the lexer as to whether it's starting at the beginning of - /// a line after skipping the preamble. - bool PreambleEndsAtStartOfLine; -}; - /// \brief Runs lexer to compute suggested preamble bounds. PreambleBounds ComputePreambleBounds(const LangOptions &LangOpts, llvm::MemoryBuffer *Buffer, @@ -61,6 +46,7 @@ /// A class holding a PCH and all information to check whether it is valid to /// reuse the PCH for the subsequent runs. Use BuildPreamble to create PCH and /// CanReusePreamble + AddImplicitPreamble to make use of it. +/// The given memory buffer must never start with a BOM. class PrecompiledPreamble { class TempPCHFile; struct PreambleFileHash; Index: include/clang/Lex/Lexer.h =================================================================== --- include/clang/Lex/Lexer.h +++ include/clang/Lex/Lexer.h @@ -39,6 +39,22 @@ CMK_Perforce }; +/// The size of the preamble and a flag required by +/// PreprocessorOptions::PrecompiledPreambleBytes. +/// The preamble does not include the BOM, if any. +struct PreambleBounds { + PreambleBounds(unsigned Size, bool PreambleEndsAtStartOfLine) + : Size(Size), PreambleEndsAtStartOfLine(PreambleEndsAtStartOfLine) {} + + /// \brief Size of the preamble in bytes. + unsigned Size; + /// \brief Whether the preamble ends at the start of a new line. + /// + /// Used to inform the lexer as to whether it's starting at the beginning of + /// a line after skipping the preamble. + bool PreambleEndsAtStartOfLine; +}; + /// Lexer - This provides a simple interface that turns a text buffer into a /// stream of tokens. This provides no support for file reading or buffering, /// or buffering/seeking of tokens, only forward lexing is supported. It relies @@ -442,12 +458,11 @@ /// \param MaxLines If non-zero, restrict the length of the preamble /// to fewer than this number of lines. /// - /// \returns The offset into the file where the preamble ends and the rest - /// of the file begins along with a boolean value indicating whether - /// the preamble ends at the beginning of a new line. - static std::pair ComputePreamble(StringRef Buffer, - const LangOptions &LangOpts, - unsigned MaxLines = 0); + /// \returns The size of the preamble, and a boolean value indicating + /// whether the preamble ends at the beginning of a new line. + static PreambleBounds ComputePreamble(StringRef Buffer, + const LangOptions &LangOpts, + unsigned MaxLines = 0); /// \brief Checks that the given token is the first token that occurs after /// the given location (this excludes comments and whitespace). Returns the Index: include/clang/Lex/PreprocessorOptions.h =================================================================== --- include/clang/Lex/PreprocessorOptions.h +++ include/clang/Lex/PreprocessorOptions.h @@ -160,7 +160,7 @@ DisablePCHValidation(false), AllowPCHWithCompilerErrors(false), DumpDeserializedPCHDecls(false), - PrecompiledPreambleBytes(0, true), + PrecompiledPreambleBytes(0, false), GeneratePreamble(false), RemappedFilesKeepOriginalName(true), RetainRemappedFileBuffers(false), @@ -195,7 +195,7 @@ LexEditorPlaceholders = true; RetainRemappedFileBuffers = true; PrecompiledPreambleBytes.first = 0; - PrecompiledPreambleBytes.second = 0; + PrecompiledPreambleBytes.second = false; } }; Index: lib/Frontend/ASTUnit.cpp =================================================================== --- lib/Frontend/ASTUnit.cpp +++ lib/Frontend/ASTUnit.cpp @@ -1256,6 +1256,14 @@ if (!MainFileBuffer) return nullptr; + // If the preamble includes a BOM, we need to skip it, since we want + // the preamble to remain consistent if the BOM appears or disappears. + // This ensures offsets within and past the preamble stay valid. + if (MainFileBuffer->getBuffer().startswith("\xEF\xBB\xBF")) { + MainFileBuffer = llvm::MemoryBuffer::getMemBufferSlice( + std::move(MainFileBuffer), 3); + } + PreambleBounds Bounds = ComputePreambleBounds(*PreambleInvocationIn.getLangOpts(), MainFileBuffer.get(), MaxLines); Index: lib/Frontend/FrontendActions.cpp =================================================================== --- lib/Frontend/FrontendActions.cpp +++ lib/Frontend/FrontendActions.cpp @@ -591,7 +591,7 @@ auto Buffer = CI.getFileManager().getBufferForFile(getCurrentFile()); if (Buffer) { unsigned Preamble = - Lexer::ComputePreamble((*Buffer)->getBuffer(), CI.getLangOpts()).first; + Lexer::ComputePreamble((*Buffer)->getBuffer(), CI.getLangOpts()).Size; llvm::outs().write((*Buffer)->getBufferStart(), Preamble); } } Index: lib/Frontend/PrecompiledPreamble.cpp =================================================================== --- lib/Frontend/PrecompiledPreamble.cpp +++ lib/Frontend/PrecompiledPreamble.cpp @@ -195,8 +195,7 @@ PreambleBounds clang::ComputePreambleBounds(const LangOptions &LangOpts, llvm::MemoryBuffer *Buffer, unsigned MaxLines) { - auto Pre = Lexer::ComputePreamble(Buffer->getBuffer(), LangOpts, MaxLines); - return PreambleBounds(Pre.first, Pre.second); + return Lexer::ComputePreamble(Buffer->getBuffer(), LangOpts, MaxLines); } llvm::ErrorOr PrecompiledPreamble::Build( Index: lib/Lex/Lexer.cpp =================================================================== --- lib/Lex/Lexer.cpp +++ lib/Lex/Lexer.cpp @@ -552,9 +552,9 @@ } // end anonymous namespace -std::pair Lexer::ComputePreamble(StringRef Buffer, - const LangOptions &LangOpts, - unsigned MaxLines) { +PreambleBounds Lexer::ComputePreamble(StringRef Buffer, + const LangOptions &LangOpts, + unsigned MaxLines) { // Create a lexer starting at the beginning of the file. Note that we use a // "fake" file source location at offset 1 so that the lexer will track our // position within the file. @@ -688,7 +688,7 @@ else End = TheTok.getLocation(); - return std::make_pair(End.getRawEncoding() - StartLoc.getRawEncoding(), + return PreambleBounds(End.getRawEncoding() - StartLoc.getRawEncoding(), TheTok.isAtStartOfLine()); } Index: lib/Lex/Preprocessor.cpp =================================================================== --- lib/Lex/Preprocessor.cpp +++ lib/Lex/Preprocessor.cpp @@ -516,9 +516,9 @@ // If we've been asked to skip bytes in the main file (e.g., as part of a // precompiled preamble), do so now. if (SkipMainFilePreamble.first > 0) - CurLexer->SkipBytes(SkipMainFilePreamble.first, + CurLexer->SkipBytes(SkipMainFilePreamble.first, SkipMainFilePreamble.second); - + // Tell the header info that the main file was entered. If the file is later // #imported, it won't be re-entered. if (const FileEntry *FE = SourceMgr.getFileEntryForID(MainFileID)) Index: unittests/Frontend/PchPreambleTest.cpp =================================================================== --- unittests/Frontend/PchPreambleTest.cpp +++ unittests/Frontend/PchPreambleTest.cpp @@ -91,8 +91,19 @@ PreprocessorOptions &PPOpts = CI->getPreprocessorOpts(); PPOpts.RemappedFilesKeepOriginalName = true; + struct DebugConsumer : DiagnosticConsumer + { + void HandleDiagnostic(DiagnosticsEngine::Level DiagLevel, + const Diagnostic &Info) override { + DiagnosticConsumer::HandleDiagnostic(DiagLevel, Info); + SmallString<16> DiagText; + Info.FormatDiagnostic(DiagText); + llvm::errs() << DiagText << '\n'; + } + }; + IntrusiveRefCntPtr - Diags(CompilerInstance::createDiagnostics(new DiagnosticOptions, new DiagnosticConsumer)); + Diags(CompilerInstance::createDiagnostics(new DiagnosticOptions, new DebugConsumer)); FileManager *FileMgr = new FileManager(FSOpts, VFS); @@ -153,4 +164,70 @@ ASSERT_EQ(initialCounts[2], GetFileReadCount(Header2)); } +TEST_F(PchPreambleTest, ReparseWithDisappearingBom) { + std::string Header = "//./header.h"; + std::string Main = "//./main.cpp"; + AddFile(Header, "int random() { return 4; }"); + AddFile(Main, + "\xef\xbb\xbf" + "#include \"//./header.h\"\n" + "int main() { return random() * 2; }"); + + std::unique_ptr AST(ParseAST(Main)); + ASSERT_TRUE(AST.get()); + ASSERT_FALSE(AST->getDiagnostics().hasErrorOccurred()); + + // Check the file counts to make sure the preamble is + // really being used (and the header isn't being read + // again) + unsigned initialCounts[] = { + GetFileReadCount(Main), + GetFileReadCount(Header) + }; + + // Remove BOM but keep the rest the same + RemapFile(Main, + "#include \"//./header.h\"\n" + "int main() { return random() * 2; }"); + + ASSERT_TRUE(ReparseAST(AST)); + ASSERT_FALSE(AST->getDiagnostics().hasErrorOccurred()); + + ASSERT_EQ(initialCounts[0], GetFileReadCount(Main)); + ASSERT_EQ(initialCounts[1], GetFileReadCount(Header)); +} + +TEST_F(PchPreambleTest, ReparseWithAppearingBom) { + std::string Header = "//./header.h"; + std::string Main = "//./main.cpp"; + AddFile(Header, "int random() { return 4; }"); + AddFile(Main, + "#include \"//./header.h\"\n" + "int main() { return random() * 2; }"); + + std::unique_ptr AST(ParseAST(Main)); + ASSERT_TRUE(AST.get()); + ASSERT_FALSE(AST->getDiagnostics().hasErrorOccurred()); + + // Check the file counts to make sure the preamble is + // really being used (and the header isn't being read + // again) + unsigned initialCounts[] = { + GetFileReadCount(Main), + GetFileReadCount(Header) + }; + + // Suddenly, a BOM appeared + RemapFile(Main, + "\xef\xbb\xbf" + "#include \"//./header.h\"\n" + "int main() { return random() * 2; }"); + + ASSERT_TRUE(ReparseAST(AST)); + ASSERT_FALSE(AST->getDiagnostics().hasErrorOccurred()); + + ASSERT_EQ(initialCounts[0], GetFileReadCount(Main)); + ASSERT_EQ(initialCounts[1], GetFileReadCount(Header)); +} + } // anonymous namespace