Index: cfe/trunk/include/clang/Frontend/PrecompiledPreamble.h =================================================================== --- cfe/trunk/include/clang/Frontend/PrecompiledPreamble.h +++ cfe/trunk/include/clang/Frontend/PrecompiledPreamble.h @@ -36,21 +36,6 @@ class DeclGroupRef; class PCHContainerOperations; -/// A size of the preamble and a flag required by -/// PreprocessorOptions::PrecompiledPreambleBytes. -struct PreambleBounds { - PreambleBounds(unsigned Size, bool PreambleEndsAtStartOfLine) - : Size(Size), PreambleEndsAtStartOfLine(PreambleEndsAtStartOfLine) {} - - /// \brief Size of the preamble in bytes. - unsigned Size; - /// \brief Whether the preamble ends at the start of a new line. - /// - /// Used to inform the lexer as to whether it's starting at the beginning of - /// a line after skipping the preamble. - bool PreambleEndsAtStartOfLine; -}; - /// \brief Runs lexer to compute suggested preamble bounds. PreambleBounds ComputePreambleBounds(const LangOptions &LangOpts, llvm::MemoryBuffer *Buffer, Index: cfe/trunk/include/clang/Lex/Lexer.h =================================================================== --- cfe/trunk/include/clang/Lex/Lexer.h +++ cfe/trunk/include/clang/Lex/Lexer.h @@ -39,6 +39,23 @@ CMK_Perforce }; +/// Describes the bounds (start, size) of the preamble and a flag required by +/// PreprocessorOptions::PrecompiledPreambleBytes. +/// The preamble includes the BOM, if any. +struct PreambleBounds { + PreambleBounds(unsigned Size, bool PreambleEndsAtStartOfLine) + : Size(Size), + PreambleEndsAtStartOfLine(PreambleEndsAtStartOfLine) {} + + /// \brief Size of the preamble in bytes. + unsigned Size; + /// \brief Whether the preamble ends at the start of a new line. + /// + /// Used to inform the lexer as to whether it's starting at the beginning of + /// a line after skipping the preamble. + bool PreambleEndsAtStartOfLine; +}; + /// Lexer - This provides a simple interface that turns a text buffer into a /// stream of tokens. 
This provides no support for file reading or buffering, /// or buffering/seeking of tokens, only forward lexing is supported. It relies @@ -443,11 +460,11 @@ /// to fewer than this number of lines. /// /// \returns The offset into the file where the preamble ends and the rest - /// of the file begins along with a boolean value indicating whether + /// of the file begins along with a boolean value indicating whether /// the preamble ends at the beginning of a new line. - static std::pair<unsigned, bool> ComputePreamble(StringRef Buffer, - const LangOptions &LangOpts, - unsigned MaxLines = 0); + static PreambleBounds ComputePreamble(StringRef Buffer, + const LangOptions &LangOpts, + unsigned MaxLines = 0); /// \brief Checks that the given token is the first token that occurs after /// the given location (this excludes comments and whitespace). Returns the @@ -618,7 +635,7 @@ //===--------------------------------------------------------------------===// // Other lexer functions. - void SkipBytes(unsigned Bytes, bool StartOfLine); + void SetByteOffset(unsigned Offset, bool StartOfLine); void PropagateLineStartLeadingSpaceInfo(Token &Result); Index: cfe/trunk/include/clang/Lex/PreprocessorOptions.h =================================================================== --- cfe/trunk/include/clang/Lex/PreprocessorOptions.h +++ cfe/trunk/include/clang/Lex/PreprocessorOptions.h @@ -160,7 +160,7 @@ DisablePCHValidation(false), AllowPCHWithCompilerErrors(false), DumpDeserializedPCHDecls(false), - PrecompiledPreambleBytes(0, true), + PrecompiledPreambleBytes(0, false), GeneratePreamble(false), RemappedFilesKeepOriginalName(true), RetainRemappedFileBuffers(false), @@ -195,7 +195,7 @@ LexEditorPlaceholders = true; RetainRemappedFileBuffers = true; PrecompiledPreambleBytes.first = 0; - PrecompiledPreambleBytes.second = 0; + PrecompiledPreambleBytes.second = false; } }; Index: cfe/trunk/lib/Frontend/FrontendActions.cpp =================================================================== --- 
cfe/trunk/lib/Frontend/FrontendActions.cpp +++ cfe/trunk/lib/Frontend/FrontendActions.cpp @@ -591,7 +591,7 @@ auto Buffer = CI.getFileManager().getBufferForFile(getCurrentFile()); if (Buffer) { unsigned Preamble = - Lexer::ComputePreamble((*Buffer)->getBuffer(), CI.getLangOpts()).first; + Lexer::ComputePreamble((*Buffer)->getBuffer(), CI.getLangOpts()).Size; llvm::outs().write((*Buffer)->getBufferStart(), Preamble); } } Index: cfe/trunk/lib/Frontend/PrecompiledPreamble.cpp =================================================================== --- cfe/trunk/lib/Frontend/PrecompiledPreamble.cpp +++ cfe/trunk/lib/Frontend/PrecompiledPreamble.cpp @@ -195,8 +195,7 @@ PreambleBounds clang::ComputePreambleBounds(const LangOptions &LangOpts, llvm::MemoryBuffer *Buffer, unsigned MaxLines) { - auto Pre = Lexer::ComputePreamble(Buffer->getBuffer(), LangOpts, MaxLines); - return PreambleBounds(Pre.first, Pre.second); + return Lexer::ComputePreamble(Buffer->getBuffer(), LangOpts, MaxLines); } llvm::ErrorOr<PrecompiledPreamble> PrecompiledPreamble::Build( Index: cfe/trunk/lib/Lex/Lexer.cpp =================================================================== --- cfe/trunk/lib/Lex/Lexer.cpp +++ cfe/trunk/lib/Lex/Lexer.cpp @@ -552,9 +552,9 @@ } // end anonymous namespace -std::pair<unsigned, bool> Lexer::ComputePreamble(StringRef Buffer, - const LangOptions &LangOpts, - unsigned MaxLines) { +PreambleBounds Lexer::ComputePreamble(StringRef Buffer, + const LangOptions &LangOpts, + unsigned MaxLines) { // Create a lexer starting at the beginning of the file. Note that we use a // "fake" file source location at offset 1 so that the lexer will track our // position within the file. @@ -688,7 +688,7 @@ else End = TheTok.getLocation(); - return std::make_pair(End.getRawEncoding() - StartLoc.getRawEncoding(), + return PreambleBounds(End.getRawEncoding() - FileLoc.getRawEncoding(), TheTok.isAtStartOfLine()); } @@ -1394,9 +1394,9 @@ // Helper methods for lexing. 
//===----------------------------------------------------------------------===// -/// \brief Routine that indiscriminately skips bytes in the source file. -void Lexer::SkipBytes(unsigned Bytes, bool StartOfLine) { - BufferPtr += Bytes; +/// \brief Routine that indiscriminately sets the offset into the source file. +void Lexer::SetByteOffset(unsigned Offset, bool StartOfLine) { + BufferPtr = BufferStart + Offset; if (BufferPtr > BufferEnd) BufferPtr = BufferEnd; // FIXME: What exactly does the StartOfLine bit mean? There are two Index: cfe/trunk/lib/Lex/Preprocessor.cpp =================================================================== --- cfe/trunk/lib/Lex/Preprocessor.cpp +++ cfe/trunk/lib/Lex/Preprocessor.cpp @@ -516,9 +516,9 @@ // If we've been asked to skip bytes in the main file (e.g., as part of a // precompiled preamble), do so now. if (SkipMainFilePreamble.first > 0) - CurLexer->SkipBytes(SkipMainFilePreamble.first, - SkipMainFilePreamble.second); - + CurLexer->SetByteOffset(SkipMainFilePreamble.first, + SkipMainFilePreamble.second); + // Tell the header info that the main file was entered. If the file is later // #imported, it won't be re-entered. 
if (const FileEntry *FE = SourceMgr.getFileEntryForID(MainFileID)) Index: cfe/trunk/unittests/Frontend/PCHPreambleTest.cpp =================================================================== --- cfe/trunk/unittests/Frontend/PCHPreambleTest.cpp +++ cfe/trunk/unittests/Frontend/PCHPreambleTest.cpp @@ -153,4 +153,48 @@ ASSERT_EQ(initialCounts[2], GetFileReadCount(Header2)); } +TEST_F(PCHPreambleTest, ParseWithBom) { + std::string Header = "//./header.h"; + std::string Main = "//./main.cpp"; + AddFile(Header, "int random() { return 4; }"); + AddFile(Main, + "\xef\xbb\xbf" + "#include \"//./header.h\"\n" + "int main() { return random() -2; }"); + + std::unique_ptr<ASTUnit> AST(ParseAST(Main)); + ASSERT_TRUE(AST.get()); + ASSERT_FALSE(AST->getDiagnostics().hasErrorOccurred()); + + unsigned HeaderReadCount = GetFileReadCount(Header); + + ASSERT_TRUE(ReparseAST(AST)); + ASSERT_FALSE(AST->getDiagnostics().hasErrorOccurred()); + + // Check preamble PCH was really reused + ASSERT_EQ(HeaderReadCount, GetFileReadCount(Header)); + + // Remove BOM + RemapFile(Main, + "#include \"//./header.h\"\n" + "int main() { return random() -2; }"); + + ASSERT_TRUE(ReparseAST(AST)); + ASSERT_FALSE(AST->getDiagnostics().hasErrorOccurred()); + + ASSERT_LE(HeaderReadCount, GetFileReadCount(Header)); + HeaderReadCount = GetFileReadCount(Header); + + // Add BOM back + RemapFile(Main, + "\xef\xbb\xbf" + "#include \"//./header.h\"\n" + "int main() { return random() -2; }"); + + ASSERT_TRUE(ReparseAST(AST)); + ASSERT_FALSE(AST->getDiagnostics().hasErrorOccurred()); + + ASSERT_LE(HeaderReadCount, GetFileReadCount(Header)); +} + } // anonymous namespace