Index: include/clang/Frontend/PrecompiledPreamble.h =================================================================== --- include/clang/Frontend/PrecompiledPreamble.h +++ include/clang/Frontend/PrecompiledPreamble.h @@ -36,21 +36,6 @@ class DeclGroupRef; class PCHContainerOperations; -/// A size of the preamble and a flag required by -/// PreprocessorOptions::PrecompiledPreambleBytes. -struct PreambleBounds { - PreambleBounds(unsigned Size, bool PreambleEndsAtStartOfLine) - : Size(Size), PreambleEndsAtStartOfLine(PreambleEndsAtStartOfLine) {} - - /// \brief Size of the preamble in bytes. - unsigned Size; - /// \brief Whether the preamble ends at the start of a new line. - /// - /// Used to inform the lexer as to whether it's starting at the beginning of - /// a line after skipping the preamble. - bool PreambleEndsAtStartOfLine; -}; - /// \brief Runs lexer to compute suggested preamble bounds. PreambleBounds ComputePreambleBounds(const LangOptions &LangOpts, llvm::MemoryBuffer *Buffer, @@ -111,10 +96,16 @@ void AddImplicitPreamble(CompilerInvocation &CI, llvm::MemoryBuffer *MainFileBuffer) const; + /// Updates the preamble offset. This is necessary when the preamble can be + /// reused for another parse, but its offset has changed (e.g. when a BOM + /// appears or disappears between parses). + void UpdateOffset(unsigned NewOffset); + private: PrecompiledPreamble(TempPCHFile PCHFile, std::vector PreambleBytes, bool PreambleEndsAtStartOfLine, - llvm::StringMap FilesInPreamble); + llvm::StringMap FilesInPreamble, + unsigned PreambleOffset); /// A temp file that would be deleted on destructor call. If destructor is not /// called for any reason, the file will be deleted at static objects' @@ -197,6 +188,8 @@ std::vector PreambleBytes; /// See PreambleBounds::PreambleEndsAtStartOfLine bool PreambleEndsAtStartOfLine; + /// The start offset of the bounds used to build the preamble. + unsigned PreambleOffset; }; /// A set of callbacks to gather useful information while building a preamble. Index: include/clang/Lex/Lexer.h =================================================================== --- include/clang/Lex/Lexer.h +++ include/clang/Lex/Lexer.h @@ -39,6 +39,24 @@ CMK_Perforce }; +/// Describes the bounds (start, size) of the preamble and a flag required by +/// PreprocessorOptions::PrecompiledPreambleBytes. +struct PreambleBounds { + PreambleBounds(unsigned Offset, unsigned Size, bool PreambleEndsAtStartOfLine) + : StartOffset(Offset), Size(Size), + PreambleEndsAtStartOfLine(PreambleEndsAtStartOfLine) {} + + /// \brief Start offset of the preamble, in bytes. + unsigned StartOffset; + /// \brief Size of the preamble in bytes. + unsigned Size; + /// \brief Whether the preamble ends at the start of a new line. + /// + /// Used to inform the lexer as to whether it's starting at the beginning of + /// a line after skipping the preamble. + bool PreambleEndsAtStartOfLine; +}; + /// Lexer - This provides a simple interface that turns a text buffer into a /// stream of tokens. This provides no support for file reading or buffering, /// or buffering/seeking of tokens, only forward lexing is supported. It relies @@ -442,12 +460,12 @@ /// \param MaxLines If non-zero, restrict the length of the preamble /// to fewer than this number of lines. /// - /// \returns The offset into the file where the preamble ends and the rest - /// of the file begins along with a boolean value indicating whether - /// the preamble ends at the beginning of a new line. - static std::pair ComputePreamble(StringRef Buffer, - const LangOptions &LangOpts, - unsigned MaxLines = 0); + /// \returns The offset into the file where the preamble starts, its size, + /// and a boolean value indicating whether the preamble ends at the + /// beginning of a new line. + static PreambleBounds ComputePreamble(StringRef Buffer, + const LangOptions &LangOpts, + unsigned MaxLines = 0); /// \brief Checks that the given token is the first token that occurs after /// the given location (this excludes comments and whitespace). Returns the @@ -618,7 +636,7 @@ //===--------------------------------------------------------------------===// // Other lexer functions. - void SkipBytes(unsigned Bytes, bool StartOfLine); + void SetByteOffset(unsigned Offset, bool StartOfLine); void PropagateLineStartLeadingSpaceInfo(Token &Result); Index: include/clang/Lex/PreprocessorOptions.h =================================================================== --- include/clang/Lex/PreprocessorOptions.h +++ include/clang/Lex/PreprocessorOptions.h @@ -160,7 +160,7 @@ DisablePCHValidation(false), AllowPCHWithCompilerErrors(false), DumpDeserializedPCHDecls(false), - PrecompiledPreambleBytes(0, true), + PrecompiledPreambleBytes(0, false), GeneratePreamble(false), RemappedFilesKeepOriginalName(true), RetainRemappedFileBuffers(false), @@ -195,7 +195,7 @@ LexEditorPlaceholders = true; RetainRemappedFileBuffers = true; PrecompiledPreambleBytes.first = 0; - PrecompiledPreambleBytes.second = 0; + PrecompiledPreambleBytes.second = false; } }; Index: lib/Frontend/ASTUnit.cpp =================================================================== --- lib/Frontend/ASTUnit.cpp +++ lib/Frontend/ASTUnit.cpp @@ -1266,6 +1266,7 @@ if (Preamble->CanReuse(PreambleInvocationIn, MainFileBuffer.get(), Bounds, VFS.get())) { // Okay! We can re-use the precompiled preamble. + Preamble->UpdateOffset(Bounds.StartOffset); // Set the state of the diagnostic object to mimic its state // after parsing the preamble. Index: lib/Frontend/FrontendActions.cpp =================================================================== --- lib/Frontend/FrontendActions.cpp +++ lib/Frontend/FrontendActions.cpp @@ -590,8 +590,9 @@ CompilerInstance &CI = getCompilerInstance(); auto Buffer = CI.getFileManager().getBufferForFile(getCurrentFile()); if (Buffer) { - unsigned Preamble = - Lexer::ComputePreamble((*Buffer)->getBuffer(), CI.getLangOpts()).first; + PreambleBounds Bounds = + Lexer::ComputePreamble((*Buffer)->getBuffer(), CI.getLangOpts()); + unsigned Preamble = Bounds.StartOffset + Bounds.Size; llvm::outs().write((*Buffer)->getBufferStart(), Preamble); } } Index: lib/Frontend/PrecompiledPreamble.cpp =================================================================== --- lib/Frontend/PrecompiledPreamble.cpp +++ lib/Frontend/PrecompiledPreamble.cpp @@ -195,8 +195,7 @@ PreambleBounds clang::ComputePreambleBounds(const LangOptions &LangOpts, llvm::MemoryBuffer *Buffer, unsigned MaxLines) { - auto Pre = Lexer::ComputePreamble(Buffer->getBuffer(), LangOpts, MaxLines); - return PreambleBounds(Pre.first, Pre.second); + return Lexer::ComputePreamble(Buffer->getBuffer(), LangOpts, MaxLines); } llvm::ErrorOr PrecompiledPreamble::Build( @@ -224,9 +223,9 @@ // Save the preamble text for later; we'll need to compare against it for // subsequent reparses. - std::vector PreambleBytes(MainFileBuffer->getBufferStart(), - MainFileBuffer->getBufferStart() + - Bounds.Size); + auto PreambleStart = MainFileBuffer->getBufferStart() + Bounds.StartOffset; + std::vector PreambleBytes(PreambleStart, + PreambleStart + Bounds.Size); bool PreambleEndsAtStartOfLine = Bounds.PreambleEndsAtStartOfLine; // Tell the compiler invocation to generate a temporary precompiled header. @@ -289,8 +288,10 @@ // Remap the main source file to the preamble buffer. StringRef MainFilePath = FrontendOpts.Inputs[0].getFile(); + auto StartOffset = Bounds.StartOffset; + auto EndOffset = StartOffset + Bounds.Size; auto PreambleInputBuffer = llvm::MemoryBuffer::getMemBufferCopy( - MainFileBuffer->getBuffer().slice(0, Bounds.Size), MainFilePath); + MainFileBuffer->getBuffer().slice(StartOffset, EndOffset), MainFilePath); if (PreprocessorOpts.RetainRemappedFileBuffers) { // MainFileBuffer will be deleted by unique_ptr after leaving the method. PreprocessorOpts.addRemappedFile(MainFilePath, PreambleInputBuffer.get()); @@ -338,11 +339,13 @@ return PrecompiledPreamble( std::move(*PreamblePCHFile), std::move(PreambleBytes), - PreambleEndsAtStartOfLine, std::move(FilesInPreamble)); + PreambleEndsAtStartOfLine, std::move(FilesInPreamble), + Bounds.StartOffset); } PreambleBounds PrecompiledPreamble::getBounds() const { - return PreambleBounds(PreambleBytes.size(), PreambleEndsAtStartOfLine); + return PreambleBounds(PreambleOffset, PreambleBytes.size(), + PreambleEndsAtStartOfLine); } bool PrecompiledPreamble::CanReuse(const CompilerInvocation &Invocation, @@ -351,7 +354,7 @@ vfs::FileSystem *VFS) const { assert( - Bounds.Size <= MainFileBuffer->getBufferSize() && + Bounds.StartOffset + Bounds.Size <= MainFileBuffer->getBufferSize() && "Buffer is too large. Bounds were calculated from a different buffer?"); auto PreambleInvocation = std::make_shared(Invocation); @@ -367,7 +370,8 @@ // new main file. if (PreambleBytes.size() != Bounds.Size || PreambleEndsAtStartOfLine != Bounds.PreambleEndsAtStartOfLine || - memcmp(PreambleBytes.data(), MainFileBuffer->getBufferStart(), + memcmp(PreambleBytes.data(), + MainFileBuffer->getBufferStart() + Bounds.StartOffset, Bounds.Size) != 0) return false; // The preamble has not changed. We may be able to re-use the precompiled @@ -430,7 +434,8 @@ auto &PreprocessorOpts = CI.getPreprocessorOpts(); // Configure ImpicitPCHInclude. - PreprocessorOpts.PrecompiledPreambleBytes.first = PreambleBytes.size(); + PreprocessorOpts.PrecompiledPreambleBytes.first = PreambleOffset + + PreambleBytes.size(); PreprocessorOpts.PrecompiledPreambleBytes.second = PreambleEndsAtStartOfLine; PreprocessorOpts.ImplicitPCHInclude = PCHFile.getFilePath(); PreprocessorOpts.DisablePCHValidation = true; @@ -440,13 +445,20 @@ PreprocessorOpts.addRemappedFile(MainFilePath, MainFileBuffer); } +void PrecompiledPreamble::UpdateOffset(unsigned NewOffset) +{ + PreambleOffset = NewOffset; +} + PrecompiledPreamble::PrecompiledPreamble( TempPCHFile PCHFile, std::vector PreambleBytes, bool PreambleEndsAtStartOfLine, - llvm::StringMap FilesInPreamble) + llvm::StringMap FilesInPreamble, + unsigned PreambleOffset) : PCHFile(std::move(PCHFile)), FilesInPreamble(FilesInPreamble), PreambleBytes(std::move(PreambleBytes)), - PreambleEndsAtStartOfLine(PreambleEndsAtStartOfLine) {} + PreambleEndsAtStartOfLine(PreambleEndsAtStartOfLine), + PreambleOffset(PreambleOffset) {} llvm::ErrorOr PrecompiledPreamble::TempPCHFile::CreateNewPreamblePCHFile() { Index: lib/Lex/Lexer.cpp =================================================================== --- lib/Lex/Lexer.cpp +++ lib/Lex/Lexer.cpp @@ -552,9 +552,9 @@ } // end anonymous namespace -std::pair Lexer::ComputePreamble(StringRef Buffer, - const LangOptions &LangOpts, - unsigned MaxLines) { +PreambleBounds Lexer::ComputePreamble(StringRef Buffer, + const LangOptions &LangOpts, + unsigned MaxLines) { // Create a lexer starting at the beginning of the file. Note that we use a // "fake" file source location at offset 1 so that the lexer will track our // position within the file. @@ -688,7 +688,8 @@ else End = TheTok.getLocation(); - return std::make_pair(End.getRawEncoding() - StartLoc.getRawEncoding(), + return PreambleBounds(StartLoc.getRawEncoding() - FileLoc.getRawEncoding(), + End.getRawEncoding() - StartLoc.getRawEncoding(), TheTok.isAtStartOfLine()); } @@ -1394,9 +1395,9 @@ // Helper methods for lexing. //===----------------------------------------------------------------------===// -/// \brief Routine that indiscriminately skips bytes in the source file. -void Lexer::SkipBytes(unsigned Bytes, bool StartOfLine) { - BufferPtr += Bytes; +/// \brief Routine that indiscriminately sets the offset into the source file. +void Lexer::SetByteOffset(unsigned Offset, bool StartOfLine) { + BufferPtr = BufferStart + Offset; if (BufferPtr > BufferEnd) BufferPtr = BufferEnd; // FIXME: What exactly does the StartOfLine bit mean? There are two Index: lib/Lex/Preprocessor.cpp =================================================================== --- lib/Lex/Preprocessor.cpp +++ lib/Lex/Preprocessor.cpp @@ -516,9 +516,9 @@ // If we've been asked to skip bytes in the main file (e.g., as part of a // precompiled preamble), do so now. if (SkipMainFilePreamble.first > 0) - CurLexer->SkipBytes(SkipMainFilePreamble.first, - SkipMainFilePreamble.second); - + CurLexer->SetByteOffset(SkipMainFilePreamble.first, + SkipMainFilePreamble.second); + // Tell the header info that the main file was entered. If the file is later // #imported, it won't be re-entered. if (const FileEntry *FE = SourceMgr.getFileEntryForID(MainFileID)) Index: unittests/Frontend/PchPreambleTest.cpp =================================================================== --- unittests/Frontend/PchPreambleTest.cpp +++ unittests/Frontend/PchPreambleTest.cpp @@ -91,8 +91,19 @@ PreprocessorOptions &PPOpts = CI->getPreprocessorOpts(); PPOpts.RemappedFilesKeepOriginalName = true; + struct DebugConsumer : DiagnosticConsumer + { + void HandleDiagnostic(DiagnosticsEngine::Level DiagLevel, + const Diagnostic &Info) override { + DiagnosticConsumer::HandleDiagnostic(DiagLevel, Info); + SmallString<16> DiagText; + Info.FormatDiagnostic(DiagText); + llvm::errs() << DiagText << '\n'; + } + }; + IntrusiveRefCntPtr - Diags(CompilerInstance::createDiagnostics(new DiagnosticOptions, new DiagnosticConsumer)); + Diags(CompilerInstance::createDiagnostics(new DiagnosticOptions, new DebugConsumer)); FileManager *FileMgr = new FileManager(FSOpts, VFS); @@ -153,4 +164,64 @@ ASSERT_EQ(initialCounts[2], GetFileReadCount(Header2)); } +TEST_F(PchPreambleTest, ReparseWithDisappearingBom) { + std::string Header = "//./header.h"; + std::string Main = "//./main.cpp"; + AddFile(Header, "int random() { return 4; }"); + AddFile(Main, + "\xef\xbb\xbf" + "#include \"//./header.h\"\n" + "int main() { return random() * 2; }"); + + std::unique_ptr AST(ParseAST(Main)); + ASSERT_TRUE(AST.get()); + ASSERT_FALSE(AST->getDiagnostics().hasErrorOccurred()); + + unsigned initialCounts[] = { + GetFileReadCount(Main), + GetFileReadCount(Header) + }; + + // Remove BOM but keep the rest the same + RemapFile(Main, + "#include \"//./header.h\"\n" + "int main() { return random() * 2; }"); + + ASSERT_TRUE(ReparseAST(AST)); + ASSERT_FALSE(AST->getDiagnostics().hasErrorOccurred()); + + ASSERT_EQ(initialCounts[0], GetFileReadCount(Main)); + ASSERT_EQ(initialCounts[1], GetFileReadCount(Header)); +} + +TEST_F(PchPreambleTest, ReparseWithAppearingBom) { + std::string Header = "//./header.h"; + std::string Main = "//./main.cpp"; + AddFile(Header, "int random() { return 4; }"); + AddFile(Main, + "#include \"//./header.h\"\n" + "int main() { return random() * 2; }"); + + std::unique_ptr AST(ParseAST(Main)); + ASSERT_TRUE(AST.get()); + ASSERT_FALSE(AST->getDiagnostics().hasErrorOccurred()); + + unsigned initialCounts[] = { + GetFileReadCount(Main), + GetFileReadCount(Header) + }; + + // Suddenly, a BOM appeared + RemapFile(Main, + "\xef\xbb\xbf" + "#include \"//./header.h\"\n" + "int main() { return random() * 2; }"); + + ASSERT_TRUE(ReparseAST(AST)); + ASSERT_FALSE(AST->getDiagnostics().hasErrorOccurred()); + + ASSERT_EQ(initialCounts[0], GetFileReadCount(Main)); + ASSERT_EQ(initialCounts[1], GetFileReadCount(Header)); +} + } // anonymous namespace