diff --git a/clang/include/clang/Basic/DiagnosticSerializationKinds.td b/clang/include/clang/Basic/DiagnosticSerializationKinds.td --- a/clang/include/clang/Basic/DiagnosticSerializationKinds.td +++ b/clang/include/clang/Basic/DiagnosticSerializationKinds.td @@ -18,13 +18,16 @@ def err_fe_pch_malformed_block : Error< "malformed block record in PCH file: '%0'">, DefaultFatal; def err_fe_pch_file_modified : Error< - "file '%0' has been modified since the precompiled header '%1' was built">, + "file '%0' has been modified since the precompiled header '%1' was built" + ": %select{size|mtime|content}2 changed">, DefaultFatal; def err_fe_module_file_modified : Error< - "file '%0' has been modified since the module file '%1' was built">, + "file '%0' has been modified since the module file '%1' was built" + ": %select{size|mtime|content}2 changed">, DefaultFatal; def err_fe_ast_file_modified : Error< - "file '%0' has been modified since the AST file '%1' was built">, + "file '%0' has been modified since the AST file '%1' was built" + ": %select{size|mtime|content}2 changed">, DefaultFatal; def err_fe_pch_file_overridden : Error< "file '%0' from the precompiled header has been overridden">; @@ -399,6 +402,8 @@ def err_module_no_size_mtime_for_header : Error< "cannot emit module %0: %select{size|mtime}1 must be explicitly specified " "for missing header file \"%2\"">; +def err_module_unable_to_hash_content : Error< + "failed to hash content for '%0' because memory buffer cannot be retrieved">; } // let CategoryName } // let Component diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -1368,6 +1368,28 @@ HelpText<"Validate the system headers that a module depends on when loading the module">; def fno_modules_validate_system_headers : Flag<["-"], "fno-modules-validate-system-headers">, Group, Flags<[DriverOption]>; + +def 
fvalidate_ast_input_files_content: + Flag <["-"], "fvalidate-ast-input-files-content">, + Group, Flags<[CC1Option]>, + HelpText<"Compute and store the hash of input files used to build an AST." + " Files with mismatching mtimes are considered valid" + " if their contents are identical">; +def fmodules_validate_input_files_content: + Flag <["-"], "fmodules-validate-input-files-content">, + Group, Flags<[DriverOption]>, + HelpText<"Validate PCM input files based on content if mtime differs">; +def fno_modules_validate_input_files_content: + Flag <["-"], "fno-modules-validate-input-files-content">, + Group, Flags<[DriverOption]>; +def fpch_validate_input_files_content: + Flag <["-"], "fpch-validate-input-files-content">, + Group, Flags<[DriverOption]>, + HelpText<"Validate PCH input files based on content if mtime differs">; +def fno_pch_validate_input_files_content: + Flag <["-"], "fno-pch-validate-input-files-content">, + Group, Flags<[DriverOption]>; + def fmodules : Flag <["-"], "fmodules">, Group, Flags<[DriverOption, CC1Option]>, HelpText<"Enable the 'modules' language feature">; diff --git a/clang/include/clang/Lex/HeaderSearchOptions.h b/clang/include/clang/Lex/HeaderSearchOptions.h --- a/clang/include/clang/Lex/HeaderSearchOptions.h +++ b/clang/include/clang/Lex/HeaderSearchOptions.h @@ -195,6 +195,10 @@ /// Whether to validate system input files when a module is loaded. unsigned ModulesValidateSystemHeaders : 1; + /// Whether the content of input files should be hashed and used to + /// validate consistency. + unsigned ValidateASTInputFilesContent : 1; + /// Whether the module includes debug information (-gmodules). 
unsigned UseDebugInfo : 1; @@ -208,7 +212,8 @@ UseBuiltinIncludes(true), UseStandardSystemIncludes(true), UseStandardCXXIncludes(true), UseLibcxx(false), Verbose(false), ModulesValidateOncePerBuildSession(false), - ModulesValidateSystemHeaders(false), UseDebugInfo(false), + ModulesValidateSystemHeaders(false), + ValidateASTInputFilesContent(false), UseDebugInfo(false), ModulesValidateDiagnosticOptions(true), ModulesHashContent(false) {} /// AddPath - Add the \p Path path to the specified \p Group list. diff --git a/clang/include/clang/Serialization/ASTBitCodes.h b/clang/include/clang/Serialization/ASTBitCodes.h --- a/clang/include/clang/Serialization/ASTBitCodes.h +++ b/clang/include/clang/Serialization/ASTBitCodes.h @@ -382,7 +382,10 @@ /// inside the control block. enum InputFileRecordTypes { /// An input file. - INPUT_FILE = 1 + INPUT_FILE = 1, + + /// The input file content hash + INPUT_FILE_HASH }; /// Record types that occur within the AST block itself. diff --git a/clang/include/clang/Serialization/ASTReader.h b/clang/include/clang/Serialization/ASTReader.h --- a/clang/include/clang/Serialization/ASTReader.h +++ b/clang/include/clang/Serialization/ASTReader.h @@ -930,6 +930,9 @@ /// Whether validate system input files. bool ValidateSystemInputs; + /// Whether validate headers and module maps using hash based on contents. + bool ValidateASTInputFilesContent; + /// Whether we are allowed to use the global module index. 
bool UseGlobalIndex; @@ -1203,6 +1206,7 @@ struct InputFileInfo { std::string Filename; + uint64_t ContentHash; off_t StoredSize; time_t StoredTime; bool Overridden; @@ -1437,6 +1441,8 @@ void Error(StringRef Msg) const; void Error(unsigned DiagID, StringRef Arg1 = StringRef(), StringRef Arg2 = StringRef()) const; + void Error(unsigned DiagID, StringRef Arg1, StringRef Arg2, + unsigned Select) const; void Error(llvm::Error &&Err) const; public: @@ -1485,7 +1491,9 @@ StringRef isysroot = "", bool DisableValidation = false, bool AllowASTWithCompilerErrors = false, bool AllowConfigurationMismatch = false, - bool ValidateSystemInputs = false, bool UseGlobalIndex = true, + bool ValidateSystemInputs = false, + bool ValidateASTInputFilesContent = false, + bool UseGlobalIndex = true, std::unique_ptr ReadTimer = {}); ASTReader(const ASTReader &) = delete; ASTReader &operator=(const ASTReader &) = delete; diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -2777,6 +2777,10 @@ std::string("-fprebuilt-module-path=") + A->getValue())); A->claim(); } + if (Args.hasFlag(options::OPT_fmodules_validate_input_files_content, + options::OPT_fno_modules_validate_input_files_content, + false)) + CmdArgs.push_back("-fvalidate-ast-input-files-content"); } // -fmodule-name specifies the module that is currently being built (or @@ -4899,6 +4903,10 @@ Std && (Std->containsValue("c++2a") || Std->containsValue("c++latest")); RenderModulesOptions(C, D, Args, Input, Output, CmdArgs, HaveModules); + if (Args.hasFlag(options::OPT_fpch_validate_input_files_content, + options::OPT_fno_pch_validate_input_files_content, false)) + CmdArgs.push_back("-fvalidate-ast-input-files-content"); + Args.AddLastArg(CmdArgs, options::OPT_fexperimental_new_pass_manager, options::OPT_fno_experimental_new_pass_manager); diff --git a/clang/lib/Frontend/CompilerInstance.cpp 
b/clang/lib/Frontend/CompilerInstance.cpp --- a/clang/lib/Frontend/CompilerInstance.cpp +++ b/clang/lib/Frontend/CompilerInstance.cpp @@ -510,7 +510,8 @@ PP, ModuleCache, &Context, PCHContainerRdr, Extensions, Sysroot.empty() ? "" : Sysroot.data(), DisablePCHValidation, AllowPCHWithCompilerErrors, /*AllowConfigurationMismatch*/ false, - HSOpts.ModulesValidateSystemHeaders, UseGlobalModuleIndex)); + HSOpts.ModulesValidateSystemHeaders, HSOpts.ValidateASTInputFilesContent, + UseGlobalModuleIndex)); // We need the external source to be set up before we read the AST, because // eagerly-deserialized declarations may use it. @@ -1492,6 +1493,7 @@ /*AllowASTWithCompilerErrors=*/false, /*AllowConfigurationMismatch=*/false, HSOpts.ModulesValidateSystemHeaders, + HSOpts.ValidateASTInputFilesContent, getFrontendOpts().UseGlobalModuleIndex, std::move(ReadTimer)); if (hasASTConsumer()) { ModuleManager->setDeserializationListener( diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -2081,6 +2081,8 @@ getLastArgUInt64Value(Args, OPT_fbuild_session_timestamp, 0); Opts.ModulesValidateSystemHeaders = Args.hasArg(OPT_fmodules_validate_system_headers); + Opts.ValidateASTInputFilesContent = + Args.hasArg(OPT_fvalidate_ast_input_files_content); if (const Arg *A = Args.getLastArg(OPT_fmodule_format_EQ)) Opts.ModuleFormat = A->getValue(); diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -1247,6 +1247,12 @@ Diag(DiagID) << Arg1 << Arg2; } +void ASTReader::Error(unsigned DiagID, StringRef Arg1, StringRef Arg2, + unsigned Select) const { + if (!Diags.isDiagnosticInFlight()) + Diag(DiagID) << Arg1 << Arg2 << Select; +} + void ASTReader::Error(llvm::Error &&Err) const { Error(toString(std::move(Err))); } @@ -2241,6 
+2247,24 @@ R.TopLevelModuleMap = static_cast(Record[5]); R.Filename = Blob; ResolveImportedPath(F, R.Filename); + + Expected MaybeEntry = Cursor.advance(); + if (!MaybeEntry) // FIXME this drops errors on the floor. + consumeError(MaybeEntry.takeError()); + llvm::BitstreamEntry Entry = MaybeEntry.get(); + assert(Entry.Kind == llvm::BitstreamEntry::Record && + "expected record type for input file hash"); + + Record.clear(); + if (Expected Maybe = Cursor.readRecord(Entry.ID, Record)) + assert(static_cast(Maybe.get()) == INPUT_FILE_HASH && + "invalid record type for input file hash"); + else { + // FIXME this drops errors on the floor. + consumeError(Maybe.takeError()); + } + R.ContentHash = (static_cast(Record[1]) << 32) | + static_cast(Record[0]); return R; } @@ -2271,6 +2295,7 @@ bool Overridden = FI.Overridden; bool Transient = FI.Transient; StringRef Filename = FI.Filename; + uint64_t StoredContentHash = FI.ContentHash; const FileEntry *File = nullptr; if (auto FE = FileMgr.getFile(Filename, /*OpenFile=*/false)) @@ -2325,14 +2350,46 @@ } } - bool IsOutOfDate = false; + enum ModificationType { + Size, + ModTime, + Content, + None, + }; + auto HasInputFileChanged = [&]() { + if (StoredSize != File->getSize()) + return ModificationType::Size; + if (!DisableValidation && StoredTime && + StoredTime != File->getModificationTime()) { + // In case the modification time changes but not the content, + // accept the cached file as legit. 
+ if (ValidateASTInputFilesContent && + StoredContentHash != static_cast(llvm::hash_code(-1))) { + auto MemBuffOrError = FileMgr.getBufferForFile(File); + if (!MemBuffOrError) { + if (!Complain) + return ModificationType::ModTime; + std::string ErrorStr = "could not get buffer for file '"; + ErrorStr += File->getName(); + ErrorStr += "'"; + Error(ErrorStr); + return ModificationType::ModTime; + } + auto ContentHash = hash_value(MemBuffOrError.get()->getBuffer()); + if (StoredContentHash == static_cast(ContentHash)) + return ModificationType::None; + return ModificationType::Content; + } + return ModificationType::ModTime; + } + return ModificationType::None; + }; + + bool IsOutOfDate = false; + auto FileChange = HasInputFileChanged(); // For an overridden file, there is nothing to validate. - if (!Overridden && // - (StoredSize != File->getSize() || - (StoredTime && StoredTime != File->getModificationTime() && - !DisableValidation) - )) { + if (!Overridden && FileChange != ModificationType::None) { if (Complain) { // Build a list of the PCH imports that got us here (in reverse). SmallVector ImportStack(1, &F); @@ -2341,13 +2398,17 @@ // The top-level PCH is stale. StringRef TopLevelPCHName(ImportStack.back()->FileName); - unsigned DiagnosticKind = moduleKindForDiagnostic(ImportStack.back()->Kind); + unsigned DiagnosticKind = + moduleKindForDiagnostic(ImportStack.back()->Kind); if (DiagnosticKind == 0) - Error(diag::err_fe_pch_file_modified, Filename, TopLevelPCHName); + Error(diag::err_fe_pch_file_modified, Filename, TopLevelPCHName, + (unsigned)FileChange); else if (DiagnosticKind == 1) - Error(diag::err_fe_module_file_modified, Filename, TopLevelPCHName); + Error(diag::err_fe_module_file_modified, Filename, TopLevelPCHName, + (unsigned)FileChange); else - Error(diag::err_fe_ast_file_modified, Filename, TopLevelPCHName); + Error(diag::err_fe_ast_file_modified, Filename, TopLevelPCHName, + (unsigned)FileChange); // Print the import stack. 
if (ImportStack.size() > 1 && !Diags.isDiagnosticInFlight()) { @@ -5192,6 +5253,8 @@ consumeError(MaybeRecordType.takeError()); } switch ((InputFileRecordTypes)MaybeRecordType.get()) { + case INPUT_FILE_HASH: + break; case INPUT_FILE: bool Overridden = static_cast(Record[3]); std::string Filename = Blob; @@ -12153,7 +12216,7 @@ StringRef isysroot, bool DisableValidation, bool AllowASTWithCompilerErrors, bool AllowConfigurationMismatch, bool ValidateSystemInputs, - bool UseGlobalIndex, + bool ValidateASTInputFilesContent, bool UseGlobalIndex, std::unique_ptr ReadTimer) : Listener(DisableValidation ? cast(new SimpleASTReaderListener(PP)) @@ -12167,6 +12230,7 @@ AllowASTWithCompilerErrors(AllowASTWithCompilerErrors), AllowConfigurationMismatch(AllowConfigurationMismatch), ValidateSystemInputs(ValidateSystemInputs), + ValidateASTInputFilesContent(ValidateASTInputFilesContent), UseGlobalIndex(UseGlobalIndex), CurrSwitchCaseStmts(&SwitchCaseStmts) { SourceMgr.setExternalSLocEntrySource(this); diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -1099,6 +1099,7 @@ BLOCK(INPUT_FILES_BLOCK); RECORD(INPUT_FILE); + RECORD(INPUT_FILE_HASH); // AST Top-Level Block. BLOCK(AST_BLOCK); @@ -1764,6 +1765,7 @@ bool IsTransient; bool BufferOverridden; bool IsTopLevelModuleMap; + uint32_t ContentHash[2]; }; } // namespace @@ -1787,6 +1789,13 @@ IFAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // File name unsigned IFAbbrevCode = Stream.EmitAbbrev(std::move(IFAbbrev)); + // Create input file hash abbreviation. 
+ auto IFHAbbrev = std::make_shared(); + IFHAbbrev->Add(BitCodeAbbrevOp(INPUT_FILE_HASH)); + IFHAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); + IFHAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); + unsigned IFHAbbrevCode = Stream.EmitAbbrev(std::move(IFHAbbrev)); + // Get all ContentCache objects for files, sorted by whether the file is a // system one or not. System files go at the back, users files at the front. std::deque SortedFiles; @@ -1810,6 +1819,25 @@ Entry.BufferOverridden = Cache->BufferOverridden; Entry.IsTopLevelModuleMap = isModuleMap(File.getFileCharacteristic()) && File.getIncludeLoc().isInvalid(); + + auto ContentHash = hash_code(-1); + if (PP->getHeaderSearchInfo() + .getHeaderSearchOpts() + .ValidateASTInputFilesContent) { + auto *MemBuff = Cache->getRawBuffer(); + if (MemBuff) + ContentHash = hash_value(MemBuff->getBuffer()); + else + // FIXME: The path should be taken from the FileEntryRef. + PP->Diag(SourceLocation(), diag::err_module_unable_to_hash_content) + << Entry.File->getName(); + } + auto CH = llvm::APInt(64, ContentHash); + Entry.ContentHash[0] = + static_cast(CH.getLoBits(32).getZExtValue()); + Entry.ContentHash[1] = + static_cast(CH.getHiBits(32).getZExtValue()); + if (Entry.IsSystemFile) SortedFiles.push_back(Entry); else @@ -1834,17 +1862,26 @@ // Emit size/modification time for this file. // And whether this file was overridden. - RecordData::value_type Record[] = { - INPUT_FILE, - InputFileOffsets.size(), - (uint64_t)Entry.File->getSize(), - (uint64_t)getTimestampForOutput(Entry.File), - Entry.BufferOverridden, - Entry.IsTransient, - Entry.IsTopLevelModuleMap}; - - // FIXME: The path should be taken from the FileEntryRef. 
- EmitRecordWithPath(IFAbbrevCode, Record, Entry.File->getName()); + { + RecordData::value_type Record[] = { + INPUT_FILE, + InputFileOffsets.size(), + (uint64_t)Entry.File->getSize(), + (uint64_t)getTimestampForOutput(Entry.File), + Entry.BufferOverridden, + Entry.IsTransient, + Entry.IsTopLevelModuleMap}; + + // FIXME: The path should be taken from the FileEntryRef. + EmitRecordWithPath(IFAbbrevCode, Record, Entry.File->getName()); + } + + // Emit content hash for this file. + { + RecordData::value_type Record[] = {INPUT_FILE_HASH, Entry.ContentHash[0], + Entry.ContentHash[1]}; + Stream.EmitRecordWithAbbrev(IFHAbbrevCode, Record); + } } Stream.ExitBlock(); diff --git a/clang/test/Modules/validate-file-content.m b/clang/test/Modules/validate-file-content.m new file mode 100644 --- /dev/null +++ b/clang/test/Modules/validate-file-content.m @@ -0,0 +1,33 @@ +// REQUIRES: shell +// +// Check driver works +// RUN: %clang -fmodules -fsyntax-only -fmodules-validate-input-files-content %s -### 2>&1 | FileCheck --check-prefix=CHECK-CC1 %s +// CHECK-CC1: -fvalidate-ast-input-files-content +// +// PCH+Modules: Test that a mtime mismatch without content change is fine +// RUN: rm -rf %t +// RUN: mkdir %t +// RUN: echo '// m.h' > %t/m.h +// RUN: echo '#include "m.h"' > %t/a.h +// RUN: echo 'module m { header "m.h" }' > %t/module.modulemap +// RUN: %clang_cc1 -emit-pch -fmodules-cache-path=%t/cache -fmodules -fimplicit-module-maps -o %t/a.pch -I %t -x objective-c-header %t/a.h -fvalidate-ast-input-files-content +// RUN: touch -m -a -t 202901010000 %t/m.h +// RUN: %clang_cc1 -fsyntax-only -fmodules-cache-path=%t/cache -fmodules -fimplicit-module-maps -I %t -include-pch %t/a.pch %s -verify -fvalidate-ast-input-files-content +// +// PCH+Modules: Test that a mtime mismatch with content change +// RUN: rm -rf %t +// RUN: mkdir %t +// RUN: echo '// m.h' > %t/m.h +// RUN: echo '#include "m.h"' > %t/a.h +// RUN: echo 'module m { header "m.h" }' > %t/module.modulemap +// RUN: 
%clang_cc1 -emit-pch -fmodules-cache-path=%t/cache -fmodules -fimplicit-module-maps -o %t/a.pch -I %t -x objective-c-header %t/a.h -fvalidate-ast-input-files-content +// RUN: echo '// m.x' > %t/m.h +// RUN: touch -m -a -t 202901010000 %t/m.h +// RUN: not %clang_cc1 -fsyntax-only -fmodules-cache-path=%t/cache -fmodules -fimplicit-module-maps -I %t -include-pch %t/a.pch %s -fvalidate-ast-input-files-content 2> %t/stderr +// RUN: FileCheck %s < %t/stderr +// +// CHECK: file '[[M_H:.*[/\\]m\.h]]' has been modified since the precompiled header '[[A_PCH:.*/a\.pch]]' was built: content changed +// CHECK: '[[M_H]]' required by '[[M_PCM:.*[/\\]m.*\.pcm]]' +// CHECK: '[[M_PCM]]' required by '[[A_PCH]]' +// CHECK: please rebuild precompiled header '[[A_PCH]]' +// expected-no-diagnostics diff --git a/clang/test/PCH/validate-file-content.m b/clang/test/PCH/validate-file-content.m new file mode 100644 --- /dev/null +++ b/clang/test/PCH/validate-file-content.m @@ -0,0 +1,29 @@ +// REQUIRES: shell +// +// Check driver works +// RUN: %clang -x objective-c-header -fsyntax-only -fpch-validate-input-files-content %s -### 2>&1 | FileCheck --check-prefix=CHECK-CC1 %s +// CHECK-CC1: -fvalidate-ast-input-files-content +// +// PCH only: Test that a mtime mismatch without content change is fine +// RUN: rm -rf %t +// RUN: mkdir %t +// RUN: echo '// m.h' > %t/m.h +// RUN: echo '#include "m.h"' > %t/a.h +// RUN: %clang_cc1 -emit-pch -o %t/a.pch -I %t -x objective-c-header %t/a.h -fvalidate-ast-input-files-content +// RUN: touch -m -a -t 202901010000 %t/m.h +// RUN: %clang_cc1 -fsyntax-only -I %t -include-pch %t/a.pch %s -verify -fvalidate-ast-input-files-content +// +// PCH only: Test that a mtime mismatch with content change +// RUN: rm -rf %t +// RUN: mkdir %t +// RUN: echo '// m.h' > %t/m.h +// RUN: echo '#include "m.h"' > %t/a.h +// RUN: %clang_cc1 -emit-pch -o %t/a.pch -I %t -x objective-c-header %t/a.h -fvalidate-ast-input-files-content +// RUN: echo '// m.x' > %t/m.h +// RUN: touch 
-m -a -t 202901010000 %t/m.h +// RUN: not %clang_cc1 -fsyntax-only -I %t -include-pch %t/a.pch %s -fvalidate-ast-input-files-content 2> %t/stderr +// RUN: FileCheck %s < %t/stderr +// +// CHECK: file '[[M_H:.*[/\\]m\.h]]' has been modified since the precompiled header '[[A_PCH:.*/a\.pch]]' was built: content changed +// CHECK: please rebuild precompiled header '[[A_PCH]]' +// expected-no-diagnostics