diff --git a/llvm/include/llvm/AsmParser/LLParser.h b/llvm/include/llvm/AsmParser/LLParser.h --- a/llvm/include/llvm/AsmParser/LLParser.h +++ b/llvm/include/llvm/AsmParser/LLParser.h @@ -179,8 +179,10 @@ Lex(F, SM, Err, Context), M(M), Index(Index), Slots(Slots), BlockAddressPFS(nullptr) {} bool Run( - bool UpgradeDebugInfo, DataLayoutCallbackTy DataLayoutCallback = - [](StringRef) { return std::nullopt; }); + bool UpgradeDebugInfo, + DataLayoutCallbackTy DataLayoutCallback = [](StringRef, StringRef) { + return std::nullopt; + }); bool parseStandaloneConstantValue(Constant *&C, const SlotMapping *Slots); @@ -318,8 +320,8 @@ bool parseTopLevelEntities(); bool validateEndOfModule(bool UpgradeDebugInfo); bool validateEndOfIndex(); - bool parseTargetDefinitions(); - bool parseTargetDefinition(); + bool parseTargetDefinitions(DataLayoutCallbackTy DataLayoutCallback); + bool parseTargetDefinition(std::string &TentativeDLStr, LocTy &DLStrLoc); bool parseModuleAsm(); bool parseSourceFileName(); bool parseUnnamedType(); diff --git a/llvm/include/llvm/AsmParser/Parser.h b/llvm/include/llvm/AsmParser/Parser.h --- a/llvm/include/llvm/AsmParser/Parser.h +++ b/llvm/include/llvm/AsmParser/Parser.h @@ -29,7 +29,7 @@ class SMDiagnostic; class Type; -typedef llvm::function_ref(StringRef)> +typedef llvm::function_ref(StringRef, StringRef)> DataLayoutCallbackTy; /// This function is a main interface to the LLVM Assembly Parser. It parses @@ -86,7 +86,7 @@ ParsedModuleAndIndex parseAssemblyFileWithIndex( StringRef Filename, SMDiagnostic &Err, LLVMContext &Context, SlotMapping *Slots = nullptr, - DataLayoutCallbackTy DataLayoutCallback = [](StringRef) { + DataLayoutCallbackTy DataLayoutCallback = [](StringRef, StringRef) { return std::nullopt; }); @@ -127,7 +127,7 @@ std::unique_ptr parseAssembly( MemoryBufferRef F, SMDiagnostic &Err, LLVMContext &Context, SlotMapping *Slots = nullptr, - DataLayoutCallbackTy DataLayoutCallback = [](StringRef) { + DataLayoutCallbackTy DataLayoutCallback = [](StringRef, StringRef) { return std::nullopt; }); @@ -169,7 +169,7 @@ bool parseAssemblyInto( MemoryBufferRef F, Module *M, ModuleSummaryIndex *Index, SMDiagnostic &Err, SlotMapping *Slots = nullptr, - DataLayoutCallbackTy DataLayoutCallback = [](StringRef) { + DataLayoutCallbackTy DataLayoutCallback = [](StringRef, StringRef) { return std::nullopt; }); diff --git a/llvm/include/llvm/Bitcode/BitcodeReader.h b/llvm/include/llvm/Bitcode/BitcodeReader.h --- a/llvm/include/llvm/Bitcode/BitcodeReader.h +++ b/llvm/include/llvm/Bitcode/BitcodeReader.h @@ -34,7 +34,10 @@ class MemoryBuffer; class ModuleSummaryIndex; -typedef llvm::function_ref(StringRef)> +// Callback to override the data layout string of an imported bitcode module. +// The first argument is the target triple, the second argument the old data +// layout string. +typedef llvm::function_ref(StringRef, StringRef)> DataLayoutCallbackTy; // These functions are for converting Expected/Error values to @@ -101,14 +104,18 @@ /// bodies. If ShouldLazyLoadMetadata is true, lazily load metadata as well. /// If IsImporting is true, this module is being parsed for ThinLTO /// importing into another module. - Expected> getLazyModule(LLVMContext &Context, - bool ShouldLazyLoadMetadata, - bool IsImporting); + Expected> getLazyModule( + LLVMContext &Context, bool ShouldLazyLoadMetadata, bool IsImporting, + DataLayoutCallbackTy DataLayoutCallback = [](StringRef, StringRef) { + return std::nullopt; + }); /// Read the entire bitcode module and return it. Expected> parseModule( - LLVMContext &Context, DataLayoutCallbackTy DataLayoutCallback = - [](StringRef) { return std::nullopt; }); + LLVMContext &Context, + DataLayoutCallbackTy DataLayoutCallback = [](StringRef, StringRef) { + return std::nullopt; + }); /// Returns information about the module to be used for LTO: whether to /// compile with ThinLTO, and whether it has a summary. @@ -145,10 +152,12 @@ /// deserialization of function bodies. If ShouldLazyLoadMetadata is true, /// lazily load metadata as well. If IsImporting is true, this module is /// being parsed for ThinLTO importing into another module. - Expected> - getLazyBitcodeModule(MemoryBufferRef Buffer, LLVMContext &Context, - bool ShouldLazyLoadMetadata = false, - bool IsImporting = false); + Expected> getLazyBitcodeModule( + MemoryBufferRef Buffer, LLVMContext &Context, + bool ShouldLazyLoadMetadata = false, bool IsImporting = false, + DataLayoutCallbackTy DataLayoutCallback = [](StringRef, StringRef) { + return std::nullopt; + }); /// Like getLazyBitcodeModule, except that the module takes ownership of /// the memory buffer if successful. If successful, this moves Buffer. On @@ -175,7 +184,7 @@ /// Read the specified bitcode file, returning the module. Expected> parseBitcodeFile( MemoryBufferRef Buffer, LLVMContext &Context, - DataLayoutCallbackTy DataLayoutCallback = [](StringRef) { + DataLayoutCallbackTy DataLayoutCallback = [](StringRef, StringRef) { return std::nullopt; }); diff --git a/llvm/include/llvm/CodeGen/MIRParser/MIRParser.h b/llvm/include/llvm/CodeGen/MIRParser/MIRParser.h --- a/llvm/include/llvm/CodeGen/MIRParser/MIRParser.h +++ b/llvm/include/llvm/CodeGen/MIRParser/MIRParser.h @@ -34,7 +34,7 @@ class SMDiagnostic; class StringRef; -typedef llvm::function_ref(StringRef)> +typedef llvm::function_ref(StringRef, StringRef)> DataLayoutCallbackTy; /// This class initializes machine functions by applying the state loaded from @@ -52,9 +52,8 @@ /// A new, empty module is created if the LLVM IR isn't present. /// \returns nullptr if a parsing error occurred. std::unique_ptr - parseIRModule(DataLayoutCallbackTy DataLayoutCallback = [](StringRef) { - return std::nullopt; - }); + parseIRModule(DataLayoutCallbackTy DataLayoutCallback = + [](StringRef, StringRef) { return std::nullopt; }); /// Parses MachineFunctions in the MIR file and add them to the given /// MachineModuleInfo \p MMI. diff --git a/llvm/include/llvm/IRReader/IRReader.h b/llvm/include/llvm/IRReader/IRReader.h --- a/llvm/include/llvm/IRReader/IRReader.h +++ b/llvm/include/llvm/IRReader/IRReader.h @@ -27,7 +27,7 @@ class SMDiagnostic; class LLVMContext; -typedef llvm::function_ref(StringRef)> +typedef llvm::function_ref(StringRef, StringRef)> DataLayoutCallbackTy; /// If the given MemoryBuffer holds a bitcode image, return a Module @@ -55,7 +55,7 @@ /// \param DataLayoutCallback Override datalayout in the llvm assembly. std::unique_ptr parseIR( MemoryBufferRef Buffer, SMDiagnostic &Err, LLVMContext &Context, - DataLayoutCallbackTy DataLayoutCallback = [](StringRef) { + DataLayoutCallbackTy DataLayoutCallback = [](StringRef, StringRef) { return std::nullopt; }); @@ -65,7 +65,7 @@ /// \param DataLayoutCallback Override datalayout in the llvm assembly. std::unique_ptr parseIRFile( StringRef Filename, SMDiagnostic &Err, LLVMContext &Context, - DataLayoutCallbackTy DataLayoutCallback = [](StringRef) { + DataLayoutCallbackTy DataLayoutCallback = [](StringRef, StringRef) { return std::nullopt; }); } diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp --- a/llvm/lib/AsmParser/LLParser.cpp +++ b/llvm/lib/AsmParser/LLParser.cpp @@ -95,11 +95,8 @@ "Can't read textual IR with a Context that discards named Values"); if (M) { - if (parseTargetDefinitions()) + if (parseTargetDefinitions(DataLayoutCallback)) return true; - - if (auto LayoutOverride = DataLayoutCallback(M->getTargetTriple())) - M->setDataLayout(*LayoutOverride); } return parseTopLevelEntities() || validateEndOfModule(UpgradeDebugInfo) || @@ -353,11 +350,19 @@ // Top-Level Entities //===----------------------------------------------------------------------===// -bool LLParser::parseTargetDefinitions() { - while (true) { +bool LLParser::parseTargetDefinitions(DataLayoutCallbackTy DataLayoutCallback) { + // Delay parsing of the data layout string until the target triple is known. + // Then, pass both the the target triple and the tentative data layout string + // to DataLayoutCallback, allowing to override the DL string. + // This enables importing modules with invalid DL strings. + std::string TentativeDLStr = M->getDataLayoutStr(); + LocTy DLStrLoc; + + bool Done = false; + while (!Done) { switch (Lex.getKind()) { case lltok::kw_target: - if (parseTargetDefinition()) + if (parseTargetDefinition(TentativeDLStr, DLStrLoc)) return true; break; case lltok::kw_source_filename: @@ -365,9 +370,25 @@ return true; break; default: - return false; + Done = true; } } + // Run the override callback to potentially change the data layout string, and + // parse the data layout string. Parsing of the data layout is done separately + // in the two cases below because the error handling differs: For invalid + // data layout strings parsed from the module file, we want to report an error + // referencing the relevant source location. For layout strings created by + // callbacks, we let Module::setDataLayout handle potential errors. + if (auto LayoutOverride = + DataLayoutCallback(M->getTargetTriple(), TentativeDLStr)) { + M->setDataLayout(*LayoutOverride); + } else { + Expected MaybeDL = DataLayout::parse(TentativeDLStr); + if (!MaybeDL) + return error(DLStrLoc, toString(MaybeDL.takeError())); + M->setDataLayout(MaybeDL.get()); + } + return false; } bool LLParser::parseTopLevelEntities() { @@ -471,7 +492,8 @@ /// toplevelentity /// ::= 'target' 'triple' '=' STRINGCONSTANT /// ::= 'target' 'datalayout' '=' STRINGCONSTANT -bool LLParser::parseTargetDefinition() { +bool LLParser::parseTargetDefinition(std::string &TentativeDLStr, + LocTy &DLStrLoc) { assert(Lex.getKind() == lltok::kw_target); std::string Str; switch (Lex.Lex()) { @@ -488,13 +510,9 @@ Lex.Lex(); if (parseToken(lltok::equal, "expected '=' after target datalayout")) return true; - LocTy Loc = Lex.getLoc(); - if (parseStringConstant(Str)) + DLStrLoc = Lex.getLoc(); + if (parseStringConstant(TentativeDLStr)) return true; - Expected MaybeDL = DataLayout::parse(Str); - if (!MaybeDL) - return error(Loc, toString(MaybeDL.takeError())); - M->setDataLayout(MaybeDL.get()); return false; } } diff --git a/llvm/lib/AsmParser/Parser.cpp b/llvm/lib/AsmParser/Parser.cpp --- a/llvm/lib/AsmParser/Parser.cpp +++ b/llvm/lib/AsmParser/Parser.cpp @@ -91,9 +91,10 @@ SMDiagnostic &Err, LLVMContext &Context, SlotMapping *Slots) { - return ::parseAssemblyWithIndex(F, Err, Context, Slots, - /*UpgradeDebugInfo*/ true, - [](StringRef) { return std::nullopt; }); + return ::parseAssemblyWithIndex( + F, Err, Context, Slots, + /*UpgradeDebugInfo*/ true, + [](StringRef, StringRef) { return std::nullopt; }); } static ParsedModuleAndIndex @@ -150,7 +151,7 @@ // index, but we need to initialize it. LLVMContext unusedContext; return LLParser(F.getBuffer(), SM, Err, nullptr, &Index, unusedContext) - .Run(true, [](StringRef) { return std::nullopt; }); + .Run(true, [](StringRef, StringRef) { return std::nullopt; }); } std::unique_ptr diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp --- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp @@ -821,7 +821,7 @@ Error parseAttrKind(uint64_t Code, Attribute::AttrKind *Kind); Error parseModule( uint64_t ResumeBit, bool ShouldLazyLoadMetadata = false, - DataLayoutCallbackTy DataLayoutCallback = [](StringRef) { + DataLayoutCallbackTy DataLayoutCallback = [](StringRef, StringRef) { return std::nullopt; }); @@ -4192,21 +4192,35 @@ // Parts of bitcode parsing depend on the datalayout. Make sure we // finalize the datalayout before we run any of that code. bool ResolvedDataLayout = false; - auto ResolveDataLayout = [&] { + // In order to support importing modules with illegal data layout strings, + // delay parsing the data layout string until after upgrades and overrides + // have been applied, allowing to fix illegal data layout strings. + // Initialize to the current module's layout string in case none is specified. + std::string TentativeDataLayoutStr = TheModule->getDataLayoutStr(); + + auto ResolveDataLayout = [&]() -> Error { if (ResolvedDataLayout) - return; + return Error::success(); - // datalayout and triple can't be parsed after this point. + // Datalayout and triple can't be parsed after this point. ResolvedDataLayout = true; - // Upgrade data layout string. - std::string DL = llvm::UpgradeDataLayoutString( - TheModule->getDataLayoutStr(), TheModule->getTargetTriple()); - TheModule->setDataLayout(DL); + // Auto-upgrade the layout string + TentativeDataLayoutStr = llvm::UpgradeDataLayoutString( + TentativeDataLayoutStr, TheModule->getTargetTriple()); + + // Apply override + if (auto LayoutOverride = DataLayoutCallback(TheModule->getTargetTriple(), + TentativeDataLayoutStr)) + TentativeDataLayoutStr = *LayoutOverride; + + // Now the layout string is finalized in TentativeDataLayoutStr. Parse it. + Expected MaybeDL = DataLayout::parse(TentativeDataLayoutStr); + if (!MaybeDL) + return MaybeDL.takeError(); - if (auto LayoutOverride = - DataLayoutCallback(TheModule->getTargetTriple())) - TheModule->setDataLayout(*LayoutOverride); + TheModule->setDataLayout(MaybeDL.get()); + return Error::success(); }; // Read all the records for this module. @@ -4220,7 +4234,8 @@ case BitstreamEntry::Error: return error("Malformed block"); case BitstreamEntry::EndBlock: - ResolveDataLayout(); + if (Error Err = ResolveDataLayout()) + return Err; return globalCleanup(); case BitstreamEntry::SubBlock: @@ -4285,7 +4300,8 @@ return Err; break; case bitc::FUNCTION_BLOCK_ID: - ResolveDataLayout(); + if (Error Err = ResolveDataLayout()) + return Err; // If this is the first function body we've seen, reverse the // FunctionsWithBodies list. @@ -4384,13 +4400,8 @@ case bitc::MODULE_CODE_DATALAYOUT: { // DATALAYOUT: [strchr x N] if (ResolvedDataLayout) return error("datalayout too late in module"); - std::string S; - if (convertToString(Record, 0, S)) + if (convertToString(Record, 0, TentativeDataLayoutStr)) return error("Invalid record"); - Expected MaybeDL = DataLayout::parse(S); - if (!MaybeDL) - return MaybeDL.takeError(); - TheModule->setDataLayout(MaybeDL.get()); break; } case bitc::MODULE_CODE_ASM: { // ASM: [strchr x N] @@ -4436,7 +4447,8 @@ return Err; break; case bitc::MODULE_CODE_FUNCTION: - ResolveDataLayout(); + if (Error Err = ResolveDataLayout()) + return Err; if (Error Err = parseFunctionRecord(Record)) return Err; break; @@ -4465,6 +4477,7 @@ } Record.clear(); } + return Error::success(); } Error BitcodeReader::parseBitcodeInto(Module *M, bool ShouldLazyLoadMetadata, @@ -7942,9 +7955,10 @@ Expected> BitcodeModule::getLazyModule(LLVMContext &Context, bool ShouldLazyLoadMetadata, - bool IsImporting) { + bool IsImporting, + DataLayoutCallbackTy DataLayoutCallback) { return getModuleImpl(Context, false, ShouldLazyLoadMetadata, IsImporting, - [](StringRef) { return std::nullopt; }); + DataLayoutCallback); } // Parse the specified bitcode buffer and merge the index into CombinedIndex. @@ -8090,12 +8104,14 @@ Expected> llvm::getLazyBitcodeModule(MemoryBufferRef Buffer, LLVMContext &Context, - bool ShouldLazyLoadMetadata, bool IsImporting) { + bool ShouldLazyLoadMetadata, bool IsImporting, + DataLayoutCallbackTy DataLayoutCallback) { Expected BM = getSingleModule(Buffer); if (!BM) return BM.takeError(); - return BM->getLazyModule(Context, ShouldLazyLoadMetadata, IsImporting); + return BM->getLazyModule(Context, ShouldLazyLoadMetadata, IsImporting, + DataLayoutCallback); } Expected> llvm::getOwningLazyBitcodeModule( diff --git a/llvm/lib/CodeGen/MIRParser/MIRParser.cpp b/llvm/lib/CodeGen/MIRParser/MIRParser.cpp --- a/llvm/lib/CodeGen/MIRParser/MIRParser.cpp +++ b/llvm/lib/CodeGen/MIRParser/MIRParser.cpp @@ -234,7 +234,8 @@ // Create an empty module when the MIR file is empty. NoMIRDocuments = true; auto M = std::make_unique(Filename, Context); - if (auto LayoutOverride = DataLayoutCallback(M->getTargetTriple())) + if (auto LayoutOverride = + DataLayoutCallback(M->getTargetTriple(), M->getDataLayoutStr())) M->setDataLayout(*LayoutOverride); return M; } @@ -257,7 +258,8 @@ } else { // Create an new, empty module. M = std::make_unique(Filename, Context); - if (auto LayoutOverride = DataLayoutCallback(M->getTargetTriple())) + if (auto LayoutOverride = + DataLayoutCallback(M->getTargetTriple(), M->getDataLayoutStr())) M->setDataLayout(*LayoutOverride); NoLLVMIR = true; } diff --git a/llvm/test/Assembler/invalid-datalayout-override.ll b/llvm/test/Assembler/invalid-datalayout-override.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Assembler/invalid-datalayout-override.ll @@ -0,0 +1,7 @@ +; Check that importing this file gives an error due to the broken data layout: +; RUN: not llvm-as < %s 2>&1 | FileCheck %s +; Check that specifying a valid data layout allows to import this file: +; RUN: llvm-as -data-layout "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" < %s + +target datalayout = "A16777216" +; CHECK: Invalid address space, must be a 24-bit integer diff --git a/llvm/tools/llc/llc.cpp b/llvm/tools/llc/llc.cpp --- a/llvm/tools/llc/llc.cpp +++ b/llvm/tools/llc/llc.cpp @@ -530,8 +530,8 @@ // If user just wants to list available options, skip module loading if (!SkipModule) { - auto SetDataLayout = - [&](StringRef DataLayoutTargetTriple) -> std::optional { + auto SetDataLayout = [&](StringRef DataLayoutTargetTriple, + StringRef OldDLStr) -> std::optional { // If we are supposed to override the target triple, do so now. std::string IRTargetTriple = DataLayoutTargetTriple.str(); if (!TargetTriple.empty()) diff --git a/llvm/tools/llvm-as/llvm-as.cpp b/llvm/tools/llvm-as/llvm-as.cpp --- a/llvm/tools/llvm-as/llvm-as.cpp +++ b/llvm/tools/llvm-as/llvm-as.cpp @@ -121,7 +121,7 @@ // Parse the file now... SMDiagnostic Err; - auto SetDataLayout = [](StringRef) -> std::optional { + auto SetDataLayout = [](StringRef, StringRef) -> std::optional { if (ClDataLayout.empty()) return std::nullopt; return ClDataLayout; diff --git a/llvm/tools/llvm-reduce/ReducerWorkItem.cpp b/llvm/tools/llvm-reduce/ReducerWorkItem.cpp --- a/llvm/tools/llvm-reduce/ReducerWorkItem.cpp +++ b/llvm/tools/llvm-reduce/ReducerWorkItem.cpp @@ -407,8 +407,8 @@ std::unique_ptr MParser = createMIRParser(std::move(FileOrErr.get()), Ctxt); - auto SetDataLayout = - [&](StringRef DataLayoutTargetTriple) -> std::optional { + auto SetDataLayout = [&](StringRef DataLayoutTargetTriple, + StringRef OldDLStr) -> std::optional { // If we are supposed to override the target triple, do so now. std::string IRTargetTriple = DataLayoutTargetTriple.str(); if (!TargetTriple.empty()) diff --git a/llvm/tools/opt/opt.cpp b/llvm/tools/opt/opt.cpp --- a/llvm/tools/opt/opt.cpp +++ b/llvm/tools/opt/opt.cpp @@ -542,7 +542,7 @@ std::unique_ptr RemarksFile = std::move(*RemarksFileOrErr); // Load the input module... - auto SetDataLayout = [](StringRef) -> std::optional { + auto SetDataLayout = [](StringRef, StringRef) -> std::optional { if (ClDataLayout.empty()) return std::nullopt; return ClDataLayout;