diff --git a/llvm/include/llvm/AsmParser/LLParser.h b/llvm/include/llvm/AsmParser/LLParser.h --- a/llvm/include/llvm/AsmParser/LLParser.h +++ b/llvm/include/llvm/AsmParser/LLParser.h @@ -515,6 +515,7 @@ bool parseGlobalValueVector(SmallVectorImpl &Elts, Optional *InRangeOp = nullptr); bool parseOptionalComdat(StringRef GlobalName, Comdat *&C); + bool parseSanitizer(GlobalVariable *GV); bool parseMetadataAsValue(Value *&V, PerFunctionState &PFS); bool parseValueAsMetadata(Metadata *&MD, const Twine &TypeMsg, PerFunctionState *PFS); diff --git a/llvm/include/llvm/AsmParser/LLToken.h b/llvm/include/llvm/AsmParser/LLToken.h --- a/llvm/include/llvm/AsmParser/LLToken.h +++ b/llvm/include/llvm/AsmParser/LLToken.h @@ -391,6 +391,19 @@ kw_bit, kw_varFlags, + // GV's with __attribute__((no_sanitize("address"))), or things in + // -fsanitize-ignorelist when built with ASan. + kw_no_sanitize_address, + // GV's with __attribute__((no_sanitize("hwaddress"))), or things in + // -fsanitize-ignorelist when built with HWASan. + kw_no_sanitize_hwaddress, + // GV's with __attribute__((no_sanitize("memtag"))), or things in + // -fsanitize-ignorelist when built with memory tagging. + kw_no_sanitize_memtag, + // GV's where the clang++ frontend (when ASan is used) notes that this is + // dynamically initialized, and thus needs ODR detection. + kw_sanitize_address_dyninit, + // Unsigned Valued tokens (UIntVal). 
LabelID, // 42: GlobalID, // @42 diff --git a/llvm/include/llvm/IR/GlobalValue.h b/llvm/include/llvm/IR/GlobalValue.h --- a/llvm/include/llvm/IR/GlobalValue.h +++ b/llvm/include/llvm/IR/GlobalValue.h @@ -79,14 +79,15 @@ ValueType(Ty), Visibility(DefaultVisibility), UnnamedAddrVal(unsigned(UnnamedAddr::None)), DllStorageClass(DefaultStorageClass), ThreadLocal(NotThreadLocal), - HasLLVMReservedName(false), IsDSOLocal(false), HasPartition(false) { + HasLLVMReservedName(false), IsDSOLocal(false), HasPartition(false), + HasSanitizerMetadata(false) { setLinkage(Linkage); setName(Name); } Type *ValueType; - static const unsigned GlobalValueSubClassDataBits = 16; + static const unsigned GlobalValueSubClassDataBits = 15; // All bitfields use unsigned as the underlying type so that MSVC will pack // them. @@ -111,9 +112,14 @@ /// https://lld.llvm.org/Partitions.html). unsigned HasPartition : 1; + /// True if this symbol has sanitizer metadata available. Should only happen + /// if sanitizers were enabled when building the translation unit which + /// contains this GV. + unsigned HasSanitizerMetadata : 1; + private: // Give subclasses access to what otherwise would be wasted padding. - // (16 + 4 + 2 + 2 + 2 + 3 + 1 + 1 + 1) == 32. + // (15 + 4 + 2 + 2 + 2 + 3 + 1 + 1 + 1 + 1) == 32. unsigned SubClassData : GlobalValueSubClassDataBits; friend class Constant; @@ -288,6 +294,39 @@ StringRef getPartition() const; void setPartition(StringRef Part); + // ASan, HWASan and Memtag sanitizers have some instrumentation that applies + // specifically to global variables. This instrumentation is implicitly + // applied to all global variables when built with -fsanitize=*. What we need + // is a way to persist the information that a certain global variable should + // *not* have sanitizers applied, which occurs if: + // 1. The global variable is in the sanitizer ignore list, or + // 2. The global variable is created by the sanitizer itself for internal + // usage, or + // 3. 
The global variable has __attribute__((no_sanitize("..."))) or + // __attribute__((disable_sanitizer_instrumentation)). + // + // This is important, as some IR passes like GlobalMerge can delete global + // variables and replace them with new ones. If the old variables were marked + // to be unsanitized, then the new ones should also be. + struct SanitizerMetadata { + SanitizerMetadata() + : NoAddress(false), NoHWAddress(false), NoMemtag(false), + IsDynInit(false) {} + unsigned NoAddress : 1; + unsigned NoHWAddress : 1; + unsigned NoMemtag : 1; + + // ASan-specific metadata. Is this global variable dynamically initialized + // (from a C++ language perspective), and should therefore be checked for + // ODR violations. + unsigned IsDynInit : 1; + }; + + bool hasSanitizerMetadata() const { return HasSanitizerMetadata; } + const SanitizerMetadata &getSanitizerMetadata() const; + void setSanitizerMetadata(const SanitizerMetadata &Meta); + void removeSanitizerMetadata(); + static LinkageTypes getLinkOnceLinkage(bool ODR) { return ODR ? 
LinkOnceODRLinkage : LinkOnceAnyLinkage; } diff --git a/llvm/lib/AsmParser/LLLexer.cpp b/llvm/lib/AsmParser/LLLexer.cpp --- a/llvm/lib/AsmParser/LLLexer.cpp +++ b/llvm/lib/AsmParser/LLLexer.cpp @@ -580,6 +580,11 @@ KEYWORD(prefix); KEYWORD(prologue); + KEYWORD(no_sanitize_address); + KEYWORD(no_sanitize_hwaddress); + KEYWORD(no_sanitize_memtag); + KEYWORD(sanitize_address_dyninit); + KEYWORD(ccc); KEYWORD(fastcc); KEYWORD(coldcc); diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp --- a/llvm/lib/AsmParser/LLParser.cpp +++ b/llvm/lib/AsmParser/LLParser.cpp @@ -1103,6 +1103,45 @@ return false; } +static bool isSanitizer(lltok::Kind Kind) { + switch (Kind) { + case lltok::kw_no_sanitize_address: + case lltok::kw_no_sanitize_hwaddress: + case lltok::kw_no_sanitize_memtag: + case lltok::kw_sanitize_address_dyninit: + return true; + default: + return false; + } +} + +bool LLParser::parseSanitizer(GlobalVariable *GV) { + using SanitizerMetadata = GlobalValue::SanitizerMetadata; + SanitizerMetadata Meta; + if (GV->hasSanitizerMetadata()) + Meta = GV->getSanitizerMetadata(); + + switch (Lex.getKind()) { + case lltok::kw_no_sanitize_address: + Meta.NoAddress = true; + break; + case lltok::kw_no_sanitize_hwaddress: + Meta.NoHWAddress = true; + break; + case lltok::kw_no_sanitize_memtag: + Meta.NoMemtag = true; + break; + case lltok::kw_sanitize_address_dyninit: + Meta.IsDynInit = true; + break; + default: + return tokError("non-sanitizer token passed to LLParser::parseSanitizer()"); + } + GV->setSanitizerMetadata(Meta); + Lex.Lex(); + return false; +} + /// parseGlobal /// ::= GlobalVar '=' OptionalLinkage OptionalPreemptionSpecifier /// OptionalVisibility OptionalDLLStorageClass @@ -1221,6 +1260,9 @@ } else if (Lex.getKind() == lltok::MetadataVar) { if (parseGlobalObjectMetadataAttachment(*GV)) return true; + } else if (isSanitizer(Lex.getKind())) { + if (parseSanitizer(GV)) + return true; } else { Comdat *C; if (parseOptionalComdat(Name, C)) diff 
--git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp --- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp @@ -3441,6 +3441,19 @@ GV->setDSOLocal(true); } +GlobalValue::SanitizerMetadata deserializeSanitizerMetadata(unsigned V) { + GlobalValue::SanitizerMetadata Meta; + if (V & (1 << 0)) + Meta.NoAddress = true; + if (V & (1 << 1)) + Meta.NoHWAddress = true; + if (V & (1 << 2)) + Meta.NoMemtag = true; + if (V & (1 << 3)) + Meta.IsDynInit = true; + return Meta; +} + Error BitcodeReader::parseGlobalVarRecord(ArrayRef Record) { // v1: [pointer type, isconst, initid, linkage, alignment, section, // visibility, threadlocal, unnamed_addr, externally_initialized, @@ -3544,6 +3557,12 @@ if (Record.size() > 15) NewGV->setPartition(StringRef(Strtab.data() + Record[14], Record[15])); + if (Record.size() > 16 && Record[16] != UINT_MAX) { + llvm::GlobalValue::SanitizerMetadata Meta = + deserializeSanitizerMetadata(Record[16]); + NewGV->setSanitizerMetadata(Meta); + } + return Error::success(); } diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp --- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -1223,6 +1223,14 @@ return SE_Fixed7; } +static_assert(sizeof(GlobalValue::SanitizerMetadata) <= sizeof(unsigned), + "Sanitizer Metadata is too large for naive serialization."); +static unsigned +serializeSanitizerMetadata(const GlobalValue::SanitizerMetadata &Meta) { + return Meta.NoAddress | (Meta.NoHWAddress << 1) | + (Meta.NoMemtag << 2) | (Meta.IsDynInit << 3); +} + /// Emit top-level description of module, including target triple, inline asm, /// descriptors for global variables, and function prototype info. /// Returns the bit offset to backpatch with the location of the real VST. 
@@ -1346,7 +1354,7 @@ // GLOBALVAR: [strtab offset, strtab size, type, isconst, initid, // linkage, alignment, section, visibility, threadlocal, // unnamed_addr, externally_initialized, dllstorageclass, - // comdat, attributes, DSO_Local] + // comdat, attributes, DSO_Local, GlobalSanitizer] Vals.push_back(addToStrtab(GV.getName())); Vals.push_back(GV.getName().size()); Vals.push_back(VE.getTypeID(GV.getValueType())); @@ -1363,7 +1371,7 @@ GV.isExternallyInitialized() || GV.getDLLStorageClass() != GlobalValue::DefaultStorageClass || GV.hasComdat() || GV.hasAttributes() || GV.isDSOLocal() || - GV.hasPartition()) { + GV.hasPartition() || GV.hasSanitizerMetadata()) { Vals.push_back(getEncodedVisibility(GV)); Vals.push_back(getEncodedThreadLocalMode(GV)); Vals.push_back(getEncodedUnnamedAddr(GV)); @@ -1377,6 +1385,11 @@ Vals.push_back(GV.isDSOLocal()); Vals.push_back(addToStrtab(GV.getPartition())); Vals.push_back(GV.getPartition().size()); + + if (GV.hasSanitizerMetadata()) + Vals.push_back(serializeSanitizerMetadata(GV.getSanitizerMetadata())); + else + Vals.push_back(UINT_MAX); } else { AbbrevToUse = SimpleGVarAbbrev; } diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp --- a/llvm/lib/IR/AsmWriter.cpp +++ b/llvm/lib/IR/AsmWriter.cpp @@ -3535,6 +3535,19 @@ Out << '"'; } + using SanitizerMetadata = llvm::GlobalValue::SanitizerMetadata; + if (GV->hasSanitizerMetadata()) { + SanitizerMetadata MD = GV->getSanitizerMetadata(); + if (MD.NoAddress) + Out << ", no_sanitize_address"; + if (MD.NoHWAddress) + Out << ", no_sanitize_hwaddress"; + if (MD.NoMemtag) + Out << ", no_sanitize_memtag"; + if (MD.IsDynInit) + Out << ", sanitize_address_dyninit"; + } + maybePrintComdat(Out, *GV); if (MaybeAlign A = GV->getAlign()) Out << ", align " << A->value(); diff --git a/llvm/lib/IR/Globals.cpp b/llvm/lib/IR/Globals.cpp --- a/llvm/lib/IR/Globals.cpp +++ b/llvm/lib/IR/Globals.cpp @@ -67,6 +67,10 @@ setDLLStorageClass(Src->getDLLStorageClass()); 
setDSOLocal(Src->isDSOLocal()); setPartition(Src->getPartition()); + if (Src->hasSanitizerMetadata()) + setSanitizerMetadata(Src->getSanitizerMetadata()); + else + removeSanitizerMetadata(); } void GlobalValue::removeFromParent() { @@ -217,6 +221,27 @@ HasPartition = !S.empty(); } +using SanitizerMetadata = GlobalValue::SanitizerMetadata; +const SanitizerMetadata &GlobalValue::getSanitizerMetadata() const { + assert(hasSanitizerMetadata()); + assert(getContext().pImpl->GlobalValueSanitizerMetadata.count(this)); + return getContext().pImpl->GlobalValueSanitizerMetadata[this]; +} + +void GlobalValue::setSanitizerMetadata(const SanitizerMetadata &Meta) { + getContext().pImpl->GlobalValueSanitizerMetadata[this] = Meta; + HasSanitizerMetadata = true; +} + +void GlobalValue::removeSanitizerMetadata() { + DenseMap &MetadataMap = + getContext().pImpl->GlobalValueSanitizerMetadata; + auto It = MetadataMap.find(this); + if (It != MetadataMap.end()) + MetadataMap.erase(It); + HasSanitizerMetadata = false; +} + StringRef GlobalObject::getSectionImpl() const { assert(hasSection()); return getContext().pImpl->GlobalObjectSections[this]; diff --git a/llvm/lib/IR/LLVMContextImpl.h b/llvm/lib/IR/LLVMContextImpl.h --- a/llvm/lib/IR/LLVMContextImpl.h +++ b/llvm/lib/IR/LLVMContextImpl.h @@ -1503,6 +1503,9 @@ /// Collection of per-GlobalValue partitions used in this context. DenseMap GlobalValuePartitions; + DenseMap + GlobalValueSanitizerMetadata; + /// DiscriminatorTable - This table maps file:line locations to an /// integer representing the next DWARF path discriminator to assign to /// instructions in different blocks at the same location. 
diff --git a/llvm/test/Assembler/globalvariable-attributes.ll b/llvm/test/Assembler/globalvariable-attributes.ll --- a/llvm/test/Assembler/globalvariable-attributes.ll +++ b/llvm/test/Assembler/globalvariable-attributes.ll @@ -4,6 +4,11 @@ @g2 = global i32 2, align 4 "key3" = "value3" @g3 = global i32 2 #0 @g4 = global i32 2, align 4 "key5" = "value5" #0 +@g5 = global i32 2, no_sanitize_address, align 4 +@g6 = global i32 2, no_sanitize_hwaddress, align 4 +@g7 = global i32 2, no_sanitize_memtag, align 4 +@g8 = global i32 2, sanitize_address_dyninit, align 4 +@g9 = global i32 2, no_sanitize_address, no_sanitize_hwaddress, no_sanitize_memtag, align 4 attributes #0 = { "string" = "value" nobuiltin norecurse } @@ -11,6 +16,11 @@ ; CHECK: @g2 = global i32 2, align 4 #1 ; CHECK: @g3 = global i32 2 #2 ; CHECK: @g4 = global i32 2, align 4 #3 +; CHECK: @g5 = global i32 2, no_sanitize_address, align 4 +; CHECK: @g6 = global i32 2, no_sanitize_hwaddress, align 4 +; CHECK: @g7 = global i32 2, no_sanitize_memtag, align 4 +; CHECK: @g8 = global i32 2, sanitize_address_dyninit, align 4 +; CHECK: @g9 = global i32 2, no_sanitize_address, no_sanitize_hwaddress, no_sanitize_memtag, align 4 ; CHECK: attributes #0 = { "key"="value" "key2"="value2" } ; CHECK: attributes #1 = { "key3"="value3" } diff --git a/llvm/test/Bitcode/compatibility.ll b/llvm/test/Bitcode/compatibility.ll --- a/llvm/test/Bitcode/compatibility.ll +++ b/llvm/test/Bitcode/compatibility.ll @@ -203,6 +203,18 @@ @llvm.global_dtors = appending global [1 x %pri.func.data] [%pri.func.data { i32 0, void ()* @g.f1, i8* @g.used3 }], section "llvm.metadata" ; CHECK: @llvm.global_dtors = appending global [1 x %pri.func.data] [%pri.func.data { i32 0, void ()* @g.f1, i8* @g.used3 }], section "llvm.metadata" +; Global Variables -- sanitizers +@g.no_sanitize_address = global i32 0, no_sanitize_address +@g.no_sanitize_hwaddress = global i32 0, no_sanitize_hwaddress +@g.no_sanitize_memtag = global i32 0, no_sanitize_memtag 
+@g.no_sanitize_multiple = global i32 0, no_sanitize_address, no_sanitize_hwaddress, no_sanitize_memtag +@g.sanitize_address_dyninit = global i32 0, sanitize_address_dyninit +; CHECK: @g.no_sanitize_address = global i32 0, no_sanitize_address +; CHECK: @g.no_sanitize_hwaddress = global i32 0, no_sanitize_hwaddress +; CHECK: @g.no_sanitize_memtag = global i32 0, no_sanitize_memtag +; CHECK: @g.no_sanitize_multiple = global i32 0, no_sanitize_address, no_sanitize_hwaddress, no_sanitize_memtag +; CHECK: @g.sanitize_address_dyninit = global i32 0, sanitize_address_dyninit + ;; Aliases ; Format: @ = [Linkage] [Visibility] [DLLStorageClass] [ThreadLocal] ; [unnamed_addr] alias @