diff --git a/llvm/include/llvm/AsmParser/LLParser.h b/llvm/include/llvm/AsmParser/LLParser.h --- a/llvm/include/llvm/AsmParser/LLParser.h +++ b/llvm/include/llvm/AsmParser/LLParser.h @@ -108,6 +108,12 @@ SmallVector InstsWithTBAATag; + /// DIAssignID metadata does not support temporary RAUW so we cannot use + /// the normal metadata forward reference resolution method. Instead, + /// instructions with a DIAssignID attachment are recorded here (keyed by + /// the parsed node); the real DIAssignID is attached once it is defined. + DenseMap> TempDIAssignIDAttachments; + // Type resolution handling data structures. The location is set when we // have processed a use of the type but not a definition yet. StringMap > NamedTypes; diff --git a/llvm/include/llvm/IR/DebugInfo.h b/llvm/include/llvm/IR/DebugInfo.h --- a/llvm/include/llvm/IR/DebugInfo.h +++ b/llvm/include/llvm/IR/DebugInfo.h @@ -21,7 +21,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/TinyPtrVector.h" #include "llvm/ADT/iterator_range.h" -#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/IntrinsicInst.h" namespace llvm { @@ -159,6 +159,67 @@ SmallPtrSet NodesSeen; }; +/// Assignment Tracking (at). +namespace at { +// +// Utilities for enumerating storing instructions from an assignment ID. +// +/// A range of instructions. +using AssignmentInstRange = + iterator_range::iterator>; +/// Return a range of instructions (typically just one) that have \p ID +/// as an attachment. +/// Iterators invalidated by adding or removing DIAssignID metadata to/from any +/// instruction (including by deleting or cloning instructions). +AssignmentInstRange getAssignmentInsts(DIAssignID *ID); +/// Return a range of instructions (typically just one) that perform the +/// assignment that \p DAI encodes. +/// Iterators invalidated by adding or removing DIAssignID metadata to/from any +/// instruction (including by deleting or cloning instructions). 
+inline AssignmentInstRange getAssignmentInsts(const DbgAssignIntrinsic *DAI) { + return getAssignmentInsts(cast(DAI->getAssignID())); +} + +// +// Utilities for enumerating llvm.dbg.assign intrinsics from an assignment ID. +// +/// High level: this is an iterator for llvm.dbg.assign intrinsics. +/// Implementation details: this is a wrapper around Value's User iterator that +/// dereferences to a DbgAssignIntrinsic ptr rather than a User ptr. +class DbgAssignIt + : public iterator_adaptor_base::iterator_category, + DbgAssignIntrinsic *, std::ptrdiff_t, + DbgAssignIntrinsic **, + DbgAssignIntrinsic *&> { +public: + DbgAssignIt(Value::user_iterator It) : iterator_adaptor_base(It) {} + DbgAssignIntrinsic *operator*() const { return cast(*I); } +}; +/// A range of llvm.dbg.assign intrinsics. +using AssignmentMarkerRange = iterator_range; +/// Return a range of dbg.assign intrinsics which use \p ID as an operand. +/// Iterators invalidated by deleting an intrinsic contained in this range. +AssignmentMarkerRange getAssignmentMarkers(DIAssignID *ID); +/// Return a range of dbg.assign intrinsics for which \p Inst performs the +/// assignment they encode. +/// Iterators invalidated by deleting an intrinsic contained in this range. +inline AssignmentMarkerRange getAssignmentMarkers(const Instruction *Inst) { + if (auto *ID = Inst->getMetadata(LLVMContext::MD_DIAssignID)) + return getAssignmentMarkers(cast(ID)); + else + return make_range(Value::user_iterator(), Value::user_iterator()); +} + +/// Replace all uses (and attachments) of \p Old with \p New. +void RAUW(DIAssignID *Old, DIAssignID *New); + +/// Remove all Assignment Tracking related intrinsics and metadata from \p F. +void deleteAll(Function *F); + +} // end namespace at + /// Return true if assignment tracking is enabled. 
bool getEnableAssignmentTracking(); } // end namespace llvm diff --git a/llvm/include/llvm/IR/Instruction.h b/llvm/include/llvm/IR/Instruction.h --- a/llvm/include/llvm/IR/Instruction.h +++ b/llvm/include/llvm/IR/Instruction.h @@ -515,6 +515,10 @@ void getAllMetadataImpl(SmallVectorImpl> &) const; + /// Update the LLVMContext ID-to-Instruction(s) mapping. If \p ID is nullptr + /// then clear the mapping for this instruction. + void updateDIAssignIDMapping(DIAssignID *ID); + public: //===--------------------------------------------------------------------===// // Predicates and helper methods. diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp --- a/llvm/lib/AsmParser/LLParser.cpp +++ b/llvm/lib/AsmParser/LLParser.cpp @@ -853,7 +853,18 @@ // See if this was forward referenced, if so, handle it. auto FI = ForwardRefMDNodes.find(MetadataID); if (FI != ForwardRefMDNodes.end()) { - FI->second.first->replaceAllUsesWith(Init); + auto *ToReplace = FI->second.first.get(); + // DIAssignID has its own special forward-reference "replacement" for + // attachments (the temporary attachments are never actually attached). 
+ if (isa(Init)) { + for (auto *Inst : TempDIAssignIDAttachments[ToReplace]) { + assert(!Inst->getMetadata(LLVMContext::MD_DIAssignID) && + "Inst unexpectedly already has DIAssignID attachment"); + Inst->setMetadata(LLVMContext::MD_DIAssignID, Init); + } + } + + ToReplace->replaceAllUsesWith(Init); ForwardRefMDNodes.erase(FI); assert(NumberedMetadata[MetadataID] == Init && "Tracking VH didn't work"); @@ -2082,7 +2093,11 @@ if (parseMetadataAttachment(MDK, N)) return true; - Inst.setMetadata(MDK, N); + if (MDK == LLVMContext::MD_DIAssignID) + TempDIAssignIDAttachments[N].push_back(&Inst); + else + Inst.setMetadata(MDK, N); + if (MDK == LLVMContext::MD_tbaa) InstsWithTBAATag.push_back(&Inst); diff --git a/llvm/lib/IR/DebugInfo.cpp b/llvm/lib/IR/DebugInfo.cpp --- a/llvm/lib/IR/DebugInfo.cpp +++ b/llvm/lib/IR/DebugInfo.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "llvm-c/DebugInfo.h" +#include "LLVMContextImpl.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/STLExtras.h" @@ -37,6 +38,7 @@ #include using namespace llvm; +using namespace llvm::at; using namespace llvm::dwarf; static cl::opt @@ -1632,3 +1634,61 @@ return (LLVMMetadataKind)LLVMGenericDINodeMetadataKind; } } + +AssignmentInstRange at::getAssignmentInsts(DIAssignID *ID) { + assert(ID && "Expected non-null ID"); + LLVMContext &Ctx = ID->getContext(); + auto &Map = Ctx.pImpl->AssignmentIDToInstrs; + + auto MapIt = Map.find(ID); + if (MapIt == Map.end()) + return make_range(nullptr, nullptr); + + return make_range(MapIt->second.begin(), MapIt->second.end()); +} + +AssignmentMarkerRange at::getAssignmentMarkers(DIAssignID *ID) { + assert(ID && "Expected non-null ID"); + LLVMContext &Ctx = ID->getContext(); + + auto *IDAsValue = MetadataAsValue::getIfExists(Ctx, ID); + + // The ID is only used wrapped in MetadataAsValue(ID), so let's check that + // one of those already exists first. 
+ if (!IDAsValue) + return make_range(Value::user_iterator(), Value::user_iterator()); + + return make_range(IDAsValue->user_begin(), IDAsValue->user_end()); +} + +void at::RAUW(DIAssignID *Old, DIAssignID *New) { + // Replace MetadataAsValue uses. + if (auto *OldIDAsValue = + MetadataAsValue::getIfExists(Old->getContext(), Old)) { + auto *NewIDAsValue = MetadataAsValue::get(Old->getContext(), New); + OldIDAsValue->replaceAllUsesWith(NewIDAsValue); + } + + // Replace attachments. + AssignmentInstRange InstRange = getAssignmentInsts(Old); + // Use intermediate storage for the instruction ptrs because the + // getAssignmentInsts range iterators will be invalidated by adding and + // removing DIAssignID attachments. + SmallVector InstVec(InstRange.begin(), InstRange.end()); + for (auto *I : InstVec) + I->setMetadata(LLVMContext::MD_DIAssignID, New); +} + +void at::deleteAll(Function *F) { + SmallVector ToDelete; + for (BasicBlock &BB : *F) { + for (Instruction &I : BB) { + if (auto *DAI = dyn_cast(&I)) + ToDelete.push_back(DAI); + else + I.setMetadata(LLVMContext::MD_DIAssignID, nullptr); + } + } + for (auto *DAI : ToDelete) + DAI->eraseFromParent(); +} diff --git a/llvm/lib/IR/Instruction.cpp b/llvm/lib/IR/Instruction.cpp --- a/llvm/lib/IR/Instruction.cpp +++ b/llvm/lib/IR/Instruction.cpp @@ -55,6 +55,10 @@ // instructions in a BasicBlock are deleted). if (isUsedByMetadata()) ValueAsMetadata::handleRAUW(this, UndefValue::get(getType())); + + // Explicitly remove DIAssignID metadata to clear up ID -> Instruction(s) + // mapping in LLVMContext. + setMetadata(LLVMContext::MD_DIAssignID, nullptr); } diff --git a/llvm/lib/IR/LLVMContextImpl.h b/llvm/lib/IR/LLVMContextImpl.h --- a/llvm/lib/IR/LLVMContextImpl.h +++ b/llvm/lib/IR/LLVMContextImpl.h @@ -1499,6 +1499,11 @@ /// Collection of metadata used in this context. DenseMap ValueMetadata; + /// Map DIAssignID -> Instructions with that attachment. + /// Managed by Instruction via Instruction::updateDIAssignIDMapping. 
+ /// Query using the at:: functions defined in DebugInfo.h. + DenseMap> AssignmentIDToInstrs; + /// Collection of per-GlobalObject sections used in this context. DenseMap GlobalObjectSections; diff --git a/llvm/lib/IR/Metadata.cpp b/llvm/lib/IR/Metadata.cpp --- a/llvm/lib/IR/Metadata.cpp +++ b/llvm/lib/IR/Metadata.cpp @@ -1425,6 +1425,37 @@ } } +void Instruction::updateDIAssignIDMapping(DIAssignID *ID) { + auto &IDToInstrs = getContext().pImpl->AssignmentIDToInstrs; + if (const DIAssignID *CurrentID = + cast_or_null(getMetadata(LLVMContext::MD_DIAssignID))) { + // Nothing to do if the ID isn't changing. + if (ID == CurrentID) + return; + + // Unmap this instruction from its current ID. + auto InstrsIt = IDToInstrs.find(CurrentID); + assert(InstrsIt != IDToInstrs.end() && + "Expect existing attachment to be mapped"); + + auto &InstVec = InstrsIt->second; + auto *InstIt = std::find(InstVec.begin(), InstVec.end(), this); + assert(InstIt != InstVec.end() && + "Expect instruction to be mapped to attachment"); + // The vector contains a ptr to this. If this is the only element in the + // vector, remove the ID:vector entry, otherwise just remove the + // instruction from the vector. + if (InstVec.size() == 1) + IDToInstrs.erase(InstrsIt); + else + InstVec.erase(InstIt); + } + + // Map this instruction to the new ID. + if (ID) + IDToInstrs[ID].push_back(this); +} + void Instruction::setMetadata(unsigned KindID, MDNode *Node) { if (!Node && !hasMetadata()) return; @@ -1435,6 +1466,16 @@ return; } + // Update DIAssignID to Instruction(s) mapping. + if (KindID == LLVMContext::MD_DIAssignID) { + // The DIAssignID tracking infrastructure doesn't support RAUWing temporary + // nodes with DIAssignIDs. The cast_or_null below would also catch this, but + // having a dedicated assert helps make this obvious. 
+ assert((!Node || !Node->isTemporary()) && + "Temporary DIAssignIDs are invalid"); + updateDIAssignIDMapping(cast_or_null(Node)); + } + Value::setMetadata(KindID, Node); } diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -68,6 +68,7 @@ #include "llvm/IR/ConstantRange.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugInfo.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/DerivedTypes.h" @@ -4548,6 +4549,10 @@ CheckDI(isa(User), "!DIAssignID should only be used by llvm.dbg.assign intrinsics", MD, User); + // All of the dbg.assign intrinsics should be in the same function as I. + if (auto *DAI = dyn_cast(User)) + CheckDI(DAI->getFunction() == I.getFunction(), + "dbg.assign not in same function as inst", DAI, &I); } } } @@ -6008,6 +6013,10 @@ CheckDI(isa(DAI->getRawAddressExpression()), "invalid llvm.dbg.assign intrinsic address expression", &DII, DAI->getRawAddressExpression()); + // All of the linked instructions should be in the same function as DII. + for (Instruction *I : at::getAssignmentInsts(DAI)) + CheckDI(DAI->getFunction() == I->getFunction(), + "inst not in same function as dbg.assign", I, DAI); } // Ignore broken !dbg attachments; they're checked elsewhere. diff --git a/llvm/test/DebugInfo/Generic/assignment-tracking/parse-and-verify/verify.ll b/llvm/test/DebugInfo/Generic/assignment-tracking/parse-and-verify/verify.ll --- a/llvm/test/DebugInfo/Generic/assignment-tracking/parse-and-verify/verify.ll +++ b/llvm/test/DebugInfo/Generic/assignment-tracking/parse-and-verify/verify.ll @@ -6,6 +6,13 @@ ;; ;; Checks for this one are inline. +define dso_local void @fun2() !dbg !15 { + ;; DIAssignID copied here from @fun() where it is used by intrinsics. 
+ ; CHECK: dbg.assign not in same function as inst + %x = alloca i32, align 4, !DIAssignID !14 + ret void +} + define dso_local void @fun() !dbg !7 { entry: %a = alloca i32, align 4, !DIAssignID !14 @@ -50,3 +57,4 @@ !11 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) !13 = !DILocation(line: 1, column: 1, scope: !7) !14 = distinct !DIAssignID() +!15 = distinct !DISubprogram(name: "fun2", scope: !1, file: !1, line: 1, type: !8, scopeLine: 1, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2) diff --git a/llvm/unittests/IR/DebugInfoTest.cpp b/llvm/unittests/IR/DebugInfoTest.cpp --- a/llvm/unittests/IR/DebugInfoTest.cpp +++ b/llvm/unittests/IR/DebugInfoTest.cpp @@ -368,4 +368,130 @@ EXPECT_EQ(MDExp->getNumElements(), 0u); } +TEST(AssignmentTrackingTest, Utils) { + // Test the assignment tracking utils defined in DebugInfo.h namespace at {}. + // This includes: + // getAssignmentInsts + // getAssignmentMarkers + // RAUW + // deleteAll + // + // The input IR includes two functions, fun1 and fun2. Both contain an alloca + // with a DIAssignID tag. fun1's alloca is linked to two llvm.dbg.assign + // intrinsics, one of which is for an inlined variable and appears before the + // alloca. 
+ + LLVMContext C; + std::unique_ptr M = parseIR(C, R"( + define dso_local void @fun1() !dbg !7 { + entry: + call void @llvm.dbg.assign(metadata i32 undef, metadata !10, metadata !DIExpression(), metadata !12, metadata i32 undef, metadata !DIExpression()), !dbg !13 + %local = alloca i32, align 4, !DIAssignID !12 + call void @llvm.dbg.assign(metadata i32 undef, metadata !16, metadata !DIExpression(), metadata !12, metadata i32 undef, metadata !DIExpression()), !dbg !15 + ret void, !dbg !15 + } + + define dso_local void @fun2() !dbg !17 { + entry: + %local = alloca i32, align 4, !DIAssignID !20 + call void @llvm.dbg.assign(metadata i32 undef, metadata !18, metadata !DIExpression(), metadata !20, metadata i32 undef, metadata !DIExpression()), !dbg !19 + ret void, !dbg !19 + } + + declare void @llvm.dbg.assign(metadata, metadata, metadata, metadata, metadata, metadata) + + !llvm.dbg.cu = !{!0} + !llvm.module.flags = !{!3, !4, !5} + !llvm.ident = !{!6} + + !0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 14.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, splitDebugInlining: false, nameTableKind: None) + !1 = !DIFile(filename: "test.c", directory: "/") + !2 = !{} + !3 = !{i32 7, !"Dwarf Version", i32 4} + !4 = !{i32 2, !"Debug Info Version", i32 3} + !5 = !{i32 1, !"wchar_size", i32 4} + !6 = !{!"clang version 14.0.0"} + !7 = distinct !DISubprogram(name: "fun1", scope: !1, file: !1, line: 1, type: !8, scopeLine: 1, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2) + !8 = !DISubroutineType(types: !9) + !9 = !{null} + !10 = !DILocalVariable(name: "local3", scope: !14, file: !1, line: 2, type: !11) + !11 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) + !12 = distinct !DIAssignID() + !13 = !DILocation(line: 5, column: 1, scope: !14, inlinedAt: !15) + !14 = distinct !DISubprogram(name: "inline", scope: !1, file: !1, line: 1, type: !8, scopeLine: 1, spFlags: DISPFlagDefinition, unit: 
!0, retainedNodes: !2) + !15 = !DILocation(line: 3, column: 1, scope: !7) + !16 = !DILocalVariable(name: "local1", scope: !7, file: !1, line: 2, type: !11) + !17 = distinct !DISubprogram(name: "fun2", scope: !1, file: !1, line: 1, type: !8, scopeLine: 1, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2) + !18 = !DILocalVariable(name: "local2", scope: !17, file: !1, line: 2, type: !11) + !19 = !DILocation(line: 4, column: 1, scope: !17) + !20 = distinct !DIAssignID() + )"); + + // Check the test IR isn't malformed. + ASSERT_TRUE(M); + + Function &Fun1 = *M->getFunction("fun1"); + Instruction &Alloca = *Fun1.getEntryBlock().getFirstNonPHIOrDbg(); + + // 1. Check the Instruction <-> Intrinsic mappings work in fun1. + // + // Check there are two llvm.dbg.assign intrinsics linked to Alloca. + auto CheckFun1Mapping = [&Alloca]() { + auto Markers = at::getAssignmentMarkers(&Alloca); + EXPECT_TRUE(std::distance(Markers.begin(), Markers.end()) == 2); + // Check those two entries are distinct. + DbgAssignIntrinsic *First = *Markers.begin(); + DbgAssignIntrinsic *Second = *std::next(Markers.begin()); + EXPECT_NE(First, Second); + + // Check that we can get back to Alloca from each llvm.dbg.assign. + for (auto *DAI : Markers) { + auto Insts = at::getAssignmentInsts(DAI); + // Check there is exactly one instruction linked to each intrinsic. Use + // ASSERT_TRUE because we're going to dereference the begin iterator. + ASSERT_TRUE(std::distance(Insts.begin(), Insts.end()) == 1); + EXPECT_FALSE(Insts.empty()); + // Check the linked instruction is Alloca. + Instruction *LinkedInst = *Insts.begin(); + EXPECT_EQ(LinkedInst, &Alloca); + } + }; + CheckFun1Mapping(); + + // 2. Check DIAssignID RAUW replaces attachments and uses. 
+ // + DIAssignID *Old = + cast_or_null(Alloca.getMetadata(LLVMContext::MD_DIAssignID)); + DIAssignID *New = DIAssignID::getDistinct(C); + ASSERT_TRUE(Old && New && New != Old); + at::RAUW(Old, New); + // Check fun1's alloca and intrinsics have been updated and the mapping still + // works. + EXPECT_EQ(New, cast_or_null( + Alloca.getMetadata(LLVMContext::MD_DIAssignID))); + CheckFun1Mapping(); + + // Check that fun2's alloca and intrinsic have not been updated. + Instruction &Fun2Alloca = + *M->getFunction("fun2")->getEntryBlock().getFirstNonPHIOrDbg(); + DIAssignID *Fun2ID = cast_or_null( + Fun2Alloca.getMetadata(LLVMContext::MD_DIAssignID)); + EXPECT_NE(New, Fun2ID); + auto Fun2Markers = at::getAssignmentMarkers(&Fun2Alloca); + ASSERT_TRUE(std::distance(Fun2Markers.begin(), Fun2Markers.end()) == 1); + auto Fun2Insts = at::getAssignmentInsts(*Fun2Markers.begin()); + ASSERT_TRUE(std::distance(Fun2Insts.begin(), Fun2Insts.end()) == 1); + EXPECT_EQ(*Fun2Insts.begin(), &Fun2Alloca); + + // 3. Check that deleting works and applies only to the target function. + at::deleteAll(&Fun1); + // There should now only be the alloca and ret in fun1 (unsigned literal matches size()). + EXPECT_EQ(Fun1.begin()->size(), 2u); + // fun2's alloca should have the same DIAssignID and remain linked to its + // llvm.dbg.assign. + EXPECT_EQ(Fun2ID, cast_or_null( + Fun2Alloca.getMetadata(LLVMContext::MD_DIAssignID))); + EXPECT_FALSE(at::getAssignmentMarkers(&Fun2Alloca).empty()); +} + } // end namespace