diff --git a/llvm/include/llvm/CodeGen/MachineInstr.h b/llvm/include/llvm/CodeGen/MachineInstr.h --- a/llvm/include/llvm/CodeGen/MachineInstr.h +++ b/llvm/include/llvm/CodeGen/MachineInstr.h @@ -1203,6 +1203,13 @@ bool isIdenticalTo(const MachineInstr &Other, MICheckType Check = CheckDefs) const; + /// Returns true if this instruction is a debug instruction that represents an + /// identical debug value to \p Other. + /// This function considers these debug instructions equivalent if they have + /// identical variables, debug locations, and debug operands, and if the + /// DIExpressions combined with the directness flags are equivalent. + bool isEquivalentDbgInstr(const MachineInstr &Other) const; + /// Unlink 'this' from the containing basic block, and return it without /// deleting it. /// diff --git a/llvm/include/llvm/IR/DebugInfoMetadata.h b/llvm/include/llvm/IR/DebugInfoMetadata.h --- a/llvm/include/llvm/IR/DebugInfoMetadata.h +++ b/llvm/include/llvm/IR/DebugInfoMetadata.h @@ -2794,6 +2794,31 @@ /// it cannot be a simple register location. bool isComplex() const; + /// Inserts the elements of \p Expr into \p Ops modified to a canonical form, + /// which uses DW_OP_LLVM_arg (i.e. is a variadic expression) and folds the + /// implied derefence from the \p IsIndirect flag into the expression. This + /// allows us to check equivalence between expressions with differing + /// directness or variadicness. + static void canonicalizeExpressionOps(SmallVectorImpl &Ops, + const DIExpression *Expr, + bool IsIndirect); + + /// Determines whether two debug values should produce equivalent DWARF + /// expressions, using their DIExpressions and directness, ignoring the + /// differences between otherwise identical expressions in variadic and + /// non-variadic form and not considering the debug operands. + /// \p FirstExpr is the DIExpression for the first debug value. + /// \p FirstIndirect should be true if the first debug value is indirect; in + /// IR this should be true for dbg.declare and dbg.addr intrinsics and false + /// for dbg.values, and in MIR this should be true only for DBG_VALUE + /// instructions whose second operand is an immediate value. + /// \p SecondExpr and \p SecondIndirect have the same meaning as the prior + /// arguments, but apply to the second debug value. + static bool isEqualExpression(const DIExpression *FirstExpr, + bool FirstIndirect, + const DIExpression *SecondExpr, + bool SecondIndirect); + /// Append \p Ops with operations to apply the \p Offset. static void appendOffset(SmallVectorImpl &Ops, int64_t Offset); diff --git a/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp b/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp --- a/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp @@ -76,7 +76,7 @@ auto &Entries = VarEntries[Var]; if (!Entries.empty() && Entries.back().isDbgValue() && !Entries.back().isClosed() && - Entries.back().getInstr()->isIdenticalTo(MI)) { + Entries.back().getInstr()->isEquivalentDbgInstr(MI)) { LLVM_DEBUG(dbgs() << "Coalescing identical DBG_VALUE entries:\n" << "\t" << Entries.back().getInstr() << "\t" << MI << "\n"); diff --git a/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h b/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h --- a/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h +++ b/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h @@ -76,6 +76,9 @@ : EntryKind(E_TargetIndexLocation), TIL(Loc) {} bool isLocation() const { return EntryKind == E_Location; } + bool isIndirectLocation() const { + return EntryKind == E_Location && Loc.isIndirect(); + } bool isTargetIndexLocation() const { return EntryKind == E_TargetIndexLocation; } @@ -150,6 +153,29 @@ bool isFragment() const { return getExpression()->isFragment(); } bool isEntryVal() const { return getExpression()->isEntryValue(); } bool isVariadic() const { return IsVariadic; } + bool isEquivalent(const DbgValueLoc &Other) const { + // Cannot be equivalent with different numbers of entries. + if (ValueLocEntries.size() != Other.ValueLocEntries.size()) + return false; + bool ThisIsIndirect = + !IsVariadic && ValueLocEntries[0].isIndirectLocation(); + bool OtherIsIndirect = + !Other.IsVariadic && Other.ValueLocEntries[0].isIndirectLocation(); + // Check equivalence of DIExpressions + Directness together. + if (!DIExpression::isEqualExpression(Expression, ThisIsIndirect, + Other.Expression, OtherIsIndirect)) + return false; + // Indirectness should have been accounted for in the above check, so just + // compare register values directly here. + if (ThisIsIndirect || OtherIsIndirect) { + DbgValueLocEntry ThisOp = ValueLocEntries[0]; + DbgValueLocEntry OtherOp = Other.ValueLocEntries[0]; + return ThisOp.isLocation() && OtherOp.isLocation() && + ThisOp.getLoc().getReg() == OtherOp.getLoc().getReg(); + } + // If neither are indirect, then just compare the loc entries directly. + return ValueLocEntries == Other.ValueLocEntries; + } const DIExpression *getExpression() const { return Expression; } ArrayRef getLocEntries() const { return ValueLocEntries; } friend bool operator==(const DbgValueLoc &, const DbgValueLoc &); @@ -191,11 +217,15 @@ /// Entry. bool MergeRanges(const DebugLocEntry &Next) { // If this and Next are describing the same variable, merge them. - if ((End == Next.Begin && Values == Next.Values)) { - End = Next.End; - return true; - } - return false; + if (End != Next.Begin) + return false; + if (Values.size() != Next.Values.size()) + return false; + for (unsigned EntryIdx = 0; EntryIdx < Values.size(); ++EntryIdx) + if (!Values[EntryIdx].isEquivalent(Next.Values[EntryIdx])) + return false; + End = Next.End; + return true; } const MCSymbol *getBeginSym() const { return Begin; } diff --git a/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h b/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h --- a/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h +++ b/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h @@ -248,6 +248,11 @@ Indirect = MI.isDebugOffsetImm(); } + bool isJoinable(const DbgValueProperties &Other) const { + return DIExpression::isEqualExpression(DIExpr, Indirect, Other.DIExpr, + Other.Indirect); + } + bool operator==(const DbgValueProperties &Other) const { return std::tie(DIExpr, Indirect, IsVariadic) == std::tie(Other.DIExpr, Other.Indirect, Other.IsVariadic); diff --git a/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp b/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp --- a/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp +++ b/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp @@ -2676,7 +2676,7 @@ if (OutVal.isUnjoinedPHI() && OutVal.BlockNo != MBB.getNumber()) return false; - if (FirstValue.Properties != OutVal.Properties) + if (!FirstValue.Properties.isJoinable(OutVal.Properties)) return false; for (unsigned Idx = 0; Idx < FirstValue.getLocationOpCount(); ++Idx) { @@ -2864,7 +2864,7 @@ // different DIExpressions, different indirectness, or are mixed constants / // non-constants. for (const auto &V : Values) { - if (V.second->Properties != FirstVal.Properties) + if (!V.second->Properties.isJoinable(FirstVal.Properties)) return false; if (V.second->Kind == DbgValue::NoVal) return false; diff --git a/llvm/lib/CodeGen/MachineInstr.cpp b/llvm/lib/CodeGen/MachineInstr.cpp --- a/llvm/lib/CodeGen/MachineInstr.cpp +++ b/llvm/lib/CodeGen/MachineInstr.cpp @@ -674,6 +674,25 @@ return true; } +bool MachineInstr::isEquivalentDbgInstr(const MachineInstr &Other) const { + if (!isDebugValue() || !Other.isDebugValue()) + return false; + if (getDebugLoc() != Other.getDebugLoc()) + return false; + if (getDebugVariable() != Other.getDebugVariable()) + return false; + if (getNumDebugOperands() != Other.getNumDebugOperands()) + return false; + for (unsigned OpIdx = 0; OpIdx < getNumDebugOperands(); ++OpIdx) + if (!getDebugOperand(OpIdx).isIdenticalTo(Other.getDebugOperand(OpIdx))) + return false; + if (!DIExpression::isEqualExpression( + getDebugExpression(), isIndirectDebugValue(), + Other.getDebugExpression(), Other.isIndirectDebugValue())) + return false; + return true; +} + const MachineFunction *MachineInstr::getMF() const { return getParent()->getParent(); } diff --git a/llvm/lib/IR/DebugInfoMetadata.cpp b/llvm/lib/IR/DebugInfoMetadata.cpp --- a/llvm/lib/IR/DebugInfoMetadata.cpp +++ b/llvm/lib/IR/DebugInfoMetadata.cpp @@ -1441,6 +1441,48 @@ return false; } +void DIExpression::canonicalizeExpressionOps(SmallVectorImpl &Ops, + const DIExpression *Expr, + bool IsIndirect) { + // If Expr is not already variadic, insert the implied `DW_OP_LLVM_arg 0` + // to the existing expression ops. + if (none_of(Expr->expr_ops(), [](auto ExprOp) { + return ExprOp.getOp() == dwarf::DW_OP_LLVM_arg; + })) + Ops.append({dwarf::DW_OP_LLVM_arg, 0}); + // If Expr is not indirect, we only need to insert the expression elements and + // we're done. + if (!IsIndirect) { + Ops.append(Expr->elements_begin(), Expr->elements_end()); + return; + } + // If Expr is indirect, insert the implied DW_OP_deref at the end of the + // expression but before DW_OP_{stack_value, LLVM_fragment} if they are + // present. + for (auto Op : Expr->expr_ops()) { + if (Op.getOp() == dwarf::DW_OP_stack_value || + Op.getOp() == dwarf::DW_OP_LLVM_fragment) { + Ops.push_back(dwarf::DW_OP_deref); + IsIndirect = false; + } + Op.appendToVector(Ops); + } + if (IsIndirect) + Ops.push_back(dwarf::DW_OP_deref); +} + +bool DIExpression::isEqualExpression(const DIExpression *FirstExpr, + bool FirstIndirect, + const DIExpression *SecondExpr, + bool SecondIndirect) { + SmallVector FirstOps; + DIExpression::canonicalizeExpressionOps(FirstOps, FirstExpr, FirstIndirect); + SmallVector SecondOps; + DIExpression::canonicalizeExpressionOps(SecondOps, SecondExpr, + SecondIndirect); + return FirstOps == SecondOps; +} + std::optional DIExpression::getFragmentInfo(expr_op_iterator Start, expr_op_iterator End) { for (auto I = Start; I != End; ++I) diff --git a/llvm/test/DebugInfo/Generic/merge-equivalent-ranges.ll b/llvm/test/DebugInfo/Generic/merge-equivalent-ranges.ll new file mode 100644 --- /dev/null +++ b/llvm/test/DebugInfo/Generic/merge-equivalent-ranges.ll @@ -0,0 +1,75 @@ + +; RUN: %llc_dwarf -filetype=obj -o %t.o < %s +; RUN: llvm-dwarfdump %t.o | FileCheck %s + +;; Checks that when we have a set of debug values that are all different but +;; result in identical DW_AT_location entries, we are able to merge them into a +;; single DW_AT_location instead of producing a loclist with identical locations +;; for each. + +;; Checks that we can merge an indirect debug value with an equivalent direct +;; debug value that uses DW_OP_deref. +; CHECK: DW_AT_location +; CHECK-SAME: (DW_OP_fbreg +8) +; CHECK-NEXT: DW_AT_name ("Var1") + +;; Checks that we can merge a non-variadic debug value with an equivalent +;; variadic debug value. +; CHECK: DW_AT_location +; CHECK-SAME: (DW_OP_fbreg +8, DW_OP_deref, DW_OP_stack_value) +; CHECK-NEXT: DW_AT_name ("Var2") + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +%"struct.std::_Rb_tree_node_base" = type { i32, ptr, ptr, ptr } + +define i32 @_ZN4llvm9MCContext12GetDwarfFileENS_9StringRefES1_jj(ptr %this, ptr %A, i64 %B, ptr %C, i64 %D, i32 %E, i32 %CUID, i1 %cmp.i.i.i.i.i) !dbg !10 { +entry: + %CUID.addr = alloca i32, align 4 + store i32 %CUID, ptr %CUID.addr, align 4 + call void @llvm.dbg.value(metadata ptr %CUID.addr, metadata !16, metadata !DIExpression(DW_OP_deref)), !dbg !17 + call void @llvm.dbg.value(metadata ptr %CUID.addr, metadata !26, metadata !DIExpression(DW_OP_deref, DW_OP_stack_value)), !dbg !17 + %0 = load ptr, ptr null, align 8, !dbg !18 + call void @llvm.dbg.value(metadata !DIArgList(ptr %CUID.addr), metadata !26, metadata !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_deref, DW_OP_stack_value)), !dbg !18 + br label %while.body.i.i.i.i + +while.body.i.i.i.i: ; preds = %while.body.i.i.i.i, %entry + %__x.addr.011.i.i.i.i = phi ptr [ %__x.addr.1.in.i.i.i.i, %while.body.i.i.i.i ], [ %0, %entry ] + %_M_right.i.i.i.i.i = getelementptr inbounds %"struct.std::_Rb_tree_node_base", ptr %__x.addr.011.i.i.i.i, i64 0, i32 3, !dbg !20 + call void @llvm.dbg.addr(metadata ptr %CUID.addr, metadata !16, metadata !DIExpression()), !dbg !20 + %__x.addr.1.in.i.i.i.i = select i1 %cmp.i.i.i.i.i, ptr %__x.addr.011.i.i.i.i, ptr null, !dbg !20 + br label %while.body.i.i.i.i +} + +declare void @llvm.dbg.value(metadata, metadata, metadata) +declare void @llvm.dbg.addr(metadata, metadata, metadata) + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!2, !3, !4, !5, !6, !7, !8} +!llvm.ident = !{!9} + +!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "clang version 16.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: None) +!1 = !DIFile(filename: "test.cpp", directory: "/") +!2 = !{i32 7, !"Dwarf Version", i32 5} +!3 = !{i32 2, !"Debug Info Version", i32 3} +!4 = !{i32 1, !"wchar_size", i32 4} +!5 = !{i32 8, !"PIC Level", i32 2} +!6 = !{i32 7, !"PIE Level", i32 2} +!7 = !{i32 7, !"uwtable", i32 2} +!8 = !{i32 7, !"frame-pointer", i32 2} +!9 = !{!"clang version 16.0.0"} +!10 = distinct !DISubprogram(name: "foo", linkageName: "_Z3fooPi", scope: !1, file: !1, line: 1, type: !11, scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !15) +!11 = !DISubroutineType(types: !12) +!12 = !{!13, !14} +!13 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!14 = !DIBasicType(name: "unsigned int", size: 32, encoding: DW_ATE_unsigned) +!15 = !{} +!16 = !DILocalVariable(name: "Var1", scope: !10, file: !1, line: 1, type: !14) +!17 = !DILocation(line: 1, column: 14, scope: !10) +!18 = !DILocation(line: 2, column: 3, scope: !10) +!19 = !DILocation(line: 2, column: 5, scope: !10) +!20 = !DILocation(line: 3, column: 10, scope: !10) +!21 = !DILocation(line: 3, column: 9, scope: !10) +!22 = !DILocation(line: 3, column: 2, scope: !10) +!26 = !DILocalVariable(name: "Var2", scope: !10, file: !1, line: 1, type: !14) diff --git a/llvm/unittests/IR/MetadataTest.cpp b/llvm/unittests/IR/MetadataTest.cpp --- a/llvm/unittests/IR/MetadataTest.cpp +++ b/llvm/unittests/IR/MetadataTest.cpp @@ -3110,6 +3110,49 @@ #undef EXPECT_REPLACE_ARG_EQ } +TEST_F(DIExpressionTest, isEqualExpression) { +#define EXPECT_EQ_DEBUG_VALUE(ExprA, DirectA, ExprB, DirectB) \ + EXPECT_TRUE(DIExpression::isEqualExpression(ExprA, DirectA, ExprB, DirectB)) +#define EXPECT_NE_DEBUG_VALUE(ExprA, DirectA, ExprB, DirectB) \ + EXPECT_FALSE(DIExpression::isEqualExpression(ExprA, DirectA, ExprB, DirectB)) +#define GET_EXPR(...) DIExpression::get(Context, {__VA_ARGS__}) + + EXPECT_EQ_DEBUG_VALUE(GET_EXPR(), false, GET_EXPR(), false); + EXPECT_NE_DEBUG_VALUE(GET_EXPR(), false, GET_EXPR(), true); + EXPECT_EQ_DEBUG_VALUE( + GET_EXPR(dwarf::DW_OP_plus_uconst, 32), true, + GET_EXPR(dwarf::DW_OP_plus_uconst, 32, dwarf::DW_OP_deref), false); + EXPECT_NE_DEBUG_VALUE( + GET_EXPR(dwarf::DW_OP_plus_uconst, 16, dwarf::DW_OP_deref), true, + GET_EXPR(dwarf::DW_OP_plus_uconst, 16, dwarf::DW_OP_deref), false); + EXPECT_EQ_DEBUG_VALUE( + GET_EXPR(dwarf::DW_OP_LLVM_arg, 0, dwarf::DW_OP_plus_uconst, 5), false, + GET_EXPR(dwarf::DW_OP_plus_uconst, 5), false); + EXPECT_NE_DEBUG_VALUE(GET_EXPR(dwarf::DW_OP_LLVM_arg, 0, dwarf::DW_OP_plus), + false, + GET_EXPR(dwarf::DW_OP_LLVM_arg, 0, + dwarf::DW_OP_LLVM_arg, 0, dwarf::DW_OP_plus), + false); + EXPECT_NE_DEBUG_VALUE(GET_EXPR(dwarf::DW_OP_LLVM_arg, 0, dwarf::DW_OP_constu, + 8, dwarf::DW_OP_minus), + false, + GET_EXPR(dwarf::DW_OP_constu, 8, dwarf::DW_OP_LLVM_arg, + 0, dwarf::DW_OP_minus), + false); + // These expressions are actually equivalent, but we do not currently identify + // commutative operations with different operand orders as being equivalent. + EXPECT_NE_DEBUG_VALUE(GET_EXPR(dwarf::DW_OP_LLVM_arg, 0, dwarf::DW_OP_constu, + 8, dwarf::DW_OP_plus), + false, + GET_EXPR(dwarf::DW_OP_constu, 8, dwarf::DW_OP_LLVM_arg, + 0, dwarf::DW_OP_plus), + false); + +#undef EXPECT_EQ_DEBUG_VALUE +#undef EXPECT_NE_DEBUG_VALUE +#undef GET_EXPR +} + TEST_F(DIExpressionTest, foldConstant) { const ConstantInt *Int; const ConstantInt *NewInt;