diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -4594,13 +4594,6 @@ bool MadeIRChange = false; if (ShouldLowerDbgDeclare) MadeIRChange = LowerDbgDeclare(F); - // LowerDbgDeclare calls RemoveRedundantDbgInstrs, but LowerDbgDeclare will - // almost never return true when running an assignment tracking build. Take - // this opportunity to do some clean up for assignment tracking builds too. - if (!MadeIRChange && isAssignmentTrackingEnabled(*F.getParent())) { - for (auto &BB : F) - RemoveRedundantDbgInstrs(&BB); - } // Iterate while there is work to do. unsigned Iteration = 0; diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp --- a/llvm/lib/Transforms/Scalar/SROA.cpp +++ b/llvm/lib/Transforms/Scalar/SROA.cpp @@ -5048,6 +5048,11 @@ if (!Changed) return PreservedAnalyses::all(); + if (isAssignmentTrackingEnabled(*F.getParent())) { + for (auto &BB : F) + RemoveRedundantDbgInstrs(&BB); + } + PreservedAnalyses PA; if (!CFGChanged) PA.preserveSet(); diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -98,6 +98,7 @@ #include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugInfo.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/DerivedTypes.h" @@ -10606,6 +10607,11 @@ return PreservedAnalyses::all(); PreservedAnalyses PA; + if (isAssignmentTrackingEnabled(*F.getParent())) { + for (auto &BB : F) + RemoveRedundantDbgInstrs(&BB); + } + // We currently do not preserve loopinfo/dominator analyses with outer loop // vectorization. Until this is addressed, mark these analyses as preserved // only for non-VPlan-native path. diff --git a/llvm/test/DebugInfo/Generic/assignment-tracking/instcombine/remove-redundant-dbg.ll b/llvm/test/DebugInfo/Generic/assignment-tracking/instcombine/remove-redundant-dbg.ll --- a/llvm/test/DebugInfo/Generic/assignment-tracking/instcombine/remove-redundant-dbg.ll +++ b/llvm/test/DebugInfo/Generic/assignment-tracking/instcombine/remove-redundant-dbg.ll @@ -1,17 +1,17 @@ -; RUN: opt -passes=instcombine -S %s -o - \ +; RUN: opt -passes=sroa -S %s -o - \ ; RUN: | FileCheck %s --implicit-check-not="call void @llvm.dbg" -;; Check that instcombine removes redundant debug intrinsics. This has a -;; significant positive impact on peak memory. CTMark's tramp3d-v4's peak -;; memory with assignment tracking enabled is reduced by about 15% with this -;; change. +;; Check that sroa removes redundant debug intrinsics after it makes a +;; change. This has a significant positive impact on peak memory and compiler +;; run time. ; CHECK: @llvm.dbg.assign(metadata i32 1 define dso_local void @_Z3funv() local_unnamed_addr !dbg !7 { entry: - call void @llvm.dbg.assign(metadata i32 undef, metadata !11, metadata !DIExpression(), metadata !13, metadata ptr undef, metadata !DIExpression()), !dbg !14 - call void @_Z3extv(), !dbg !15 + %sroa-remove-me = alloca i32 + call void @llvm.dbg.assign(metadata i32 undef, metadata !11, metadata !DIExpression(), metadata !13, metadata ptr undef, metadata !DIExpression()), !dbg !14 + call void @_Z3extv(), !dbg !15 call void @llvm.dbg.assign(metadata i32 0, metadata !11, metadata !DIExpression(), metadata !13, metadata ptr undef, metadata !DIExpression()), !dbg !14 call void @llvm.dbg.assign(metadata i32 1, metadata !11, metadata !DIExpression(), metadata !13, metadata ptr undef, metadata !DIExpression()), !dbg !14 ret void, !dbg !16 diff --git a/llvm/test/DebugInfo/Generic/assignment-tracking/loop-vectorize/remove-redundant-dbg.ll b/llvm/test/DebugInfo/Generic/assignment-tracking/loop-vectorize/remove-redundant-dbg.ll new file mode 100644 --- /dev/null +++ b/llvm/test/DebugInfo/Generic/assignment-tracking/loop-vectorize/remove-redundant-dbg.ll @@ -0,0 +1,61 @@ +; RUN: opt %s -passes=loop-vectorize -force-vector-width=2 -force-vector-interleave=2 -S -o - \ +; RUN: | FileCheck %s --implicit-check-not="call void @llvm.dbg" + +;; Check that loop-vectorize removes redundant debug intrinsics after it makes +;; a change. This has a significant positive impact on peak memory and compiler +;; run time. + +;; Check there is only one dbg.assign. +; CHECK: call void @llvm.dbg.assign + +;; Check that the loop was actually modified. +; CHECK: extractelement + + +define void @test1(ptr noalias nocapture %a, ptr noalias nocapture readonly %b) { +entry: + call void @llvm.dbg.assign(metadata ptr %a, metadata !11, metadata !DIExpression(), metadata !16, metadata ptr undef, metadata !DIExpression()), !dbg !28 + call void @llvm.dbg.assign(metadata ptr %a, metadata !11, metadata !DIExpression(), metadata !16, metadata ptr undef, metadata !DIExpression()), !dbg !28 + br label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %arrayidx = getelementptr inbounds float, ptr %b, i64 %indvars.iv + %0 = load float, ptr %arrayidx, align 4 + %cmp1 = fcmp ogt float %0, 1.000000e+02 + tail call void @llvm.assume(i1 %cmp1) + %add = fadd float %0, 1.000000e+00 + %arrayidx5 = getelementptr inbounds float, ptr %a, i64 %indvars.iv + store float %add, ptr %arrayidx5, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv, 1599 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body + ret void +} + +declare void @llvm.assume(i1) #0 +declare void @llvm.dbg.assign(metadata, metadata, metadata, metadata, metadata, metadata) + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!2, !3, !4, !5, !1000} +!llvm.ident = !{!6} + +!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "clang version 14.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: None) +!1 = !DIFile(filename: "test.cpp", directory: "/") +!2 = !{i32 7, !"Dwarf Version", i32 5} +!3 = !{i32 2, !"Debug Info Version", i32 3} +!4 = !{i32 1, !"wchar_size", i32 4} +!5 = !{i32 7, !"uwtable", i32 1} +!6 = !{!"clang version 14.0.0)"} +!7 = distinct !DISubprogram(name: "fun", linkageName: "_Z3funv", scope: !1, file: !1, line: 2, type: !8, scopeLine: 2, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !10) +!8 = !DISubroutineType(types: !9) +!9 = !{null} +!10 = !{!11} +!11 = !DILocalVariable(name: "Counter", scope: !7, file: !1, line: 3, type: !12) +!12 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!16 = distinct !DIAssignID() +!19 = distinct !DILexicalBlock(scope: !7, file: !1, line: 4, column: 3) +!28 = !DILocation(line: 6, column: 1, scope: !7) +!1000 = !{i32 7, !"debug-info-assignment-tracking", i1 true} diff --git a/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/after-inlining.ll b/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/after-inlining.ll --- a/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/after-inlining.ll +++ b/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/after-inlining.ll @@ -28,7 +28,7 @@ ;; ;; $ clang test.c -Xclang -fexperimental-assignment-tracking -O2 -g -; CHECK: call void @llvm.dbg.assign(metadata i1 undef, metadata !{{.+}}, metadata !DIExpression(), metadata !{{.+}}, metadata ptr undef, metadata !DIExpression()), !dbg ![[DBG:[0-9]+]] +; CHECK: call void @llvm.dbg.assign(metadata i1 false, metadata !{{.+}}, metadata !DIExpression(), metadata !{{.+}}, metadata ptr undef, metadata !DIExpression()), !dbg ![[DBG:[0-9]+]] ; CHECK-DAG: ![[DBG]] = !DILocation(line: 0, scope: ![[INL_SC:[0-9]+]], inlinedAt: ![[IA:[0-9]+]]) ; CHECK-DAG: ![[IA]] = distinct !DILocation(line: 21, column: 12, scope: ![[SC:[0-9]+]]) @@ -47,7 +47,9 @@ define dso_local void @l() local_unnamed_addr #4 !dbg !73 { entry: %j.i = alloca %struct.c, align 4, !DIAssignID !74 - call void @llvm.dbg.assign(metadata i1 undef, metadata !64, metadata !DIExpression(), metadata !74, metadata ptr %j.i, metadata !DIExpression()) #5, !dbg !75 + ; NOTE: this has been changed from undef to false so that the intrinsic isn't + ; deleted as redundant. + call void @llvm.dbg.assign(metadata i1 false, metadata !64, metadata !DIExpression(), metadata !74, metadata ptr %j.i, metadata !DIExpression()) #5, !dbg !75 %0 = bitcast ptr %j.i to ptr, !dbg !77 call void @llvm.lifetime.start.p0i8(i64 4, ptr nonnull %0) #5, !dbg !77 %arrayidx.i.i = getelementptr inbounds %struct.c, ptr %j.i, i64 0, i32 1, i64 0, !dbg !78 diff --git a/llvm/test/DebugInfo/Generic/assignment-tracking/instcombine/remove-redundant-dbg.ll b/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/remove-redundant-dbg.ll copy from llvm/test/DebugInfo/Generic/assignment-tracking/instcombine/remove-redundant-dbg.ll copy to llvm/test/DebugInfo/Generic/assignment-tracking/sroa/remove-redundant-dbg.ll --- a/llvm/test/DebugInfo/Generic/assignment-tracking/instcombine/remove-redundant-dbg.ll +++ b/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/remove-redundant-dbg.ll @@ -1,17 +1,17 @@ -; RUN: opt -passes=instcombine -S %s -o - \ +; RUN: opt -passes=sroa -S %s -o - \ ; RUN: | FileCheck %s --implicit-check-not="call void @llvm.dbg" -;; Check that instcombine removes redundant debug intrinsics. This has a -;; significant positive impact on peak memory. CTMark's tramp3d-v4's peak -;; memory with assignment tracking enabled is reduced by about 15% with this -;; change. +;; Check that sroa removes redundant debug intrinsics after it makes a +;; change. This has a significant positive impact on peak memory and compiler +;; run time. ; CHECK: @llvm.dbg.assign(metadata i32 1 define dso_local void @_Z3funv() local_unnamed_addr !dbg !7 { entry: - call void @llvm.dbg.assign(metadata i32 undef, metadata !11, metadata !DIExpression(), metadata !13, metadata ptr undef, metadata !DIExpression()), !dbg !14 - call void @_Z3extv(), !dbg !15 + %sroa-remove-me = alloca i32 + call void @llvm.dbg.assign(metadata i32 undef, metadata !11, metadata !DIExpression(), metadata !13, metadata ptr undef, metadata !DIExpression()), !dbg !14 + call void @_Z3extv(), !dbg !15 call void @llvm.dbg.assign(metadata i32 0, metadata !11, metadata !DIExpression(), metadata !13, metadata ptr undef, metadata !DIExpression()), !dbg !14 call void @llvm.dbg.assign(metadata i32 1, metadata !11, metadata !DIExpression(), metadata !13, metadata ptr undef, metadata !DIExpression()), !dbg !14 ret void, !dbg !16 diff --git a/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/user-memcpy.ll b/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/user-memcpy.ll --- a/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/user-memcpy.ll +++ b/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/user-memcpy.ll @@ -25,7 +25,6 @@ ;; | V3i point = {0, 0, 0}; ; CHECK-NEXT: call void @llvm.dbg.assign(metadata i64 0, metadata ![[point:[0-9]+]], metadata !DIExpression(DW_OP_LLVM_fragment, 0, 64), metadata !{{.+}}, metadata ptr undef, metadata !DIExpression()), !dbg ; CHECK-NEXT: call void @llvm.dbg.assign(metadata i64 0, metadata ![[point]], metadata !DIExpression(DW_OP_LLVM_fragment, 64, 64), metadata !{{.+}}, metadata ptr undef, metadata !DIExpression()), !dbg -; CHECK-NEXT: call void @llvm.dbg.assign(metadata i64 0, metadata ![[point]], metadata !DIExpression(DW_OP_LLVM_fragment, 128, 64), metadata !{{.+}}, metadata ptr undef, metadata !DIExpression()), !dbg ;; point.z = 5000; ; CHECK-NEXT: call void @llvm.dbg.assign(metadata i64 5000, metadata ![[point]], metadata !DIExpression(DW_OP_LLVM_fragment, 128, 64), metadata !{{.+}}, metadata ptr undef, metadata !DIExpression()), !dbg