Dead Argument Elimination pass when removing return value clobbers first llvm.dbg.value’s argument for live arguments of that function by replacing it with nullptr. In the next pass it will be deleted, so debug location about those arguments are lost. This change fixes it.
Details
Diff Detail
- Repository
- rL LLVM
Event Timeline
Thanks! Could you please explain what the dead argument elimination pass is doing and what you patch changes?
I'm worried that this would cause different code being generated depending on whether debug info is enabled or not. In LLVM we consider it a bug if debug info has any effect on the generated code. Instead we have llvm::salvageDebugInfo() to preserve debug info from deleted instructions.
@aprantl Thanks for your comment!
Dead argument elimination pass removes dead return values or dead function arguments. It is detected that when it removing a return value from a function, when it supposed to do 'replaceAllUsesWith()' (line:968) for live function arguments it generates llvm.dbg.value as following:
...
call void @llvm.dbg.value(metadata !2, metadata !39, metadata !DIExpression()), !dbg !42
...
!2 = !{}
...
So, that llvm.dbg.value is not useful and in the next optimization pass it is going to be recognized as dead and it will be deleted.
After applying the patch the llvm.dbg.value would look:
...
call void @llvm.dbg.value(metadata i1 %is_y, metadata !39, metadata !DIExpression()), !dbg !42
...
and it will be propagated to 'debug_loc' properly.
I am aware of llvm::salvageDebugInfo(), but I think that we can not use it in this situation.
Maybe some additional condition when calling 'replaceMetadataUsesWithPDBG()' would be enough to avoid potential differences in generated code. Any suggestion?
Thanks. Here is the before/after of the testcase in case anyone else is wondering:
--- /tmp/t.ll 2018-01-25 10:17:20.000000000 -0800 +++ /tmp/2.ll 2018-01-25 10:17:40.000000000 -0800 @@ -1,80 +1,79 @@ -; RUN: opt -deadargelim -S < %s | FileCheck %s +; ModuleID = '/tmp/t.ll' +source_filename = "/tmp/t.ll" target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" %struct.Channel = type { i32, i32 } -; Function Attrs: nounwind uwtable -define void @f2(i32 %m, i32 %n) #0 !dbg !7 { +define void @f2(i32 %m, i32 %n) !dbg !7 { entry: call void @llvm.dbg.value(metadata i32 %m, metadata !12, metadata !DIExpression()), !dbg !21 call void @llvm.dbg.value(metadata i32 %n, metadata !13, metadata !DIExpression()), !dbg !22 call void @llvm.dbg.value(metadata %struct.Channel* null, metadata !14, metadata !DIExpression()), !dbg !23 %call = call %struct.Channel* (...) @foo(), !dbg !24 call void @llvm.dbg.value(metadata %struct.Channel* %call, metadata !14, metadata !DIExpression()), !dbg !23 %cmp = icmp sgt i32 %m, 3, !dbg !25 br i1 %cmp, label %if.then, label %if.end, !dbg !27 if.then: ; preds = %entry - %call1 = call zeroext i1 @f1(i1 zeroext true, %struct.Channel* %call), !dbg !28 + call void @f1(i1 zeroext true, %struct.Channel* %call), !dbg !28 br label %if.end, !dbg !28 if.end: ; preds = %if.then, %entry %cmp2 = icmp sgt i32 %n, %m, !dbg !29 br i1 %cmp2, label %if.then3, label %if.end5, !dbg !31 if.then3: ; preds = %if.end - %call4 = call zeroext i1 @f1(i1 zeroext false, %struct.Channel* %call), !dbg !32 + call void @f1(i1 zeroext false, %struct.Channel* %call), !dbg !32 br label %if.end5, !dbg !32 if.end5: ; preds = %if.then3, %if.end ret void, !dbg !33 } -declare %struct.Channel* @foo(...) local_unnamed_addr #1 +declare %struct.Channel* @foo(...) local_unnamed_addr -; Function Attrs: noinline nounwind uwtable -define internal zeroext i1 @f1(i1 zeroext %is_y, %struct.Channel* %str) #4 !dbg !34 { +define internal void @f1(i1 zeroext %is_y, %struct.Channel* %str) !dbg !34 { entry: %frombool = zext i1 %is_y to i8 -; CHECK: call void @llvm.dbg.value(metadata i1 %is_y, metadata !39, metadata !DIExpression()), !dbg !42 - call void @llvm.dbg.value(metadata i1 %is_y, metadata !39, metadata !DIExpression()), !dbg !42 -; CHECK: call void @llvm.dbg.value(metadata %struct.Channel* %str, metadata !40, metadata !DIExpression()), !dbg !43 - call void @llvm.dbg.value(metadata %struct.Channel* %str, metadata !40, metadata !DIExpression()), !dbg !43 + call void @llvm.dbg.value(metadata !2, metadata !39, metadata !DIExpression()), !dbg !42 + call void @llvm.dbg.value(metadata !2, metadata !40, metadata !DIExpression()), !dbg !43 call void @llvm.dbg.value(metadata %struct.Channel* null, metadata !41, metadata !DIExpression()), !dbg !44 %tobool = icmp ne %struct.Channel* %str, null, !dbg !45 br i1 %tobool, label %if.end, label %if.then, !dbg !47 if.then: ; preds = %entry call void (...) @baa(), !dbg !48 br label %cleanup, !dbg !50 if.end: ; preds = %entry %call = call %struct.Channel* (...) @foo(), !dbg !51 call void @llvm.dbg.value(metadata %struct.Channel* %call, metadata !41, metadata !DIExpression()), !dbg !44 %tobool1 = trunc i8 %frombool to i1, !dbg !52 - br i1 %tobool1, label %if.then2, label %if.end3, !dbg !56 + br i1 %tobool1, label %if.then2, label %if.end3, !dbg !53 if.then2: ; preds = %if.end - call void (...) @baa(), !dbg !57 - br label %cleanup, !dbg !56 + call void (...) @baa(), !dbg !56 + br label %cleanup, !dbg !53 if.end3: ; preds = %if.end - br label %cleanup, !dbg !56 + br label %cleanup, !dbg !53 cleanup: ; preds = %if.end3, %if.then2, %if.then %retval.0 = phi i1 [ false, %if.then2 ], [ true, %if.end3 ], [ false, %if.then ] - ret i1 %retval.0, !dbg !56 + ret void } -declare void @baa(...) local_unnamed_addr #1 +declare void @baa(...) local_unnamed_addr ; Function Attrs: nounwind readnone speculatable -declare void @llvm.dbg.value(metadata, metadata, metadata) #3 +declare void @llvm.dbg.value(metadata, metadata, metadata) #0 + +attributes #0 = { nounwind readnone speculatable } !llvm.dbg.cu = !{!0} !llvm.module.flags = !{!3, !4, !5} !llvm.ident = !{!6} !0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 7.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2) !1 = !DIFile(filename: "test.c", directory: "/dir") !2 = !{} @@ -123,13 +122,12 @@ !45 = !DILocation(line: 16, column: 7, scope: !46) !46 = distinct !DILexicalBlock(scope: !34, file: !1, line: 16, column: 6) !47 = !DILocation(line: 16, column: 6, scope: !34) !48 = !DILocation(line: 17, column: 3, scope: !49) !49 = distinct !DILexicalBlock(scope: !46, file: !1, line: 16, column: 11) !50 = !DILocation(line: 18, column: 3, scope: !49) !51 = !DILocation(line: 21, column: 9, scope: !34) !52 = !DILocation(line: 23, column: 6, scope: !34) -!53 = !DILocation(line: 24, column: 3, scope: !54) +!53 = !DILocation(line: 25, column: 3, scope: !54) !54 = distinct !DILexicalBlock(scope: !55, file: !1, line: 23, column: 11) !55 = distinct !DILexicalBlock(scope: !34, file: !1, line: 23, column: 6) -!56 = !DILocation(line: 25, column: 3, scope: !54) -!57 = !DILocation(line: 28, column: 2, scope: !34) +!56 = !DILocation(line: 28, column: 2, scope: !34)
The effect of the patch is desirable and looks good to me. I'm not sure yet about the concrete implementation, I wonder if there is a better way of implementing it.
I agree with you, that is the reason I started conversation. I saw how it was done in https://reviews.llvm.org/D27534 (where the problem in general is different but it has similarity with this one) and I have made similar solution.
In general, RAUW replaces all uses of 'this' with 'MD'. But, in handleRAUW() (Metadata.cpp:384) when it is detected that Function is changed it replaces all uses with nullptr. That is the piece of LLVM source code where in optimized code we lose debug loc about those arguments.
I think I now get what the code in lib/IR/Metadata.cpp:413 is supposed to be doing.
It checks whether the RAUW operation moves the Value from one function to another, and if yes, it (correctly) assumes that the debug info should be destroyed in the process. The dead function argument pass is a really special case that rewrites the function signature of the current function and thus also triggers this condition.
A simpler fix could be to compare the DISubprogram attachment instead of the functions themselves and call it getLocalFunctionMetadata() or something.
Hi @djtodoro, thanks for looking into this. I have a few comments/suggestions noted inline.
lib/IR/Value.cpp | ||
---|---|---|
420 ↗ | (On Diff #131455) | Just a suggestion, can we keep the name as handleRAWU, and call this directly with the bool PreserveDbg here? I think that would allow us to get rid of the handlers on line 442 in Metadata.cpp. |
lib/Transforms/IPO/DeadArgumentElimination.cpp | ||
968 ↗ | (On Diff #131455) | The term 'PDBG' in replaceMetadataUsesWithPDBG makes me think of PDB (program database), this change is about DI so the term might be misleading to some. Perhaps we just spell-out Preserve here, or use PDbg, or remove the handler and add a default bool to replaceAllUsesWith(). |
include/llvm/IR/Value.h | ||
---|---|---|
257 ↗ | (On Diff #131455) | Maybe have this default to false so we don't have to change unaffected calls? |
lib/IR/Metadata.cpp | ||
442 ↗ | (On Diff #131455) | Previous comment would probably render these unnecessary? |
lib/IR/Value.cpp | ||
420 ↗ | (On Diff #131455) | Aha, exactly what I was thinking too! :-) +1 |
Hi @mattd and @JDevlieghere, thanks a lot for your comments! They sound reasonable, I just updated the patch. I had the idea in my mind about using default bool values, but I saw some solutions for similar problems and made a bunch of handlers, but right now it looks more readable and maintainable. Thanks again!
@aprantl How does it look now?
Sorry I should have expressed this more clearly yesterday:
Instead of threading an extra flag through all of RAUW, we should replace the use of getLocalFunction() with getLocalFunction()->getSubprogram().
@aprantl I understood your idea, I thought this could be acceptable if refactored.
I will do it that way and reapply the patch. Thanks!
lib/IR/Metadata.cpp | ||
---|---|---|
342 ↗ | (On Diff #131643) | One stylistic suggestion: |
LGTM with outstanding changes applied.
lib/IR/Metadata.cpp | ||
---|---|---|
342 ↗ | (On Diff #131643) | getLocalFunction() is also static and not used by anyone else. I would just roll this into one function. |
419 ↗ | (On Diff #131643) | One of these checks is redundant. How about: if (DISubprogram *FromSP = getLocalFunctionMetadata(From)) if (getLocalFunctionMetadata(To) == FromSP) |