This is an archive of the discontinued LLVM Phabricator instance.

[MergeFuncs] Compare load instruction metadata
ClosedPublic

Authored by dingxiangfei2009 on Feb 23 2023, 3:34 PM.

Details

Summary

Today we realize that we are able to hit a corner case that leads to mis-compilation. This is due to interaction of merge function passes with other optimization passes involving !nonnull assertions.

By applying MergeFunctions, Inline, SROA and InstCombine in this order to the following IR, the program semantics changes from calling the external function @out with arguments (some pointer, null) to calling @out with (some pointer, some other non-null pointer).

target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
; target triple = "arm64-apple-macosx11.0.0"

%1 = type { { ptr, i64 }, ptr }
%optstring = type { [1 x i64], ptr, [1 x i64] }

@emptystr = external hidden unnamed_addr constant <{}>, align 8

define hidden void @f1(ptr noalias nocapture noundef sret(%1) dereferenceable(24) %0, ptr noalias nocapture noundef dereferenceable(24) %1) unnamed_addr #0 {
  %3 = getelementptr inbounds { ptr, i64 }, ptr %1, i32 0, i32 0
  %4 = load ptr, ptr %3, align 8, !nonnull !2, !align !3, !noundef !2
  %5 = getelementptr inbounds { ptr, i64 }, ptr %1, i32 0, i32 1
  %6 = load i64, ptr %5, align 8
  %7 = getelementptr inbounds { { ptr, i64 }, ptr }, ptr %1, i32 0, i32 1
  %8 = load ptr, ptr %7, align 8, !nonnull !2, !align !6, !noundef !2
  %9 = getelementptr inbounds { ptr, i64 }, ptr %0, i32 0, i32 0
  store ptr %4, ptr %9, align 8
  %10 = getelementptr inbounds { ptr, i64 }, ptr %0, i32 0, i32 1
  store i64 %6, ptr %10, align 8
  %11 = getelementptr inbounds %1, ptr %0, i32 0, i32 1
  store ptr %8, ptr %11, align 8
  ret void
}

define hidden void @f2(ptr noalias nocapture noundef sret(%1) dereferenceable(24) %0, ptr noalias nocapture noundef dereferenceable(24) %1) unnamed_addr #0 {
  %3 = getelementptr inbounds { ptr, i64 }, ptr %1, i32 0, i32 0
  %4 = load ptr, ptr %3, align 8, !nonnull !2, !align !3, !noundef !2
  %5 = getelementptr inbounds { ptr, i64 }, ptr %1, i32 0, i32 1
  %6 = load i64, ptr %5, align 8
  %7 = getelementptr inbounds { { ptr, i64 }, ptr }, ptr %1, i32 0, i32 1
  %8 = load ptr, ptr %7, align 8, !align !6
  %9 = getelementptr inbounds { ptr, i64 }, ptr %0, i32 0, i32 0
  store ptr %4, ptr %9, align 8
  %10 = getelementptr inbounds { ptr, i64 }, ptr %0, i32 0, i32 1
  store i64 %6, ptr %10, align 8
  %11 = getelementptr inbounds %1, ptr %0, i32 0, i32 1
  store ptr %8, ptr %11, align 8
  ret void
}

define void @f(ptr noalias nocapture noundef dereferenceable(24) %string, ptr noalias nocapture noundef dereferenceable(4) %flag) personality ptr @rust_eh_personality {
  ; optstring: Option<String>
  %optstring = alloca %optstring, align 8
  ; kv1: (%str, &String)
  %kv1 = alloca { { ptr, i64 }, ptr }, align 8
  ; kv2: (%str, Option<&String>)
  %kv2 = alloca { { ptr, i64 }, ptr }, align 8
  ; singlekv1: SingleKV<&String>
  %singlekv1 = alloca { { ptr, i64 }, ptr }, align 8
  ; singlekv2: SingleKV<Option<&String>>
  %singlekv2 = alloca { { ptr, i64 }, ptr }, align 8

  %1 = getelementptr inbounds { { ptr, i64 }, ptr }, ptr %kv1, i32 0, i32 0, i32 0
  %2 = getelementptr inbounds { { ptr, i64 }, ptr }, ptr %kv1, i32 0, i32 0, i32 1
  %3 = getelementptr inbounds { { ptr, i64 }, ptr }, ptr %kv1, i32 0, i32 1
  store ptr @emptystr, ptr %kv1, align 8
  store i64 0, ptr %2, align 8
  store ptr %string, ptr %3, align 8
  ; kv1 constructed

  %cond = load i32, ptr %flag, align 8
  %4 = icmp eq i32 %cond, 0
  br i1 %4, label %cmpt, label %cmpf

cmpt:
  %5 = getelementptr inbounds %optstring, ptr %optstring, i32 0, i32 1
  store ptr null, ptr %5, align 8
  br label %cmpd

cmpf:
  store i64 0, ptr %optstring, align 8
  %6 = getelementptr inbounds i8, ptr %optstring, i64 8
  store ptr inttoptr (i64 1 to ptr), ptr %6, align 8
  %7 = getelementptr inbounds i8, ptr %optstring, i64 16
  store i64 0, ptr %7, align 8
  br label %cmpd

cmpd:
  %8 = getelementptr inbounds %optstring, ptr %optstring, i64 0, i32 1
  %9 = load ptr, ptr %8, align 8
  %10 = icmp eq ptr %9, null
  %11 = select i1 %10, ptr null, ptr %optstring

  %12 = getelementptr inbounds { { ptr, i64 }, ptr }, ptr %kv1, i32 0, i32 0, i32 0
  %13 = getelementptr inbounds { { ptr, i64 }, ptr }, ptr %kv1, i32 0, i32 0, i32 1
  %14 = getelementptr inbounds { { ptr, i64 }, ptr }, ptr %kv1, i32 0, i32 1
  store ptr @emptystr, ptr %12, align 8
  store i64 0, ptr %13, align 8
  store ptr %11, ptr %14, align 8
  ; kv2 constructed

  invoke void @f1(ptr noalias nocapture noundef nonnull sret(%1) dereferenceable(24) %singlekv1, ptr noalias nocapture noundef nonnull dereferenceable(24) %kv1)
          to label %f1d unwind label %unwind

f1d:
  invoke void @f2(ptr noalias nocapture noundef nonnull sret(%1) dereferenceable(24) %singlekv2, ptr noalias nocapture noundef nonnull dereferenceable(24) %kv2)
          to label %f2d unwind label %unwind

f2d:
  invoke void @out(ptr %singlekv1, ptr %singlekv2) to label %outd unwind label %unwind

outd:
  ret void

unwind:
  %15 = landingpad { ptr, i32 }
          cleanup
  unreachable
}

declare void @out(ptr, ptr)

declare noundef i32 @rust_eh_personality(i32, i32 noundef, i64, ptr, ptr) unnamed_addr #10

!2 = !{}
!3 = !{i64 1}
!6 = !{i64 8}

What happens is that MergeFunctions is registering @f2 and @f1 as the same, even though @f1 asserts that the value %8 is not null. @f1 and @f2 operates on data types that shares the same layout but not the same semantics, thus the difference in the non-null assertion. By incorrectly replacing the call to @f2 with a call to @f2 and inlining the calls, SROA inserts an assertion that the value %11 is not null, which is false since this can be controlled by %flag. Based on this incorrect assertion, InstCombine produces an IR that feeds a non-null pointer value to the second argument of the @out call unconditionally.

This differential proposes one of a possible remedy, by distinguishing non-null values in the function.

Diff Detail

Event Timeline

dingxiangfei2009 requested review of this revision.Feb 23 2023, 3:34 PM
Herald added a project: Restricted Project. · View Herald TranscriptFeb 23 2023, 3:34 PM
nikic requested changes to this revision.Feb 24 2023, 1:07 AM
nikic added a subscriber: nikic.

Please upload patch with full context (-U99999). This needs a test in llvm/test/Transforms/MergeFuncs. What about metadata other than !nonnull?

This revision now requires changes to proceed.Feb 24 2023, 1:07 AM

A regression test is added. The full context is attached as requested.

Thanks for the review, @nikic ! I am searching for other uses of metadata for optimization.

nikic added inline comments.Feb 24 2023, 2:23 AM
llvm/lib/Transforms/Utils/FunctionComparator.cpp
619–620

The right position to add the new check would be here. You can see it already handles range metadata. Your test also has align and noundef metadata. At this point, we should generalize this to arbitrary metadata (maybe ignoring debug metadata).

llvm/test/Transforms/MergeFunc/mergefunc-preserve-nonnull.ll
10 ↗(On Diff #500119)

There are a lot of attributes and instructions in this test that can be dropped -- we don't need much more than the the load instructions that can't be merged. See also https://llvm.org/docs/TestingGuide.html#best-practices-for-regression-tests.

The diff is updated to check other metadata kinds applicable to loads, except !nontemporal which seems to affect choice of machine instruction.

The test is further trimmed down to only testing metadata that is concerned.

Update test: dereferenceable in the function argument is shrinked to just cover the pointer value.

I am not able to reproduce the build failures from fuzzer test locally using ninja check-fuzzer. I am not sure what I have missed.

llvm/test/Transforms/MergeFunc/mergefunc-preserve-nonnull.ll
10 ↗(On Diff #500119)

@nikic Thanks for the tip! I have reduce the test to reflect the new changes. Does it look reasonable to you?

nikic added inline comments.Feb 27 2023, 6:04 AM
llvm/lib/Transforms/Utils/FunctionComparator.cpp
230

There is more metadata that applies to loads. I can think of !align, !tbaa, !alias.scope, !noalias, !llvm.mem.parallel.loop.access and !llvm.access.group.

This is why I suggested to compare all metadata and only skip some white-listed cases like !dbg. Otherwise we are likely to miss something here, especially also if new metadata is added in the future.

llvm/test/Transforms/MergeFunc/mergefunc-preserve-nonnull.ll
1 ↗(On Diff #500558)

Please use update_test_checks.py.

6 ↗(On Diff #500558)

Is this line needed?

10 ↗(On Diff #500558)

You should be able to drop all the attributes (noalias etc).

dingxiangfei2009 marked an inline comment as not done.

As requested, the new test case is properly processed by update_test_checks.py. In addition, the adhoc metadata comparison is replaced with a generic comparator.

A dangling comment line is removed from the test case.

The extra attributes are removed from the test function arguments.

llvm/lib/Transforms/Utils/FunctionComparator.cpp
230

I see. I have added the generic comparator.

By the way, should I also update the documentation here? I would do it separately, if you see fit.

llvm/test/Transforms/MergeFunc/mergefunc-preserve-nonnull.ll
6 ↗(On Diff #500558)

The data layout line is removed.

10 ↗(On Diff #500558)

All the attributes are now dropped.

dingxiangfei2009 added a comment.EditedFeb 27 2023, 3:18 PM

I am not sure why llvm-reduce is failing on an MIR test. I could not reproduce it locally after rebasing to the latest main.

llvm/test/Transforms/MergeFunc/mergefunc-preserve-nonnull.ll
1 ↗(On Diff #500558)

@nikic Sorry.

The test is now re-generated with the script that you recommended.

nikic added inline comments.Mar 6 2023, 8:21 AM
llvm/lib/Transforms/Utils/FunctionComparator.cpp
166–167

This comment should be dropped, as it's no longer range specific.

183

Add a TODO here that we need to compare other metadata contents. The current implementation will consider any metadata that doesn't contain numbers as equivalent, which isn't right. But I think it's fine to leave it for now, so we don't get carried away.

196

I'd call this cmpInstMetadata and accept Instruction -- this code is not really load specific anymore.

202

These checks here don't make sense to me. At this point L and R should be non-null. (Could even accept them by reference.)

204

The "empty nodes" part shouldn't be there anymore.

208
222

It would be better to use getAllMetadataOtherThanDebugLoc().

nikic added a comment.Mar 6 2023, 8:22 AM

I am not sure why llvm-reduce is failing on an MIR test. I could not reproduce it locally after rebasing to the latest main.

It is/was a flaky test, you can ignore it.

nikic retitled this revision from Fix miscompilation from MergeFunctions-Inline-SROA-InstCombine optimization sequence to [MergeFuncs] Compare load instruction metadata.Mar 6 2023, 8:22 AM
nikic requested changes to this revision.Mar 7 2023, 1:21 AM

(Per the last comments, though this looks mostly good now.)

This revision now requires changes to proceed.Mar 7 2023, 1:21 AM

Sorry for the delay. The suggestions have been applied.

Sorry, wrong patch.

llvm/lib/Transforms/Utils/FunctionComparator.cpp
166–167

Comment dropped

183

I have added a TODO with some explanation.

196

Renamed

202

Checks are removed

208

Applied

222

Switched to getAllMetadataOtherThanDebugLoc

nikic accepted this revision.Mar 8 2023, 8:46 AM

LGTM

If you don't have commit access, can you please tell me the Name <email> to use for attribution?

llvm/lib/Transforms/Utils/FunctionComparator.cpp
199

carries -> carry

This revision is now accepted and ready to land.Mar 8 2023, 8:46 AM

Fix typo "carries -> carry"

@nikic Sorry for the late reply! Indeed, I do not have commit access. For attribution, you may quote Ding Xiang Fei <dingxiangfei2009@protonmail.ch>.

Thank you for your review!

llvm/lib/Transforms/Utils/FunctionComparator.cpp
199

Typo fixed

This revision was landed with ongoing or failed builds.Mar 21 2023, 1:49 AM
This revision was automatically updated to reflect the committed changes.