Today we realize that we are able to hit a corner case that leads to mis-compilation. This is due to interaction of merge function passes with other optimization passes involving !nonnull assertions.
By applying MergeFunctions, Inline, SROA and InstCombine in this order to the following IR, the program semantics changes from calling the external function @out with arguments (some pointer, null) to calling @out with (some pointer, some other non-null pointer).
target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" ; target triple = "arm64-apple-macosx11.0.0" %1 = type { { ptr, i64 }, ptr } %optstring = type { [1 x i64], ptr, [1 x i64] } @emptystr = external hidden unnamed_addr constant <{}>, align 8 define hidden void @f1(ptr noalias nocapture noundef sret(%1) dereferenceable(24) %0, ptr noalias nocapture noundef dereferenceable(24) %1) unnamed_addr #0 { %3 = getelementptr inbounds { ptr, i64 }, ptr %1, i32 0, i32 0 %4 = load ptr, ptr %3, align 8, !nonnull !2, !align !3, !noundef !2 %5 = getelementptr inbounds { ptr, i64 }, ptr %1, i32 0, i32 1 %6 = load i64, ptr %5, align 8 %7 = getelementptr inbounds { { ptr, i64 }, ptr }, ptr %1, i32 0, i32 1 %8 = load ptr, ptr %7, align 8, !nonnull !2, !align !6, !noundef !2 %9 = getelementptr inbounds { ptr, i64 }, ptr %0, i32 0, i32 0 store ptr %4, ptr %9, align 8 %10 = getelementptr inbounds { ptr, i64 }, ptr %0, i32 0, i32 1 store i64 %6, ptr %10, align 8 %11 = getelementptr inbounds %1, ptr %0, i32 0, i32 1 store ptr %8, ptr %11, align 8 ret void } define hidden void @f2(ptr noalias nocapture noundef sret(%1) dereferenceable(24) %0, ptr noalias nocapture noundef dereferenceable(24) %1) unnamed_addr #0 { %3 = getelementptr inbounds { ptr, i64 }, ptr %1, i32 0, i32 0 %4 = load ptr, ptr %3, align 8, !nonnull !2, !align !3, !noundef !2 %5 = getelementptr inbounds { ptr, i64 }, ptr %1, i32 0, i32 1 %6 = load i64, ptr %5, align 8 %7 = getelementptr inbounds { { ptr, i64 }, ptr }, ptr %1, i32 0, i32 1 %8 = load ptr, ptr %7, align 8, !align !6 %9 = getelementptr inbounds { ptr, i64 }, ptr %0, i32 0, i32 0 store ptr %4, ptr %9, align 8 %10 = getelementptr inbounds { ptr, i64 }, ptr %0, i32 0, i32 1 store i64 %6, ptr %10, align 8 %11 = getelementptr inbounds %1, ptr %0, i32 0, i32 1 store ptr %8, ptr %11, align 8 ret void } define void @f(ptr noalias nocapture noundef dereferenceable(24) %string, ptr noalias nocapture noundef dereferenceable(4) %flag) personality ptr @rust_eh_personality { ; optstring: Option<String> %optstring = alloca %optstring, align 8 ; kv1: (%str, &String) %kv1 = alloca { { ptr, i64 }, ptr }, align 8 ; kv2: (%str, Option<&String>) %kv2 = alloca { { ptr, i64 }, ptr }, align 8 ; singlekv1: SingleKV<&String> %singlekv1 = alloca { { ptr, i64 }, ptr }, align 8 ; singlekv2: SingleKV<Option<&String>> %singlekv2 = alloca { { ptr, i64 }, ptr }, align 8 %1 = getelementptr inbounds { { ptr, i64 }, ptr }, ptr %kv1, i32 0, i32 0, i32 0 %2 = getelementptr inbounds { { ptr, i64 }, ptr }, ptr %kv1, i32 0, i32 0, i32 1 %3 = getelementptr inbounds { { ptr, i64 }, ptr }, ptr %kv1, i32 0, i32 1 store ptr @emptystr, ptr %kv1, align 8 store i64 0, ptr %2, align 8 store ptr %string, ptr %3, align 8 ; kv1 constructed %cond = load i32, ptr %flag, align 8 %4 = icmp eq i32 %cond, 0 br i1 %4, label %cmpt, label %cmpf cmpt: %5 = getelementptr inbounds %optstring, ptr %optstring, i32 0, i32 1 store ptr null, ptr %5, align 8 br label %cmpd cmpf: store i64 0, ptr %optstring, align 8 %6 = getelementptr inbounds i8, ptr %optstring, i64 8 store ptr inttoptr (i64 1 to ptr), ptr %6, align 8 %7 = getelementptr inbounds i8, ptr %optstring, i64 16 store i64 0, ptr %7, align 8 br label %cmpd cmpd: %8 = getelementptr inbounds %optstring, ptr %optstring, i64 0, i32 1 %9 = load ptr, ptr %8, align 8 %10 = icmp eq ptr %9, null %11 = select i1 %10, ptr null, ptr %optstring %12 = getelementptr inbounds { { ptr, i64 }, ptr }, ptr %kv1, i32 0, i32 0, i32 0 %13 = getelementptr inbounds { { ptr, i64 }, ptr }, ptr %kv1, i32 0, i32 0, i32 1 %14 = getelementptr inbounds { { ptr, i64 }, ptr }, ptr %kv1, i32 0, i32 1 store ptr @emptystr, ptr %12, align 8 store i64 0, ptr %13, align 8 store ptr %11, ptr %14, align 8 ; kv2 constructed invoke void @f1(ptr noalias nocapture noundef nonnull sret(%1) dereferenceable(24) %singlekv1, ptr noalias nocapture noundef nonnull dereferenceable(24) %kv1) to label %f1d unwind label %unwind f1d: invoke void @f2(ptr noalias nocapture noundef nonnull sret(%1) dereferenceable(24) %singlekv2, ptr noalias nocapture noundef nonnull dereferenceable(24) %kv2) to label %f2d unwind label %unwind f2d: invoke void @out(ptr %singlekv1, ptr %singlekv2) to label %outd unwind label %unwind outd: ret void unwind: %15 = landingpad { ptr, i32 } cleanup unreachable } declare void @out(ptr, ptr) declare noundef i32 @rust_eh_personality(i32, i32 noundef, i64, ptr, ptr) unnamed_addr #10 !2 = !{} !3 = !{i64 1} !6 = !{i64 8}
What happens is that MergeFunctions is registering @f2 and @f1 as the same, even though @f1 asserts that the value %8 is not null. @f1 and @f2 operates on data types that shares the same layout but not the same semantics, thus the difference in the non-null assertion. By incorrectly replacing the call to @f2 with a call to @f2 and inlining the calls, SROA inserts an assertion that the value %11 is not null, which is false since this can be controlled by %flag. Based on this incorrect assertion, InstCombine produces an IR that feeds a non-null pointer value to the second argument of the @out call unconditionally.
This differential proposes one of a possible remedy, by distinguishing non-null values in the function.
This comment should be dropped, as it's no longer range specific.