diff --git a/llvm/lib/Transforms/Scalar/GVN.cpp b/llvm/lib/Transforms/Scalar/GVN.cpp --- a/llvm/lib/Transforms/Scalar/GVN.cpp +++ b/llvm/lib/Transforms/Scalar/GVN.cpp @@ -1717,13 +1717,6 @@ /// Attempt to eliminate a load whose dependencies are /// non-local by performing PHI construction. bool GVNPass::processNonLocalLoad(LoadInst *Load) { - // non-local speculations are not allowed under asan. - if (Load->getParent()->getParent()->hasFnAttribute( - Attribute::SanitizeAddress) || - Load->getParent()->getParent()->hasFnAttribute( - Attribute::SanitizeHWAddress)) - return false; - // Step 1: Find the non-local dependencies of the load. LoadDepVect Deps; MD->getNonLocalPointerDependency(Load, Deps); @@ -1773,6 +1766,17 @@ // Perform PHI construction. Value *V = ConstructSSAForLoadSet(Load, ValuesPerBlock, *this); + + // If a non-local load would result in producing undef or an alloca (which + // is not initialized), don't fold away control flow. We want the + // sanitizers to help report this case. + if (isa<UndefValue>(V) || isa<AllocaInst>(V)) { + Function *F = Load->getParent()->getParent(); + if (F->hasFnAttribute(Attribute::SanitizeAddress) || + F->hasFnAttribute(Attribute::SanitizeHWAddress)) + return false; + } + + Load->replaceAllUsesWith(V); if (isa<PHINode>(V)) diff --git a/llvm/test/Transforms/GVN/no_speculative_loads_with_asan.ll b/llvm/test/Transforms/GVN/no_speculative_loads_with_asan.ll --- a/llvm/test/Transforms/GVN/no_speculative_loads_with_asan.ll +++ b/llvm/test/Transforms/GVN/no_speculative_loads_with_asan.ll @@ -3,7 +3,7 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" declare noalias ptr @_Znam(i64) #1 -; Load of %i8 is an out of bounds load, which is folded to poison, which allows +; Load of %i8 is an out of bounds load, which is folded to undef, which allows ; us to elide the phi. 
define i32 @TestNoAsan() { ; CHECK-LABEL: @TestNoAsan( @@ -164,21 +164,20 @@ ; CHECK-NEXT: [[TOBOOL_I_I:%.*]] = icmp ne i16 [[TMP3]], 0 ; CHECK-NEXT: br i1 [[TOBOOL_I_I]], label [[IF_THEN_I_I:%.*]], label [[IF_END_I_I:%.*]] ; CHECK: if.then.i.i: -; CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[SA_ADDR_I_I]], align 8 -; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -; CHECK-NEXT: [[CONV_I_I:%.*]] = sext i32 [[TMP5]] to i64 -; CHECK-NEXT: [[CALL_I_I:%.*]] = call i32 @_Z6memcmpPvS_m(ptr noundef [[TMP4]], ptr noundef @compare_netdev_and_ip_sb_0, i64 noundef [[CONV_I_I]]) +; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP2]], align 4 +; CHECK-NEXT: [[CONV_I_I:%.*]] = sext i32 [[TMP4]] to i64 +; CHECK-NEXT: [[CALL_I_I:%.*]] = call i32 @_Z6memcmpPvS_m(ptr noundef [[TMP2]], ptr noundef @compare_netdev_and_ip_sb_0, i64 noundef [[CONV_I_I]]) ; CHECK-NEXT: store i32 [[CALL_I_I]], ptr [[RETVAL_I_I]], align 4 ; CHECK-NEXT: br label [[COMPARE_NETDEV_AND_IP_EXIT_I:%.*]] ; CHECK: if.end.i.i: -; CHECK-NEXT: [[TMP6:%.*]] = load ptr, ptr @ipv6_addr_cmp_a1, align 8 -; CHECK-NEXT: [[CALL2_I_I:%.*]] = call i32 @_Z6memcmpPvS_m(ptr noundef [[TMP6]], ptr noundef @ipv6_addr_cmp_a2, i64 noundef 4) +; CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr @ipv6_addr_cmp_a1, align 8 +; CHECK-NEXT: [[CALL2_I_I:%.*]] = call i32 @_Z6memcmpPvS_m(ptr noundef [[TMP5]], ptr noundef @ipv6_addr_cmp_a2, i64 noundef 4) ; CHECK-NEXT: store i32 [[CALL2_I_I]], ptr @compare_netdev_and_ip___trans_tmp_1, align 4 ; CHECK-NEXT: store i32 [[CALL2_I_I]], ptr [[RETVAL_I_I]], align 4 ; CHECK-NEXT: br label [[COMPARE_NETDEV_AND_IP_EXIT_I]] ; CHECK: compare_netdev_and_ip.exit.i: -; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[RETVAL_I_I]], align 4 -; CHECK-NEXT: store i32 [[TMP7]], ptr @node_from_ndev_ip_data, align 4 +; CHECK-NEXT: [[TMP6:%.*]] = phi i32 [ [[CALL2_I_I]], [[IF_END_I_I]] ], [ [[CALL_I_I]], [[IF_THEN_I_I]] ] +; CHECK-NEXT: store i32 [[TMP6]], ptr @node_from_ndev_ip_data, align 4 ; CHECK-NEXT: br label 
[[WHILE_COND_I]] ; CHECK: node_from_ndev_ip.exit: ; CHECK-NEXT: br label [[IF_END]] @@ -239,3 +238,51 @@ if.end: ; preds = %node_from_ndev_ip.exit, %entry ret i32 0 } + +; The intent of this test is: when we produce the phi in %if.end, we should not +; produce undef for any incoming values, otherwise ASAN would miss the +; resulting load of uninitialized memory since later optimizations may disconnect +; the edge producing undef. +define dso_local i32 @foo(i32 noundef %x) sanitize_address { +; CHECK-LABEL: @foo( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[X_ADDR:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[O:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[P:%.*]] = alloca ptr, align 8 +; CHECK-NEXT: [[V:%.*]] = alloca i32, align 4 +; CHECK-NEXT: store i32 [[X:%.*]], ptr [[X_ADDR]], align 4 +; CHECK-NEXT: store ptr [[O]], ptr [[P]], align 8 +; CHECK-NEXT: store i32 42, ptr [[V]], align 4 +; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[X]], 0 +; CHECK-NEXT: br i1 [[TOBOOL]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] +; CHECK: if.then: +; CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[P]], align 8 +; CHECK-NEXT: store i32 42, ptr [[TMP0]], align 4 +; CHECK-NEXT: br label [[IF_END]] +; CHECK: if.end: +; CHECK-NEXT: [[TMP1:%.*]] = phi ptr [ [[TMP0]], [[IF_THEN]] ], [ [[O]], [[ENTRY:%.*]] ] +; CHECK-NEXT: ret i32 42 +; +entry: + %x.addr = alloca i32, align 4 + %o = alloca i32, align 4 + %p = alloca ptr, align 8 + %v = alloca i32, align 4 + store i32 %x, ptr %x.addr, align 4 + store ptr %o, ptr %p, align 8 + store i32 42, ptr %v, align 4 + %0 = load i32, ptr %x.addr, align 4 + %tobool = icmp ne i32 %0, 0 + br i1 %tobool, label %if.then, label %if.end + +if.then: ; preds = %entry + %1 = load i32, ptr %v, align 4 + %2 = load ptr, ptr %p, align 8 + store i32 %1, ptr %2, align 4 + br label %if.end + +if.end: ; preds = %if.then, %entry + %3 = load ptr, ptr %p, align 8 + %4 = load i32, ptr %3, align 4 + ret i32 %4 +}