Index: llvm/lib/Analysis/Loads.cpp
===================================================================
--- llvm/lib/Analysis/Loads.cpp
+++ llvm/lib/Analysis/Loads.cpp
@@ -374,6 +374,12 @@
       return nullptr;
 
     --ScanFrom;
+
+    // Loading from uninitialized stack memory? The loaded value is undefined.
+    // TODO: We could do a similar check for a load from a malloc-like function.
+    if (isa<AllocaInst>(Inst) && Inst == StrippedPtr)
+      return UndefValue::get(AccessTy);
+
     // If this is a load of Ptr, the loaded value is available.
     // (This is true even if the load is volatile or atomic, although
     // those cases are unlikely.)
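For reference, a minimal sketch of the new fold (not part of the patch; the function name is invented, and the typed-pointer IR matches the tests below). Since nothing stores to %p before the load, the backwards scan that this hunk modifies walks all the way to the alloca itself, proving the load reads uninitialized stack memory:

  define i32 @load_uninit_sketch() {
    %p = alloca i32
    %v = load i32, i32* %p    ; no prior store to %p, so %v folds to undef
    ret i32 %v                ; instcombine reduces this to 'ret i32 undef'
  }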
Index: llvm/test/Transforms/Inline/byval-tail-call.ll
===================================================================
--- llvm/test/Transforms/Inline/byval-tail-call.ll
+++ llvm/test/Transforms/Inline/byval-tail-call.ll
@@ -63,12 +63,11 @@
 }
 
 define void @barfoo() {
-; CHECK-LABEL: define void @barfoo(
-; CHECK: %[[POS:.*]] = alloca i32
-; CHECK: %[[VAL:.*]] = load i32, i32* %x
-; CHECK: store i32 %[[VAL]], i32* %[[POS]]
-; CHECK: tail call void @ext2(i32* byval nonnull %[[POS]]
-; CHECK: ret void
+; CHECK-LABEL: @barfoo(
+; CHECK-NEXT: [[X1:%.*]] = alloca i32, align 4
+; CHECK: tail call void @ext2(i32* byval nonnull [[X1]])
+; CHECK: ret void
+;
   %x = alloca i32
   tail call void @bar2(i32* byval %x)
   ret void
Index: llvm/test/Transforms/InstCombine/and-or-icmps.ll
===================================================================
--- llvm/test/Transforms/InstCombine/and-or-icmps.ll
+++ llvm/test/Transforms/InstCombine/and-or-icmps.ll
@@ -199,27 +199,74 @@
   ret <2 x i1> %and
 }
 
-; This is a fuzzer-generated test that would assert because
+; This was a fuzzer-generated test that would assert because
 ; we'd get into foldAndOfICmps() without running InstSimplify
 ; on an 'and' that should have been killed. It's not obvious
 ; why, but removing anything hides the bug, hence the long test.
 
 define void @simplify_before_foldAndOfICmps() {
 ; CHECK-LABEL: @simplify_before_foldAndOfICmps(
-; CHECK-NEXT: [[A8:%.*]] = alloca i16, align 2
-; CHECK-NEXT: [[L7:%.*]] = load i16, i16* [[A8]], align 2
-; CHECK-NEXT: [[C10:%.*]] = icmp ult i16 [[L7]], 2
-; CHECK-NEXT: [[C7:%.*]] = icmp slt i16 [[L7]], 0
-; CHECK-NEXT: [[C18:%.*]] = or i1 [[C7]], [[C10]]
-; CHECK-NEXT: [[L7_LOBIT:%.*]] = ashr i16 [[L7]], 15
-; CHECK-NEXT: [[TMP1:%.*]] = sext i16 [[L7_LOBIT]] to i64
-; CHECK-NEXT: [[G26:%.*]] = getelementptr i1, i1* null, i64 [[TMP1]]
+; CHECK-NEXT: store i1 true, i1* undef, align 1
+; CHECK-NEXT: store i1* null, i1** undef, align 8
+; CHECK-NEXT: ret void
+;
+  %A8 = alloca i16
+  %L7 = load i16, i16* %A8
+  %G21 = getelementptr i16, i16* %A8, i8 -1
+  %B11 = udiv i16 %L7, -1
+  %G4 = getelementptr i16, i16* %A8, i16 %B11
+  %L2 = load i16, i16* %G4
+  %L = load i16, i16* %G4
+  %B23 = mul i16 %B11, %B11
+  %L4 = load i16, i16* %A8
+  %B21 = sdiv i16 %L7, %L4
+  %B7 = sub i16 0, %B21
+  %B18 = mul i16 %B23, %B7
+  %C10 = icmp ugt i16 %L, %B11
+  %B20 = and i16 %L7, %L2
+  %B1 = mul i1 %C10, true
+  %C5 = icmp sle i16 %B21, %L
+  %C11 = icmp ule i16 %B21, %L
+  %C7 = icmp slt i16 %B20, 0
+  %B29 = srem i16 %L4, %B18
+  %B15 = add i1 %C7, %C10
+  %B19 = add i1 %C11, %B15
+  %C6 = icmp sge i1 %C11, %B19
+  %B33 = or i16 %B29, %L4
+  %C13 = icmp uge i1 %C5, %B1
+  %C3 = icmp ult i1 %C13, %C6
+  store i16 undef, i16* %G21
+  %C18 = icmp ule i1 %C10, %C7
+  %G26 = getelementptr i1, i1* null, i1 %C3
+  store i16 %B33, i16* undef
+  store i1 %C18, i1* undef
+  store i1* %G26, i1** undef
+  ret void
+}
+
+define void @simplify_before_foldAndOfICmps_alt(i16* %A8) {
+; CHECK-LABEL: @simplify_before_foldAndOfICmps_alt(
+; CHECK-NEXT: [[L7:%.*]] = load i16, i16* [[A8:%.*]], align 2
+; CHECK-NEXT: [[G4:%.*]] = getelementptr i16, i16* [[A8]], i64 1
+; CHECK-NEXT: [[L2:%.*]] = load i16, i16* [[G4]], align 2
+; CHECK-NEXT: [[C10:%.*]] = icmp ugt i16 [[L2]], 1
+; CHECK-NEXT: [[B20:%.*]] = and i16 [[L7]], [[L2]]
+; CHECK-NEXT: [[C5:%.*]] = icmp slt i16 [[L2]], 1
+; CHECK-NEXT: [[C11:%.*]] = icmp ne i16 [[L2]], 0
+; CHECK-NEXT: [[C7:%.*]] = icmp slt i16 [[B20]], 0
+; CHECK-NEXT: [[B15:%.*]] = xor i1 [[C7]], [[C10]]
+; CHECK-NEXT: [[B19:%.*]] = xor i1 [[C11]], [[B15]]
+; CHECK-NEXT: [[TMP1:%.*]] = and i1 [[C10]], [[C5]]
+; CHECK-NEXT: [[C3:%.*]] = and i1 [[B19]], [[TMP1]]
+; CHECK-NEXT: [[TMP2:%.*]] = xor i1 [[C10]], true
+; CHECK-NEXT: [[C18:%.*]] = or i1 [[C7]], [[TMP2]]
+; CHECK-NEXT: [[TMP3:%.*]] = sext i1 [[C3]] to i64
+; CHECK-NEXT: [[G26:%.*]] = getelementptr i1, i1* null, i64 [[TMP3]]
 ; CHECK-NEXT: store i16 [[L7]], i16* undef, align 2
 ; CHECK-NEXT: store i1 [[C18]], i1* undef, align 1
 ; CHECK-NEXT: store i1* [[G26]], i1** undef, align 8
 ; CHECK-NEXT: ret void
 ;
-  %A8 = alloca i16
   %L7 = load i16, i16* %A8
   %G21 = getelementptr i16, i16* %A8, i8 -1
   %B11 = udiv i16 %L7, -1
Index: llvm/test/Transforms/InstCombine/apint-shift.ll
===================================================================
--- llvm/test/Transforms/InstCombine/apint-shift.ll
+++ llvm/test/Transforms/InstCombine/apint-shift.ll
@@ -531,7 +531,7 @@
 ; https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=9880
 define i177 @ossfuzz_9880(i177 %X) {
 ; CHECK-LABEL: @ossfuzz_9880(
-; CHECK-NEXT: ret i177 1
+; CHECK-NEXT: ret i177 undef
 ;
   %A = alloca i177
   %L1 = load i177, i177* %A
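A note on the recurring pattern in these test updates (sketch only; the function name is invented): once a load from a never-written alloca folds to undef, a test built on such a load stops exercising its original fold. Tests that still need their original coverage therefore gain an _alt or _better variant that either takes the pointer as a parameter or stores into the alloca first, keeping the loaded value opaque:

  define i16 @keep_coverage_sketch(i16* %p) {
    %v = load i16, i16* %p    ; %p is a parameter, so the undef fold cannot apply
    ret i16 %v
  }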
llvm/test/Transforms/InstCombine/getelementptr.ll
@@ -359,15 +359,29 @@
 ; CHECK: icmp eq i16 %1, 0
 }
 
+; Verify that we return the access type -- not the alloca type -- when simplifying to undef.
 define i32 @test21() {
-  %pbob1 = alloca %intstruct
-  %pbob2 = getelementptr %intstruct, %intstruct* %pbob1
-  %pbobel = getelementptr %intstruct, %intstruct* %pbob2, i64 0, i32 0
-  %rval = load i32, i32* %pbobel
-  ret i32 %rval
 ; CHECK-LABEL: @test21(
-; CHECK: getelementptr inbounds %intstruct, %intstruct* %pbob1, i64 0, i32 0
+; CHECK-NEXT: ret i32 undef
+;
+  %pbob1 = alloca %intstruct
+  %pbob2 = getelementptr %intstruct, %intstruct* %pbob1
+  %pbobel = getelementptr %intstruct, %intstruct* %pbob2, i64 0, i32 0
+  %rval = load i32, i32* %pbobel
+  ret i32 %rval
+}
+
+define i32 @test21_alt(%intstruct* %pbob1) {
+; CHECK-LABEL: @test21_alt(
+; CHECK-NEXT: [[PBOBEL:%.*]] = getelementptr [[INTSTRUCT:%.*]], %intstruct* [[PBOB1:%.*]], i64 0, i32 0
+; CHECK-NEXT: [[RVAL:%.*]] = load i32, i32* [[PBOBEL]], align 4
+; CHECK-NEXT: ret i32 [[RVAL]]
+;
+  %pbob2 = getelementptr %intstruct, %intstruct* %pbob1
+  %pbobel = getelementptr %intstruct, %intstruct* %pbob2, i64 0, i32 0
+  %rval = load i32, i32* %pbobel
+  ret i32 %rval
 }
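The test21/test21_alt pair above pins down a subtle detail of the C++ change: UndefValue::get(AccessTy) produces an undef of the loaded type, not of the alloca's type. A minimal sketch (invented names; the all-zero GEP indices mirror test21, so the load pointer strips back to the alloca itself):

  %pair = type { i32, i32 }

  define i32 @access_type_sketch() {
    %a = alloca %pair
    %f = getelementptr %pair, %pair* %a, i64 0, i32 0
    %v = load i32, i32* %f    ; folds to i32 undef, not %pair undef
    ret i32 %v
  }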
Index: llvm/test/Transforms/InstCombine/multiple-uses-load-bitcast-select.ll
===================================================================
--- llvm/test/Transforms/InstCombine/multiple-uses-load-bitcast-select.ll
+++ llvm/test/Transforms/InstCombine/multiple-uses-load-bitcast-select.ll
@@ -1,21 +1,46 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt < %s -instcombine -S -data-layout="E-m:e-i1:8:16-i8:8:16-i64:64-f128:64-v128:64-a:8:16-n32:64" | FileCheck %s
 
+; This used to test for an infinite loop, but now we simplify
+; the loads to undef before we get to the problematic fold.
+
 define void @PR35618(i64* %st1, double* %st2) {
 ; CHECK-LABEL: @PR35618(
+; CHECK-NEXT: ret void
+;
+  %y1 = alloca double
+  %z1 = alloca double
+  %ld1 = load double, double* %y1
+  %ld2 = load double, double* %z1
+  %tmp10 = fcmp olt double %ld1, %ld2
+  %sel = select i1 %tmp10, double* %y1, double* %z1
+  %tmp11 = bitcast double* %sel to i64*
+  %tmp12 = load i64, i64* %tmp11
+  store i64 %tmp12, i64* %st1
+  %bc = bitcast double* %st2 to i64*
+  store i64 %tmp12, i64* %bc
+  ret void
+}
+
+define void @PR35618_better(double %y, double %z, i64* %st1, double* %st2) {
+; CHECK-LABEL: @PR35618_better(
 ; CHECK-NEXT: [[Y1:%.*]] = alloca double, align 8
 ; CHECK-NEXT: [[Z1:%.*]] = alloca double, align 8
-; CHECK-NEXT: [[LD1:%.*]] = load double, double* [[Y1]], align 8
-; CHECK-NEXT: [[LD2:%.*]] = load double, double* [[Z1]], align 8
-; CHECK-NEXT: [[TMP10:%.*]] = fcmp olt double [[LD1]], [[LD2]]
-; CHECK-NEXT: [[TMP121:%.*]] = select i1 [[TMP10]], double [[LD1]], double [[LD2]]
-; CHECK-NEXT: [[TMP1:%.*]] = bitcast i64* [[ST1:%.*]] to double*
-; CHECK-NEXT: store double [[TMP121]], double* [[TMP1]], align 8
-; CHECK-NEXT: store double [[TMP121]], double* [[ST2:%.*]], align 8
+; CHECK-NEXT: store double [[Y:%.*]], double* [[Y1]], align 8
+; CHECK-NEXT: store double [[Z:%.*]], double* [[Z1]], align 8
+; CHECK-NEXT: [[TMP10:%.*]] = fcmp olt double [[Y]], [[Z]]
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[TMP10]], double* [[Y1]], double* [[Z1]]
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast double* [[SEL]] to i64*
+; CHECK-NEXT: [[TMP12:%.*]] = load i64, i64* [[TMP11]], align 8
+; CHECK-NEXT: store i64 [[TMP12]], i64* [[ST1:%.*]], align 8
+; CHECK-NEXT: [[BC:%.*]] = bitcast double* [[ST2:%.*]] to i64*
+; CHECK-NEXT: store i64 [[TMP12]], i64* [[BC]], align 8
 ; CHECK-NEXT: ret void
 ;
   %y1 = alloca double
   %z1 = alloca double
+  store double %y, double* %y1
+  store double %z, double* %z1
   %ld1 = load double, double* %y1
   %ld2 = load double, double* %z1
   %tmp10 = fcmp olt double %ld1, %ld2