diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -279,9 +279,28 @@ Idx = Builder.CreateTrunc(Idx, IntPtrTy); } + // If inbounds keyword is not present, Idx * ElementSize can overflow. + // Let's assume that ElementSize is 2 and the wanted value is at offset 0. + // Then, there are two possible values for Idx to match offset 0: + // 0x00..00, 0x80..00. + // Emitting 'icmp eq Idx, 0' isn't correct in this case because the + // comparison is false if Idx was 0x80..00. + // We need to erase the highest countTrailingZeros(ElementSize) bits of Idx. + unsigned ElementSize = + DL.getTypeAllocSize(Init->getType()->getArrayElementType()); + auto MaskIdx = [&](Value* Idx){ + if (!GEP->isInBounds() && countTrailingZeros(ElementSize) != 0) { + Value *Mask = ConstantInt::get(Idx->getType(), -1); + Mask = Builder.CreateLShr(Mask, countTrailingZeros(ElementSize)); + Idx = Builder.CreateAnd(Idx, Mask); + } + return Idx; + }; + // If the comparison is only true for one or two elements, emit direct // comparisons. if (SecondTrueElement != Overdefined) { + Idx = MaskIdx(Idx); // None true -> false. if (FirstTrueElement == Undefined) return replaceInstUsesWith(ICI, Builder.getFalse()); @@ -302,6 +321,7 @@ // If the comparison is only false for one or two elements, emit direct // comparisons. if (SecondFalseElement != Overdefined) { + Idx = MaskIdx(Idx); // None false -> true. if (FirstFalseElement == Undefined) return replaceInstUsesWith(ICI, Builder.getTrue()); @@ -323,6 +343,7 @@ // where it is true, emit the range check. if (TrueRangeEnd != Overdefined) { assert(TrueRangeEnd != FirstTrueElement && "Should emit single compare"); + Idx = MaskIdx(Idx); // Generate (i-FirstTrue) u (FalseRangeEnd-FirstFalse). if (FirstFalseElement) { Value *Offs = ConstantInt::get(Idx->getType(), -FirstFalseElement); @@ -364,6 +386,7 @@ Ty = DL.getSmallestLegalIntType(Init->getContext(), ArrayElementCount); if (Ty) { + Idx = MaskIdx(Idx); Value *V = Builder.CreateIntCast(Idx, Ty, false); V = Builder.CreateLShr(ConstantInt::get(Ty, MagicBitvector), V); V = Builder.CreateAnd(ConstantInt::get(Ty, 1), V); diff --git a/llvm/test/Transforms/InstCombine/load-cmp.ll b/llvm/test/Transforms/InstCombine/load-cmp.ll --- a/llvm/test/Transforms/InstCombine/load-cmp.ll +++ b/llvm/test/Transforms/InstCombine/load-cmp.ll @@ -2,22 +2,22 @@ ; RUN: opt -instcombine -S -data-layout="p:32:32:32-p1:16:16:16-n8:16:32:64" < %s | FileCheck %s @G16 = internal constant [10 x i16] [i16 35, i16 82, i16 69, i16 81, i16 85, - i16 73, i16 82, i16 69, i16 68, i16 0] + i16 73, i16 82, i16 69, i16 68, i16 0] @G16_as1 = internal addrspace(1) constant [10 x i16] [i16 35, i16 82, i16 69, i16 81, i16 85, - i16 73, i16 82, i16 69, i16 68, i16 0] + i16 73, i16 82, i16 69, i16 68, i16 0] @GD = internal constant [6 x double] - [double -10.0, double 1.0, double 4.0, double 2.0, double -20.0, double -40.0] + [double -10.0, double 1.0, double 4.0, double 2.0, double -20.0, double -40.0] %Foo = type { i32, i32, i32, i32 } @GS = internal constant %Foo { i32 1, i32 4, i32 9, i32 14 } @GStructArr = internal constant [4 x %Foo] [ %Foo { i32 1, i32 4, i32 9, i32 14 }, - %Foo { i32 5, i32 4, i32 6, i32 11 }, - %Foo { i32 6, i32 5, i32 9, i32 20 }, - %Foo { i32 12, i32 3, i32 9, i32 8 } ] + %Foo { i32 5, i32 4, i32 6, i32 11 }, + %Foo { i32 6, i32 5, i32 9, i32 20 }, + %Foo { i32 12, i32 3, i32 9, i32 8 } ] define i1 @test1(i32 %X) { @@ -33,7 +33,8 @@ define i1 @test1_noinbounds(i32 %X) { ; CHECK-LABEL: @test1_noinbounds( -; CHECK-NEXT: [[R:%.*]] = icmp eq i32 [[X:%.*]], 9 +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[X:%.*]], 2147483647 +; CHECK-NEXT: [[R:%.*]] = icmp eq i32 [[TMP1]], 9 ; CHECK-NEXT: ret i1 [[R]] ; %P = getelementptr [10 x i16], [10 x i16]* @G16, i32 0, i32 %X @@ -44,8 +45,8 @@ define i1 @test1_noinbounds_i64(i64 %X) { ; CHECK-LABEL: @test1_noinbounds_i64( -; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[X:%.*]] to i32 -; CHECK-NEXT: [[R:%.*]] = icmp eq i32 [[TMP1]], 9 +; CHECK-NEXT: [[TMP1:%.*]] = and i64 [[X:%.*]], 2147483647 +; CHECK-NEXT: [[R:%.*]] = icmp eq i64 [[TMP1]], 9 ; CHECK-NEXT: ret i1 [[R]] ; %P = getelementptr [10 x i16], [10 x i16]* @G16, i64 0, i64 %X @@ -56,8 +57,8 @@ define i1 @test1_noinbounds_as1(i32 %x) { ; CHECK-LABEL: @test1_noinbounds_as1( -; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[X:%.*]] to i16 -; CHECK-NEXT: [[R:%.*]] = icmp eq i16 [[TMP1]], 9 +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[X:%.*]], 32767 +; CHECK-NEXT: [[R:%.*]] = icmp eq i32 [[TMP1]], 9 ; CHECK-NEXT: ret i1 [[R]] ; %p = getelementptr [10 x i16], [10 x i16] addrspace(1)* @G16_as1, i16 0, i32 %x @@ -260,7 +261,8 @@ define i1 @test10_struct_arr_noinbounds(i32 %x) { ; CHECK-LABEL: @test10_struct_arr_noinbounds( -; CHECK-NEXT: [[R:%.*]] = icmp ne i32 [[X:%.*]], 1 +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[X:%.*]], 268435455 +; CHECK-NEXT: [[R:%.*]] = icmp ne i32 [[TMP1]], 1 ; CHECK-NEXT: ret i1 [[R]] ; %p = getelementptr [4 x %Foo], [4 x %Foo]* @GStructArr, i32 0, i32 %x, i32 2 @@ -294,7 +296,9 @@ define i1 @test10_struct_arr_noinbounds_i16(i16 %x) { ; CHECK-LABEL: @test10_struct_arr_noinbounds_i16( -; CHECK-NEXT: [[R:%.*]] = icmp ne i16 [[X:%.*]], 1 +; CHECK-NEXT: [[TMP1:%.*]] = sext i16 [[X:%.*]] to i32 +; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], 268435455 +; CHECK-NEXT: [[R:%.*]] = icmp ne i32 [[TMP2]], 1 ; CHECK-NEXT: ret i1 [[R]] ; %p = getelementptr [4 x %Foo], [4 x %Foo]* @GStructArr, i32 0, i16 %x, i32 2 @@ -305,8 +309,8 @@ define i1 @test10_struct_arr_noinbounds_i64(i64 %x) { ; CHECK-LABEL: @test10_struct_arr_noinbounds_i64( -; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[X:%.*]] to i32 -; CHECK-NEXT: [[R:%.*]] = icmp ne i32 [[TMP1]], 1 +; CHECK-NEXT: [[TMP1:%.*]] = and i64 [[X:%.*]], 268435455 +; CHECK-NEXT: [[R:%.*]] = icmp ne i64 [[TMP1]], 1 ; CHECK-NEXT: ret i1 [[R]] ; %p = getelementptr [4 x %Foo], [4 x %Foo]* @GStructArr, i32 0, i64 %x, i32 2