Index: llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp =================================================================== --- llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -269,14 +269,38 @@ // order the state machines in complexity of the generated code. Value *Idx = GEP->getOperand(2); - // If the index is larger than the pointer size of the target, truncate the - // index down like the GEP would do implicitly. We don't have to do this for - // an inbounds GEP because the index can't be out of range. if (!GEP->isInBounds()) { + // If the index is larger than the pointer size of the target, truncate the + // index down like the GEP would do implicitly. We don't have to do this + // for an inbounds GEP because the index can't be out of range. Type *IntPtrTy = DL.getIntPtrType(GEP->getType()); unsigned PtrSize = IntPtrTy->getIntegerBitWidth(); if (Idx->getType()->getPrimitiveSizeInBits().getFixedSize() > PtrSize) Idx = Builder.CreateTrunc(Idx, IntPtrTy); + + // If inbounds keyword is not present, index * ElementSize can overflow. + // Several indices can make index * sizeof(type) equal the target address. + // So we need to erase highest countTrailingZeros(ElementSize) bits to deal + // with all cases. Index * ElementSize can overflow only if the + // sizeof(index before sign extended) is greater or equal to pointer size, + // so we need to check two sizes to prevent meaningless AND operator. 
+ Value *IdxBeforeSext; + bool IsOverflows = false; + if (match(Idx, m_SExt(m_Value(IdxBeforeSext)))) + IsOverflows = + IdxBeforeSext->getType()->getPrimitiveSizeInBits().getFixedSize() >= + PtrSize; + else + IsOverflows = + Idx->getType()->getPrimitiveSizeInBits().getFixedSize() >= PtrSize; + + unsigned ElementSize = + DL.getTypeAllocSize(Init->getType()->getArrayElementType()); + if (IsOverflows && countTrailingZeros(ElementSize) != 0) { + Value *Mask = ConstantInt::get(Idx->getType(), -1); + Mask = Builder.CreateLShr(Mask, countTrailingZeros(ElementSize)); + Idx = Builder.CreateAnd(Idx, Mask); + } } // If the comparison is only true for one or two elements, emit direct Index: llvm/test/Transforms/InstCombine/load-cmp.ll =================================================================== --- llvm/test/Transforms/InstCombine/load-cmp.ll +++ llvm/test/Transforms/InstCombine/load-cmp.ll @@ -33,7 +33,8 @@ define i1 @test1_noinbounds(i32 %X) { ; CHECK-LABEL: @test1_noinbounds( -; CHECK-NEXT: [[R:%.*]] = icmp eq i32 [[X:%.*]], 9 +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[X:%.*]], 2147483647 +; CHECK-NEXT: [[R:%.*]] = icmp eq i32 [[TMP1]], 9 ; CHECK-NEXT: ret i1 [[R]] ; %P = getelementptr [10 x i16], [10 x i16]* @G16, i32 0, i32 %X @@ -44,8 +45,8 @@ define i1 @test1_noinbounds_i64(i64 %X) { ; CHECK-LABEL: @test1_noinbounds_i64( -; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[X:%.*]] to i32 -; CHECK-NEXT: [[R:%.*]] = icmp eq i32 [[TMP1]], 9 +; CHECK-NEXT: [[TMP1:%.*]] = and i64 [[X:%.*]], 2147483647 +; CHECK-NEXT: [[R:%.*]] = icmp eq i64 [[TMP1]], 9 ; CHECK-NEXT: ret i1 [[R]] ; %P = getelementptr [10 x i16], [10 x i16]* @G16, i64 0, i64 %X @@ -54,10 +55,14 @@ ret i1 %R } +; When x is 0x8009, x * 2(ElementSize) overflows and it becomes 0x0012. +; It is the same location as when x is 0x0009, So we need to return +; true if x AND 0x7FFF is 0x0009. 
+ define i1 @test1_noinbounds_as1(i32 %x) { ; CHECK-LABEL: @test1_noinbounds_as1( -; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[X:%.*]] to i16 -; CHECK-NEXT: [[R:%.*]] = icmp eq i16 [[TMP1]], 9 +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[X:%.*]], 32767 +; CHECK-NEXT: [[R:%.*]] = icmp eq i32 [[TMP1]], 9 ; CHECK-NEXT: ret i1 [[R]] ; %p = getelementptr [10 x i16], [10 x i16] addrspace(1)* @G16_as1, i16 0, i32 %x @@ -260,7 +265,8 @@ define i1 @test10_struct_arr_noinbounds(i32 %x) { ; CHECK-LABEL: @test10_struct_arr_noinbounds( -; CHECK-NEXT: [[R:%.*]] = icmp ne i32 [[X:%.*]], 1 +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[X:%.*]], 268435455 +; CHECK-NEXT: [[R:%.*]] = icmp ne i32 [[TMP1]], 1 ; CHECK-NEXT: ret i1 [[R]] ; %p = getelementptr [4 x %Foo], [4 x %Foo]* @GStructArr, i32 0, i32 %x, i32 2 @@ -305,8 +311,8 @@ define i1 @test10_struct_arr_noinbounds_i64(i64 %x) { ; CHECK-LABEL: @test10_struct_arr_noinbounds_i64( -; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[X:%.*]] to i32 -; CHECK-NEXT: [[R:%.*]] = icmp ne i32 [[TMP1]], 1 +; CHECK-NEXT: [[TMP1:%.*]] = and i64 [[X:%.*]], 268435455 +; CHECK-NEXT: [[R:%.*]] = icmp ne i64 [[TMP1]], 1 ; CHECK-NEXT: ret i1 [[R]] ; %p = getelementptr [4 x %Foo], [4 x %Foo]* @GStructArr, i32 0, i64 %x, i32 2