diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -1360,22 +1360,34 @@
     // to determine if we can prove known low zero bits.
     KnownBits LocalKnown(BitWidth);
     computeKnownBits(I->getOperand(0), LocalKnown, Depth + 1, Q);
-    unsigned TrailZ = LocalKnown.countMinTrailingZeros();
+    KnownBits AddrKnownBits(LocalKnown);
+    // Accumulate the constant indices in a separate variable
+    // to minimize the number of calls to computeForAddSub.
+    APInt AccCstIndices(BitWidth, 0, /*IsSigned*/ true);
 
     gep_type_iterator GTI = gep_type_begin(I);
+    // If the inbounds keyword is not present, the offsets are added to the
+    // base address with silently-wrapping two's complement arithmetic.
+    bool IsInBounds = cast<GEPOperator>(I)->isInBounds();
     for (unsigned i = 1, e = I->getNumOperands(); i != e; ++i, ++GTI) {
-      // TrailZ can only become smaller, short-circuit if we hit zero.
-      if (TrailZ == 0)
+      // AddrKnownBits can only get worse, short-circuit once it is unknown.
+      if (AddrKnownBits.isUnknown())
        break;
 
       Value *Index = I->getOperand(i);
+
+      // Handle the case when the index is zero.
+      Constant *CIndex = dyn_cast<Constant>(Index);
+      if (CIndex && CIndex->isZeroValue())
+        continue;
+
+      unsigned IndexBitWidth = Index->getType()->getScalarSizeInBits();
+      KnownBits IndexBits(IndexBitWidth);
       if (StructType *STy = GTI.getStructTypeOrNull()) {
         // Handle struct member offset arithmetic.
 
-        // Handle case when index is vector zeroinitializer
-        Constant *CIndex = cast<Constant>(Index);
-        if (CIndex->isZeroValue())
-          continue;
+        assert(CIndex &&
+               "Access to structure field must be known at compile time");
 
         if (CIndex->getType()->isVectorTy())
           Index = CIndex->getSplatValue();
@@ -1383,26 +1395,57 @@
         unsigned Idx = cast<ConstantInt>(Index)->getZExtValue();
         const StructLayout *SL = Q.DL.getStructLayout(STy);
         uint64_t Offset = SL->getElementOffset(Idx);
-        TrailZ = std::min(TrailZ,
-                          countTrailingZeros(Offset));
+        AccCstIndices += Offset;
+        continue;
+      }
+
+      // Handle array index arithmetic.
+      Type *IndexedTy = GTI.getIndexedType();
+      if (!IndexedTy->isSized()) {
+        AddrKnownBits.resetAll();
+        break;
+      }
+
+      computeKnownBits(Index, IndexBits, Depth + 1, Q);
+      TypeSize IndexTypeSize = Q.DL.getTypeAllocSize(IndexedTy);
+      uint64_t TypeSizeInBytes = IndexTypeSize.getKnownMinSize();
+      KnownBits ScalingFactor(IndexBitWidth);
+      // Multiply by the current sizeof type:
+      // &A[i] == A + i * sizeof(*A[i]).
+      if (IndexTypeSize.isScalable()) {
+        // For scalable types the only thing we know about sizeof is
+        // that this is a multiple of the minimum size.
+        ScalingFactor.Zero.setLowBits(countTrailingZeros(TypeSizeInBytes));
+      } else if (IndexBits.isConstant()) {
+        APInt IndexCst = IndexBits.getConstant();
+        APInt ScalingFactor(IndexBitWidth, TypeSizeInBytes);
+        IndexCst *= ScalingFactor;
+        AccCstIndices += IndexCst.sextOrTrunc(BitWidth);
+        continue;
       } else {
-        // Handle array index arithmetic.
-        Type *IndexedTy = GTI.getIndexedType();
-        if (!IndexedTy->isSized()) {
-          TrailZ = 0;
-          break;
-        }
-        unsigned GEPOpiBits = Index->getType()->getScalarSizeInBits();
-        uint64_t TypeSize = Q.DL.getTypeAllocSize(IndexedTy).getKnownMinSize();
-        LocalKnown.Zero = LocalKnown.One = APInt(GEPOpiBits, 0);
-        computeKnownBits(Index, LocalKnown, Depth + 1, Q);
-        TrailZ = std::min(TrailZ,
-                          unsigned(countTrailingZeros(TypeSize) +
-                                   LocalKnown.countMinTrailingZeros()));
+        ScalingFactor.Zero = ~TypeSizeInBytes;
+        ScalingFactor.One = TypeSizeInBytes;
       }
-    }
+      IndexBits = KnownBits::computeForMul(IndexBits, ScalingFactor);
+
+      // If the offsets have a different width from the pointer, according
+      // to the language reference we need to sign-extend or truncate them
+      // to the width of the pointer.
+      IndexBits = IndexBits.sextOrTrunc(BitWidth);
-    Known.Zero.setLowBits(TrailZ);
+      AddrKnownBits = KnownBits::computeForAddSub(
+          /*Add=*/true,
+          /*NSW=*/IsInBounds, AddrKnownBits, IndexBits);
+    }
 
+    if (!AddrKnownBits.isUnknown() && !AccCstIndices.isNullValue()) {
+      KnownBits Index(BitWidth);
+      Index.Zero = ~AccCstIndices;
+      Index.One = AccCstIndices;
+      AddrKnownBits = KnownBits::computeForAddSub(
+          /*Add=*/true,
+          /*NSW=*/IsInBounds, AddrKnownBits, Index);
+    }
+    Known = AddrKnownBits;
     break;
   }
   case Instruction::PHI: {
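The change above replaces the old min-of-trailing-zeros rule with full known-bits arithmetic over base + sum(index_i * sizeof(element_i)). The following standalone sketch uses plain uint64_t masks rather than the real KnownBits/APInt classes (the Bits struct and addKnownBits are illustrative names, not LLVM API); it mirrors the add rule of KnownBits::computeForAddSub and reproduces the numbers checked by the ComputeKnownBitsGEPWithRange unit test further down.

// Plain-integer sketch of known-bits addition; not the LLVM API.
#include <cassert>
#include <cstdint>

struct Bits {
  uint64_t Zero, One; // bits known to be 0 / 1; set in neither = unknown
};

// Mirrors KnownBits::computeForAddSub for a plain add: evaluate the sum
// with all unknown bits at both extremes; a result bit is known only
// when both operand bits and the incoming carry are known.
static Bits addKnownBits(Bits L, Bits R) {
  uint64_t PossibleSumZero = ~L.Zero + ~R.Zero; // unknowns maximized
  uint64_t PossibleSumOne = L.One + R.One;      // unknowns minimized
  uint64_t CarryKnownZero = ~(PossibleSumZero ^ L.Zero ^ R.Zero);
  uint64_t CarryKnownOne = PossibleSumOne ^ L.One ^ R.One;
  uint64_t Known = (L.Zero | L.One) & (R.Zero | R.One) &
                   (CarryKnownZero | CarryKnownOne);
  return {~PossibleSumZero & Known, PossibleSumOne & Known};
}

int main() {
  // Base address in [64, 65536): bits 16 and up known zero, low 16 unknown.
  Bits Base = {~0xFFFFull, 0};
  // Constant byte offset 512, i.e. `gep float, %APtr, i32 128`.
  Bits Offset = {~512ull, 512ull};
  Bits Sum = addKnownBits(Base, Offset);
  // A carry out of the unknown low bits can reach bit 16, so exactly one
  // known zero is lost; these are the same values the
  // ComputeKnownBitsGEPWithRange unit test below expects.
  assert(Sum.Zero == ~0xFFFFull << 1);
  assert(Sum.One == 0);
  return 0;
}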
diff --git a/llvm/test/Transforms/InstCombine/constant-fold-address-space-pointer.ll b/llvm/test/Transforms/InstCombine/constant-fold-address-space-pointer.ll
--- a/llvm/test/Transforms/InstCombine/constant-fold-address-space-pointer.ll
+++ b/llvm/test/Transforms/InstCombine/constant-fold-address-space-pointer.ll
@@ -225,7 +225,7 @@
 
 define i32 @test_constant_cast_gep_struct_indices_as() {
 ; CHECK-LABEL: @test_constant_cast_gep_struct_indices_as(
-; CHECK-NEXT:    [[Y:%.*]] = load i32, i32 addrspace(3)* getelementptr inbounds (%struct.foo, [[STRUCT_FOO:%.*]] addrspace(3)* @constant_fold_global_ptr, i16 0, i32 2, i16 2), align 8
+; CHECK-NEXT:    [[Y:%.*]] = load i32, i32 addrspace(3)* getelementptr inbounds (%struct.foo, [[STRUCT_FOO:%.*]] addrspace(3)* @constant_fold_global_ptr, i16 0, i32 2, i16 2), align 16
 ; CHECK-NEXT:    ret i32 [[Y]]
 ;
   %x = getelementptr %struct.foo, %struct.foo addrspace(3)* @constant_fold_global_ptr, i18 0, i32 2, i12 2
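The alignment bump from 8 to 16 here falls out of folding the whole constant offset before combining it with the base known bits: a sum of offsets can have more trailing zeros than either addend, which the old per-index min of trailing zeros could never observe. A minimal demo, plain C++ rather than the LLVM API (trailingZeros is a local helper and the two 8-byte offsets are made up for illustration):

#include <cstdint>
#include <cstdio>

static unsigned trailingZeros(uint64_t V) {
  unsigned N = 0;
  for (; V && !(V & 1); V >>= 1)
    ++N;
  return N;
}

int main() {
  uint64_t OffsetA = 8, OffsetB = 8; // two constant 8-byte indices
  // Old rule: clamp to the weakest addend, one index at a time.
  unsigned OldTrailZ = trailingZeros(OffsetA) < trailingZeros(OffsetB)
                           ? trailingZeros(OffsetA)
                           : trailingZeros(OffsetB); // 3 -> align 8
  // New rule: accumulate the constants first, then inspect the sum.
  unsigned NewTrailZ = trailingZeros(OffsetA + OffsetB); // 4 -> align 16
  printf("old: align %u, new: align %u\n", 1u << OldTrailZ, 1u << NewTrailZ);
  return 0;
}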
diff --git a/llvm/test/Transforms/InstCombine/constant-fold-gep.ll b/llvm/test/Transforms/InstCombine/constant-fold-gep.ll
--- a/llvm/test/Transforms/InstCombine/constant-fold-gep.ll
+++ b/llvm/test/Transforms/InstCombine/constant-fold-gep.ll
@@ -15,20 +15,20 @@
 ; CHECK-NEXT:    store i32 1, i32* getelementptr inbounds ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 0, i32 0, i64 1), align 4
 ; CHECK-NEXT:    store i32 1, i32* getelementptr inbounds ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 0, i32 0, i64 2), align 8
 ; CHECK-NEXT:    store i32 1, i32* getelementptr inbounds ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 0, i32 1, i64 0), align 4
-; CHECK-NEXT:    store i32 1, i32* getelementptr inbounds ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 0, i32 1, i64 1), align 4
+; CHECK-NEXT:    store i32 1, i32* getelementptr inbounds ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 0, i32 1, i64 1), align 16
 ; CHECK-NEXT:    store i32 1, i32* getelementptr inbounds ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 0, i32 1, i64 2), align 4
 ; CHECK-NEXT:    store i32 1, i32* getelementptr inbounds ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 1, i32 0, i64 0), align 8
 ; CHECK-NEXT:    store i32 1, i32* getelementptr inbounds ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 1, i32 0, i64 1), align 4
-; CHECK-NEXT:    store i32 1, i32* getelementptr inbounds ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 1, i32 0, i64 2), align 8
+; CHECK-NEXT:    store i32 1, i32* getelementptr inbounds ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 1, i32 0, i64 2), align 16
 ; CHECK-NEXT:    store i32 1, i32* getelementptr inbounds ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 1, i32 1, i64 0), align 4
-; CHECK-NEXT:    store i32 1, i32* getelementptr inbounds ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 1, i32 1, i64 1), align 4
+; CHECK-NEXT:    store i32 1, i32* getelementptr inbounds ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 1, i32 1, i64 1), align 8
 ; CHECK-NEXT:    store i32 1, i32* getelementptr inbounds ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 1, i32 1, i64 2), align 4
 ; CHECK-NEXT:    store i32 1, i32* getelementptr inbounds ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 2, i32 0, i64 0), align 16
 ; CHECK-NEXT:    store i32 1, i32* getelementptr inbounds ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 2, i32 0, i64 1), align 4
 ; CHECK-NEXT:    store i32 1, i32* getelementptr inbounds ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 2, i32 0, i64 2), align 8
-; CHECK-NEXT:    store i32 1, i32* getelementptr inbounds ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 2, i32 1, i64 0), align 8
-; CHECK-NEXT:    store i32 1, i32* getelementptr inbounds ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 2, i32 1, i64 1), align 8
-; CHECK-NEXT:    store i32 1, i32* getelementptr inbounds ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 2, i32 1, i64 2), align 8
+; CHECK-NEXT:    store i32 1, i32* getelementptr inbounds ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 2, i32 1, i64 0), align 4
+; CHECK-NEXT:    store i32 1, i32* getelementptr inbounds ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 2, i32 1, i64 1), align 16
+; CHECK-NEXT:    store i32 1, i32* getelementptr inbounds ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 2, i32 1, i64 2), align 4
 ; CHECK-NEXT:    store i32 1, i32* getelementptr inbounds ([3 x %struct.X], [3 x %struct.X]* @Y, i64 1, i64 0, i32 0, i64 0), align 8
 ; CHECK-NEXT:    store i32 1, i32* getelementptr ([3 x %struct.X], [3 x %struct.X]* @Y, i64 2, i64 0, i32 0, i64 0), align 16
 ; CHECK-NEXT:    store i32 1, i32* getelementptr ([3 x %struct.X], [3 x %struct.X]* @Y, i64 1, i64 0, i32 0, i64 1), align 8
@@ -49,9 +49,9 @@
   store i32 1, i32* getelementptr ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 0, i32 0, i64 12), align 4
   store i32 1, i32* getelementptr ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 0, i32 0, i64 13), align 4
   store i32 1, i32* getelementptr ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 0, i32 0, i64 14), align 8
-  store i32 1, i32* getelementptr ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 0, i32 0, i64 15), align 8
+  store i32 1, i32* getelementptr ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 0, i32 0, i64 15), align 4
   store i32 1, i32* getelementptr ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 0, i32 0, i64 16), align 8
-  store i32 1, i32* getelementptr ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 0, i32 0, i64 17), align 8
+  store i32 1, i32* getelementptr ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 0, i32 0, i64 17), align 4
   store i32 1, i32* getelementptr ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 0, i32 0, i64 18), align 8
   store i32 1, i32* getelementptr ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 0, i32 0, i64 36), align 8
   store i32 1, i32* getelementptr ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 0, i32 0, i64 19), align 8
@@ -98,3 +98,19 @@
   ret i16 %E
 }
+
+; Check that we improve the alignment information.
+; The base pointer is 16-byte aligned and we access the field at
+; an offset of 8 bytes.
+; Every element in the @CallerInfos array is 16-byte aligned so
+; any access from the following gep is 8-byte aligned.
+%struct.CallerInfo = type { i8*, i32 }
+@CallerInfos = global [128 x %struct.CallerInfo] zeroinitializer, align 16
+
+; CHECK-LABEL: @test_gep_in_struct(
+; CHECK: load i32, i32* %NS7, align 8
+define i32 @test_gep_in_struct(i64 %idx) {
+  %NS7 = getelementptr inbounds [128 x %struct.CallerInfo], [128 x %struct.CallerInfo]* @CallerInfos, i64 0, i64 %idx, i32 1
+  %res = load i32, i32* %NS7, align 1
+  ret i32 %res
+}
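The new test_gep_in_struct case illustrates the struct-offset reasoning: the address is Base + Idx * 16 + 8 with Base 16-byte aligned, so the low four bits are always 0b1000 and the load is 8-byte aligned no matter what %idx is. A quick self-contained check of that arithmetic, assuming the usual LP64 layout where %struct.CallerInfo occupies 16 bytes (Base is an arbitrary example address, not anything LLVM computes):

#include <cstdint>
#include <cstdio>
#include <initializer_list>

int main() {
  const uint64_t Base = 0x1000; // 16-byte aligned, like @CallerInfos
  for (uint64_t Idx : {0ull, 1ull, 7ull, 127ull}) {
    // &CallerInfos[Idx].field1 == Base + Idx * 16 + 8.
    uint64_t Addr = Base + Idx * 16 + 8;
    // Idx * 16 never touches the low four bits, so they stay 0b1000:
    // 8-byte aligned, but provably not 16-byte aligned.
    if ((Addr & 0xF) != 0x8) {
      printf("unexpected low bits for Idx=%llu\n", (unsigned long long)Idx);
      return 1;
    }
  }
  printf("every access is exactly 8-byte aligned\n");
  return 0;
}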
diff --git a/llvm/unittests/Analysis/ValueTrackingTest.cpp b/llvm/unittests/Analysis/ValueTrackingTest.cpp
--- a/llvm/unittests/Analysis/ValueTrackingTest.cpp
+++ b/llvm/unittests/Analysis/ValueTrackingTest.cpp
@@ -1208,6 +1208,66 @@
   EXPECT_EQ(Known.getMaxValue(), 575);
 }
 
+TEST_F(ComputeKnownBitsTest, ComputeKnownBitsGEPWithRange) {
+  parseAssembly(
+      "define void @test(i64* %p) {\n"
+      "  %A = load i64, i64* %p, !range !{i64 64, i64 65536}\n"
+      "  %APtr = inttoptr i64 %A to float*\n"
+      "  %APtrPlus512 = getelementptr float, float* %APtr, i32 128\n"
+      "  %c = icmp ugt float* %APtrPlus512, inttoptr (i32 523 to float*)\n"
+      "  call void @llvm.assume(i1 %c)\n"
+      "  ret void\n"
+      "}\n"
+      "declare void @llvm.assume(i1)\n");
+  AssumptionCache AC(*F);
+  KnownBits Known = computeKnownBits(A, M->getDataLayout(), /* Depth */ 0, &AC,
+                                     F->front().getTerminator());
+  EXPECT_EQ(Known.Zero.getZExtValue(), ~(65536llu - 1));
+  EXPECT_EQ(Known.One.getZExtValue(), 0u);
+  Instruction &APtrPlus512 = findInstructionByName(F, "APtrPlus512");
+  Known = computeKnownBits(&APtrPlus512, M->getDataLayout(), /* Depth */ 0,
+                           &AC, F->front().getTerminator());
+  // We know one less zero because adding 512 may have produced a 1 that
+  // got carried all the way up into the first known-zero bit.
+  EXPECT_EQ(Known.Zero.getZExtValue(), ~(65536llu - 1) << 1);
+  EXPECT_EQ(Known.One.getZExtValue(), 0u);
+  // The known range is not precise because computeKnownBits works with
+  // masks of known zeros and ones, not with ranges.
+  EXPECT_EQ(Known.getMinValue(), 0u);
+  EXPECT_EQ(Known.getMaxValue(), 131071);
+}
+
+// 4*128 + [32, 64) doesn't produce overlapping bits.
+// Make sure we get all the individual bits properly.
+// This test checks that we account for the scaling factor in the gep:
+// gep float, [32,64), 128 adds 4*128 == 512, not 128, to [32,64).
+TEST_F(ComputeKnownBitsTest, ComputeKnownBitsGEPWithRangeNoOverlap) {
+  parseAssembly(
+      "define void @test(i64* %p) {\n"
+      "  %A = load i64, i64* %p, !range !{i64 32, i64 64}\n"
+      "  %APtr = inttoptr i64 %A to float*\n"
+      "  %APtrPlus512 = getelementptr float, float* %APtr, i32 128\n"
+      "  %c = icmp ugt float* %APtrPlus512, inttoptr (i32 523 to float*)\n"
+      "  call void @llvm.assume(i1 %c)\n"
+      "  ret void\n"
+      "}\n"
+      "declare void @llvm.assume(i1)\n");
+  AssumptionCache AC(*F);
+  KnownBits Known = computeKnownBits(A, M->getDataLayout(), /* Depth */ 0, &AC,
+                                     F->front().getTerminator());
+  EXPECT_EQ(Known.Zero.getZExtValue(), ~(64llu - 1));
+  EXPECT_EQ(Known.One.getZExtValue(), 32u);
+  Instruction &APtrPlus512 = findInstructionByName(F, "APtrPlus512");
+  Known = computeKnownBits(&APtrPlus512, M->getDataLayout(), /* Depth */ 0,
+                           &AC, F->front().getTerminator());
+  EXPECT_EQ(Known.Zero.getZExtValue(), ~512llu & ~(64llu - 1));
+  EXPECT_EQ(Known.One.getZExtValue(), 512u | 32u);
+  // The known range is not precise because computeKnownBits works with
+  // masks of known zeros and ones, not with ranges.
+  EXPECT_EQ(Known.getMinValue(), 544);
+  EXPECT_EQ(Known.getMaxValue(), 575);
+}
+
 class IsBytewiseValueTest : public ValueTrackingTest,
                             public ::testing::WithParamInterface<
                                 std::pair<const char *, const char *>> {
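As a final sanity check, the expectations in ComputeKnownBitsGEPWithRangeNoOverlap can be verified by hand: the scaled index 4 * 128 == 512 shares no bits with any value in [32, 64), so no carry can occur and every bit of the sum stays known. A compile-time check of those numbers (illustrative only, assuming the usual 4-byte float):

#include <cstdint>

int main() {
  constexpr uint64_t ScaledIndex = 128 * sizeof(float); // 512 bytes
  constexpr uint64_t BaseOnes = 32;      // bit known to be 1 in [32, 64)
  constexpr uint64_t BaseUnknown = 0x1F; // bits 0-4 vary over the range
  static_assert(ScaledIndex == 512, "scaling factor is sizeof(float)");
  static_assert((ScaledIndex & (BaseOnes | BaseUnknown)) == 0,
                "512 overlaps no bits of [32, 64): no carries possible");
  static_assert((ScaledIndex | BaseOnes) == 544, "Known.getMinValue()");
  static_assert((ScaledIndex | BaseOnes | BaseUnknown) == 575,
                "Known.getMaxValue()");
  return 0;
}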