Index: llvm/include/llvm/IR/Operator.h
===================================================================
--- llvm/include/llvm/IR/Operator.h
+++ llvm/include/llvm/IR/Operator.h
@@ -547,13 +547,24 @@
   /// Accumulate the constant address offset of this GEP if possible.
   ///
-  /// This routine accepts an APInt into which it will accumulate the constant
-  /// offset of this GEP if the GEP is in fact constant. If the GEP is not
-  /// all-constant, it returns false and the value of the offset APInt is
-  /// undefined (it is *not* preserved!). The APInt passed into this routine
-  /// must be at exactly as wide as the IntPtr type for the address space of the
-  /// base GEP pointer.
-  bool accumulateConstantOffset(const DataLayout &DL, APInt &Offset) const;
+  /// This routine accepts an APInt into which it will try to accumulate the
+  /// constant offset of this GEP.
+  ///
+  /// If \p ExternalAnalysis is provided, it will be used to calculate an
+  /// offset when an operand of the GEP is not constant. For example, for a
+  /// value \p ExternalAnalysis might try to calculate a lower bound. If
+  /// \p ExternalAnalysis is successful, it should return true.
+  ///
+  /// If \p ExternalAnalysis returns false, or the value returned by \p
+  /// ExternalAnalysis results in an overflow/underflow, this routine returns
+  /// false and the value of the offset APInt is undefined (it is *not*
+  /// preserved!).
+  ///
+  /// The APInt passed into this routine must be exactly as wide as the
+  /// IntPtr type for the address space of the base GEP pointer.
+  bool accumulateConstantOffset(
+      const DataLayout &DL, APInt &Offset,
+      function_ref<bool(Value &, APInt &)> ExternalAnalysis = nullptr) const;
 };

 class PtrToIntOperator
Index: llvm/include/llvm/IR/Value.h
===================================================================
--- llvm/include/llvm/IR/Value.h
+++ llvm/include/llvm/IR/Value.h
@@ -595,18 +595,23 @@
   }

   /// Accumulate the constant offset this value has compared to a base pointer.
-  /// Only 'getelementptr' instructions (GEPs) with constant indices are
-  /// accumulated but other instructions, e.g., casts, are stripped away as
-  /// well. The accumulated constant offset is added to \p Offset and the base
+  /// Only 'getelementptr' instructions (GEPs) are accumulated but other
+  /// instructions, e.g., casts, are stripped away as well.
+  /// The accumulated constant offset is added to \p Offset and the base
   /// pointer is returned.
   ///
   /// The APInt \p Offset has to have a bit-width equal to the IntPtr type for
   /// the address space of 'this' pointer value, e.g., use
   /// DataLayout::getIndexTypeSizeInBits(Ty).
   ///
-  /// If \p AllowNonInbounds is true, constant offsets in GEPs are stripped and
+  /// If \p AllowNonInbounds is true, offsets in GEPs are stripped and
   /// accumulated even if the GEP is not "inbounds".
   ///
+  /// If \p ExternalAnalysis is provided, it will be used to calculate an
+  /// offset when an operand of the GEP is not constant. For example, for a
+  /// value \p ExternalAnalysis might try to calculate a lower bound. If
+  /// \p ExternalAnalysis is successful, it should return true.
+  ///
   /// If this is called on a non-pointer value, it returns 'this' and the
   /// \p Offset is not modified.
   ///
@@ -615,9 +620,10 @@
   /// between the underlying value and the returned one. Thus, if no constant
   /// offset was found, the returned value is the underlying one and \p Offset
   /// is unchanged.
-  const Value *stripAndAccumulateConstantOffsets(const DataLayout &DL,
-                                                 APInt &Offset,
-                                                 bool AllowNonInbounds) const;
+  const Value *stripAndAccumulateConstantOffsets(
+      const DataLayout &DL, APInt &Offset, bool AllowNonInbounds,
+      function_ref<bool(Value &, APInt &)> ExternalAnalysis =
+          nullptr) const;

   Value *stripAndAccumulateConstantOffsets(const DataLayout &DL, APInt &Offset,
                                            bool AllowNonInbounds) {
     return const_cast<Value *>(
Index: llvm/lib/IR/Operator.cpp
===================================================================
--- llvm/lib/IR/Operator.cpp
+++ llvm/lib/IR/Operator.cpp
@@ -30,38 +30,78 @@
     return I->getResultElementType();
   return cast<GetElementPtrConstantExpr>(this)->getResultElementType();
 }
-
-bool GEPOperator::accumulateConstantOffset(const DataLayout &DL,
-                                           APInt &Offset) const {
+bool GEPOperator::accumulateConstantOffset(
+    const DataLayout &DL, APInt &Offset,
+    function_ref<bool(Value &, APInt &)> ExternalAnalysis) const {
   assert(Offset.getBitWidth() ==
              DL.getIndexSizeInBits(getPointerAddressSpace()) &&
          "The offset bit width does not match DL specification.");

+  bool UsedExternalAnalysis = false;
+  auto AccumulateOffset = [&](APInt Index, uint64_t Size) -> bool {
+    Index = Index.sextOrTrunc(Offset.getBitWidth());
+    APInt IndexedSize = APInt(Offset.getBitWidth(), Size);
+    // For array or vector indices, scale the index by the size of the type.
+    if (!UsedExternalAnalysis) {
+      Offset += Index * IndexedSize;
+    } else {
+      // The external analysis may return a bound rather than the exact value,
+      // so the scaled offset can overflow/underflow; we need to detect that.
+      bool Overflow = false;
+      APInt OffsetPlus = Index.smul_ov(IndexedSize, Overflow);
+      if (Overflow)
+        return false;
+      Offset = Offset.sadd_ov(OffsetPlus, Overflow);
+      if (Overflow)
+        return false;
+    }
+    return true;
+  };
+
   for (gep_type_iterator GTI = gep_type_begin(this), GTE = gep_type_end(this);
        GTI != GTE; ++GTI) {
-    ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand());
-    if (!OpC)
-      return false;
-    if (OpC->isZero())
-      continue;
-
-    // Scalable vectors have are multiplied by a runtime constant.
+    // Scalable vectors have to be multiplied by a runtime constant.
+    bool ScalableType = false;
     if (auto *VecTy = dyn_cast<VectorType>(GTI.getIndexedType()))
-      if (VecTy->isScalable())
-        return false;
+      ScalableType = VecTy->isScalable();

-    // Handle a struct index, which adds its field offset to the pointer.
-    if (StructType *STy = GTI.getStructTypeOrNull()) {
-      unsigned ElementIdx = OpC->getZExtValue();
-      const StructLayout *SL = DL.getStructLayout(STy);
-      Offset += APInt(Offset.getBitWidth(), SL->getElementOffset(ElementIdx));
+    Value *V = GTI.getOperand();
+    StructType *STy = GTI.getStructTypeOrNull();
+    // Handle a ConstantInt index if possible.
+    if (auto ConstOffset = dyn_cast<ConstantInt>(V)) {
+      if (ConstOffset->isZero())
+        continue;
+      // If the type is scalable and the constant is not zero (vscale * n * 0
+      // == 0), bail out.
+      if (ScalableType)
+        return false;
+      // Handle a struct index, which adds its field offset to the pointer.
+      if (STy) {
+        unsigned ElementIdx = ConstOffset->getZExtValue();
+        const StructLayout *SL = DL.getStructLayout(STy);
+        // The element offset is already in bytes, so scale by 1.
+        if (!AccumulateOffset(
+                APInt(Offset.getBitWidth(), SL->getElementOffset(ElementIdx)),
+                1))
+          return false;
+        continue;
+      }
+      if (!AccumulateOffset(ConstOffset->getValue(),
+                            DL.getTypeAllocSize(GTI.getIndexedType())))
+        return false;
       continue;
     }
-
-    // For array or vector indices, scale the index by the size of the type.
-    APInt Index = OpC->getValue().sextOrTrunc(Offset.getBitWidth());
-    Offset += Index * APInt(Offset.getBitWidth(),
-                            DL.getTypeAllocSize(GTI.getIndexedType()));
+    // The operand is not constant; check whether an external analysis was
+    // provided. External analysis is not applicable to a struct type.
+    if (!ExternalAnalysis || STy || ScalableType)
+      return false;
+    APInt AnalysisIndex;
+    if (!ExternalAnalysis(*V, AnalysisIndex))
+      return false;
+    UsedExternalAnalysis = true;
+    if (!AccumulateOffset(AnalysisIndex,
+                          DL.getTypeAllocSize(GTI.getIndexedType())))
+      return false;
   }
   return true;
 }
Index: llvm/lib/IR/Value.cpp
===================================================================
--- llvm/lib/IR/Value.cpp
+++ llvm/lib/IR/Value.cpp
@@ -596,9 +596,9 @@
   return stripPointerCastsAndOffsets(this);
 }

-const Value *
-Value::stripAndAccumulateConstantOffsets(const DataLayout &DL, APInt &Offset,
-                                         bool AllowNonInbounds) const {
+const Value *Value::stripAndAccumulateConstantOffsets(
+    const DataLayout &DL, APInt &Offset, bool AllowNonInbounds,
+    function_ref<bool(Value &, APInt &)> ExternalAnalysis) const {
   if (!getType()->isPtrOrPtrVectorTy())
     return this;

@@ -624,7 +624,7 @@
       // of GEP's pointer type rather than the size of the original
      // pointer type.
       APInt GEPOffset(DL.getIndexTypeSizeInBits(V->getType()), 0);
-      if (!GEP->accumulateConstantOffset(DL, GEPOffset))
+      if (!GEP->accumulateConstantOffset(DL, GEPOffset, ExternalAnalysis))
         return V;

       // Stop traversal if the pointer offset wouldn't fit in the bit-width
@@ -633,7 +633,20 @@
       if (GEPOffset.getMinSignedBits() > BitWidth)
         return V;

-      Offset += GEPOffset.sextOrTrunc(BitWidth);
+      // The external analysis may return a bound rather than the exact
+      // offset, so we need to detect overflow/underflow when accumulating.
+      APInt GEPOffsetST = GEPOffset.sextOrTrunc(BitWidth);
+      if (!ExternalAnalysis) {
+        Offset += GEPOffsetST;
+      } else {
+        bool Overflow = false;
+        APInt OldOffset = Offset;
+        Offset = Offset.sadd_ov(GEPOffsetST, Overflow);
+        if (Overflow) {
+          Offset = OldOffset;
+          return V;
+        }
+      }
       V = GEP->getPointerOperand();
     } else if (Operator::getOpcode(V) == Instruction::BitCast ||
                Operator::getOpcode(V) == Instruction::AddrSpaceCast) {
Index: llvm/lib/Transforms/IPO/AttributorAttributes.cpp
===================================================================
--- llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -332,6 +332,43 @@
   return true;
 }

+static const Value *stripAndAccumulateMinimalOffsets(
+    Attributor &A, const AbstractAttribute &QueryingAA, const Value *Val,
+    const DataLayout &DL, APInt &Offset, bool AllowNonInbounds,
+    bool UseAssumed = false) {
+
+  auto AttributorAnalysis = [&](Value &V, APInt &ROffset) -> bool {
+    const IRPosition &Pos = IRPosition::value(V);
+    // Only track dependence if we are going to use the assumed info.
+    const AAValueConstantRange &ValueConstantRangeAA =
+        A.getAAFor<AAValueConstantRange>(QueryingAA, Pos,
+                                         /* TrackDependence */ UseAssumed);
+    ConstantRange Range = UseAssumed ? ValueConstantRangeAA.getAssumed()
+                                     : ValueConstantRangeAA.getKnown();
+    // We can only use the lower bound of the range; the upper bound is an
+    // over-approximation and can be higher than the value really is.
+    ROffset = Range.getSignedMin();
+    return true;
+  };
+
+  return Val->stripAndAccumulateConstantOffsets(DL, Offset, AllowNonInbounds,
+                                                AttributorAnalysis);
+}
+
+static const Value *getMinimalBaseOfAccessPointerOperand(
+    Attributor &A, const AbstractAttribute &QueryingAA, const Instruction *I,
+    int64_t &BytesOffset, const DataLayout &DL, bool AllowNonInbounds = false) {
+  const Value *Ptr = getPointerOperand(I, /* AllowVolatile */ false);
+  if (!Ptr)
+    return nullptr;
+  APInt OffsetAPInt(DL.getIndexTypeSizeInBits(Ptr->getType()), 0);
+  const Value *Base = stripAndAccumulateMinimalOffsets(
+      A, QueryingAA, Ptr, DL, OffsetAPInt, AllowNonInbounds);
+
+  BytesOffset = OffsetAPInt.getSExtValue();
+  return Base;
+}
+
 static const Value *
 getBasePointerOfAccessPointerOperand(const Instruction *I, int64_t &BytesOffset,
                                      const DataLayout &DL,
@@ -1585,14 +1622,16 @@
     TrackUse = true;
     return 0;
   }
-  if (auto *GEP = dyn_cast<GetElementPtrInst>(I))
-    if (GEP->hasAllConstantIndices()) {
-      TrackUse = true;
-      return 0;
-    }
+
+  if (isa<GetElementPtrInst>(I)) {
+    TrackUse = true;
+    return 0;
+  }

   int64_t Offset;
-  if (const Value *Base = getBasePointerOfAccessPointerOperand(I, Offset, DL)) {
+  const Value *Base =
+      getMinimalBaseOfAccessPointerOperand(A, QueryingAA, I, Offset, DL);
+  if (Base) {
     if (Base == &AssociatedValue &&
         getPointerOperand(I, /* AllowVolatile */ false) == UseV) {
       int64_t DerefBytes =
@@ -1604,8 +1643,9 @@
   }

   /// Corner case when an offset is 0.
-  if (const Value *Base = getBasePointerOfAccessPointerOperand(
-          I, Offset, DL, /*AllowNonInbounds*/ true)) {
+  Base = getBasePointerOfAccessPointerOperand(I, Offset, DL,
+                                              /*AllowNonInbounds*/ true);
+  if (Base) {
     if (Offset == 0 && Base == &AssociatedValue &&
         getPointerOperand(I, /* AllowVolatile */ false) == UseV) {
       int64_t DerefBytes =
@@ -3268,6 +3308,8 @@
     bool TrackUse = false;
     int64_t DerefBytes = getKnownNonNullAndDerefBytesForUse(
         A, *this, getAssociatedValue(), U, I, IsNonNull, TrackUse);
+    LLVM_DEBUG(dbgs() << "[AADereferenceable] Deref bytes: " << DerefBytes
+                      << " for instruction " << *I << "\n");
     addAccessedBytesForUse(A, U, I, State);
     State.takeKnownDerefBytesMaximum(DerefBytes);
@@ -3320,13 +3362,13 @@
     const DataLayout &DL = A.getDataLayout();

-    auto VisitValueCB = [&](Value &V, const Instruction *, DerefState &T,
+    auto VisitValueCB = [&](const Value &V, const Instruction *, DerefState &T,
                             bool Stripped) -> bool {
       unsigned IdxWidth =
           DL.getIndexSizeInBits(V.getType()->getPointerAddressSpace());
       APInt Offset(IdxWidth, 0);
       const Value *Base =
-          V.stripAndAccumulateInBoundsConstantOffsets(DL, Offset);
+          stripAndAccumulateMinimalOffsets(A, *this, &V, DL, Offset, false);

       const auto &AA =
           A.getAAFor<AADereferenceable>(*this, IRPosition::value(*Base));
@@ -3343,7 +3385,6 @@
         T.GlobalState &= DS.GlobalState;
       }

-      // TODO: Use `AAConstantRange` to infer dereferenceable bytes.
       // For now we do not try to "increase" dereferenceability due to negative
       // indices as we first have to come up with code to deal with loops and
Index: llvm/test/Transforms/Attributor/dereferenceable-1.ll
===================================================================
--- llvm/test/Transforms/Attributor/dereferenceable-1.ll
+++ llvm/test/Transforms/Attributor/dereferenceable-1.ll
@@ -1,5 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes
 ; RUN: opt -attributor -attributor-manifest-internal --attributor-disable=false -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=16 -S < %s | FileCheck %s --check-prefix=ATTRIBUTOR
+; RUN: opt -passes=attributor-cgscc --attributor-disable=false -attributor-manifest-internal -S < %s | FileCheck %s --check-prefixes=ATTRIBUTOR_NPM
+
+
 ; FIXME: Figure out why we need 16 iterations here.

 ; UTC_ARGS: --disable
@@ -209,8 +212,38 @@

 ; UTC_ARGS: --enable

-; TEST 8
-; Use Constant range in deereferenceable
+; TEST 8 (positive case)
+; Use constant range in dereferenceable
+
+define void @test8(i8* %ptr) #0 {
+; FIXME: %ptr should be dereferenceable(31)
+; ATTRIBUTOR_NPM: define void @test8(i8* nocapture nofree nonnull writeonly dereferenceable(21) %ptr)
+  br label %1
+1:                                                ; preds = %5, %0
+  %i.0 = phi i32 [ 20, %0 ], [ %4, %5 ]
+  %2 = sext i32 %i.0 to i64
+  %3 = getelementptr inbounds i8, i8* %ptr, i64 %2
+  store i8 32, i8* %3, align 1
+  %4 = add nsw i32 %i.0, 1
+  br label %5
+5:                                                ; preds = %1
+  %6 = icmp slt i32 %4, 30
+  br i1 %6, label %1, label %7
+
+7:                                                ; preds = %5
+  ret void
+}
+
+; 8.2 (negative case)
+; ATTRIBUTOR_NPM: void @test8_neg
+; ATTRIBUTOR_NPM-NOT: dereferenceable
+; ATTRIBUTOR_NPM-SAME: %ptr
+define void @test8_neg(i32 %i, i8* %ptr) #0 {
+  %1 = sext i32 %i to i64
+  %2 = getelementptr inbounds i8, i8* %ptr, i64 %1
+  store i8 65, i8* %2, align 1
+  ret void
+}
+
 ; void g(int *p, long long int *range){
 ;   int r = *range ; // [10, 99]
 ;   fill_range(p, *range);
Index: llvm/test/Transforms/Attributor/willreturn.ll
===================================================================
--- llvm/test/Transforms/Attributor/willreturn.ll
+++ llvm/test/Transforms/Attributor/willreturn.ll
@@ -306,7 +306,7 @@

 ; ATTRIBUTOR_MODULE: Function Attrs: argmemonly nofree noinline nosync nounwind readonly uwtable willreturn
 ; ATTRIBUTOR_CGSCC: Function Attrs: argmemonly nofree noinline norecurse nosync nounwind readonly uwtable willreturn
-; ATTRIBUTOR-NEXT: define i32 @loop_constant_trip_count(i32* nocapture nofree readonly %0)
+; ATTRIBUTOR-NEXT: define i32 @loop_constant_trip_count(i32* nocapture nofree nonnull readonly dereferenceable(4) %0)
 define i32 @loop_constant_trip_count(i32* nocapture readonly %0) #0 {
   br label %3
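For reference, a minimal sketch (not part of the patch) of how a client might drive the new ExternalAnalysis hook, assuming only the signatures added above. The helper names stripWithLowerBounds and getLowerBound are illustrative, not part of any API; a real client would obtain bounds from an analysis such as AAValueConstantRange, as stripAndAccumulateMinimalOffsets does in AttributorAttributes.cpp.

// Sketch: strip casts/GEPs off Ptr while accumulating a conservative byte
// offset into Offset. Non-constant GEP indices are replaced by the signed
// lower bound supplied by the (hypothetical) getLowerBound callback.
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Value.h"
using namespace llvm;

static const Value *
stripWithLowerBounds(const Value *Ptr, const DataLayout &DL, APInt &Offset,
                     function_ref<APInt(const Value &)> getLowerBound) {
  auto ExternalAnalysis = [&](Value &V, APInt &ROffset) -> bool {
    // Only a lower bound is safe here; an upper bound may exceed the value
    // the index actually takes at runtime.
    ROffset = getLowerBound(V);
    return true;
  };
  // Offset must already be as wide as the index type of Ptr's address space,
  // e.g. APInt(DL.getIndexTypeSizeInBits(Ptr->getType()), 0).
  return Ptr->stripAndAccumulateConstantOffsets(
      DL, Offset, /* AllowNonInbounds */ false, ExternalAnalysis);
}

If the callback cannot bound an index, it should return false; accumulateConstantOffset then gives up on that GEP, and stripAndAccumulateConstantOffsets stops the traversal there, returning the GEP itself as the base.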