Index: llvm/lib/Transforms/IPO/Attributor.cpp
===================================================================
--- llvm/lib/Transforms/IPO/Attributor.cpp
+++ llvm/lib/Transforms/IPO/Attributor.cpp
@@ -508,6 +508,127 @@
   llvm_unreachable("Expected enum or string attribute!");
 }
 
+static bool
+accumulateMinimalOffset(Attributor &A, const AbstractAttribute &QueryingAA,
+                        const GEPOperator *GEP, const DataLayout &DL,
+                        APInt &Offset) {
+  assert(Offset.getBitWidth() ==
+             DL.getIndexSizeInBits(GEP->getPointerAddressSpace()) &&
+         "The offset bit width does not match the DL specification.");
+
+  for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP);
+       GTI != GTE; ++GTI) {
+    Value *V = GTI.getOperand();
+    APInt MinimalIndex;
+    // Handle constant indices directly.
+    if (auto *ConstOffset = dyn_cast<ConstantInt>(V)) {
+      if (ConstOffset->isZero())
+        continue;
+
+      // Handle a struct index, which adds its field offset to the pointer.
+      if (StructType *STy = GTI.getStructTypeOrNull()) {
+        unsigned ElementIdx = ConstOffset->getZExtValue();
+        const StructLayout *SL = DL.getStructLayout(STy);
+        Offset +=
+            APInt(Offset.getBitWidth(), SL->getElementOffset(ElementIdx));
+        continue;
+      }
+      MinimalIndex = ConstOffset->getValue();
+    } else {
+      // If the operand is not a ConstantInt, fall back to AAValueConstantRange.
+      const IRPosition &Pos = IRPosition::value(*V);
+      // No need to track dependence as long as we only use the known info.
+      const AAValueConstantRange &ValueConstantRangeAA =
+          A.getAAFor<AAValueConstantRange>(QueryingAA, Pos,
+                                           /* TrackDependence */ false);
+      // We can only use the lower end of the range because the upper end may
+      // be larger than the value can actually be.
+      MinimalIndex = ValueConstantRangeAA.getKnown().getSignedMin();
+    }
+    // For array or vector indices, scale the index by the size of the type.
+    APInt Index = MinimalIndex.sextOrTrunc(Offset.getBitWidth());
+    APInt IndexedSize =
+        APInt(Offset.getBitWidth(), DL.getTypeAllocSize(GTI.getIndexedType()));
+
+    // We must avoid signed overflow/underflow.
+    bool Overflow = false;
+    APInt OffsetPlus = Index.smul_ov(IndexedSize, Overflow);
+    if (Overflow)
+      return false;
+    Offset = Offset.sadd_ov(OffsetPlus, Overflow);
+    if (Overflow)
+      return false;
+  }
+  return true;
+}
+
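+// Worked example (illustrative only; the IR and the assumed range are
+// hypothetical): for
+//   %g = getelementptr inbounds { i32, [8 x i32] }, { i32, [8 x i32] }* %p,
+//                                i64 0, i32 1, i64 %i
+// with AAValueConstantRange knowing %i is in [2, 99], the struct index
+// contributes its field offset (4) and the array index contributes its known
+// signed minimum scaled by the element size (2 * 4 = 8), so the accumulated
+// minimal offset is 12 bytes.
+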
+static const Value *stripAndAccumulateMinimalOffsets(
+    Attributor &A, const AbstractAttribute &QueryingAA, const Value *Val,
+    const DataLayout &DL, APInt &Offset, bool AllowNonInbounds) {
+  if (!Val->getType()->isPtrOrPtrVectorTy())
+    return Val;
+
+  unsigned BitWidth = Offset.getBitWidth();
+  assert(BitWidth == DL.getIndexTypeSizeInBits(Val->getType()) &&
+         "The offset bit width does not match the DL specification.");
+
+  // Even though we don't look through PHI nodes, we could be called on an
+  // instruction in an unreachable block, which may be on a cycle.
+  SmallPtrSet<const Value *, 4> Visited;
+  Visited.insert(Val);
+  const Value *V = Val;
+  do {
+    if (auto *GEP = dyn_cast<GEPOperator>(V)) {
+      // If in-bounds was requested, we do not strip non-in-bounds GEPs.
+      if (!AllowNonInbounds && !GEP->isInBounds())
+        return V;
+
+      // If one of the values we have visited is an addrspacecast, then the
+      // pointer type of this GEP may be different from the type of the Val
+      // parameter which was passed to this function. This means that when we
+      // construct GEPOffset, we need to use the size of the GEP's pointer
+      // type rather than the size of the original pointer type.
+      APInt GEPOffset(DL.getIndexTypeSizeInBits(V->getType()), 0);
+      if (!accumulateMinimalOffset(A, QueryingAA, GEP, DL, GEPOffset))
+        return V;
+
+      // Stop traversal if the pointer offset wouldn't fit in the bit width
+      // provided by the Offset argument. This can happen due to AddrSpaceCast
+      // stripping.
+      if (GEPOffset.getMinSignedBits() > BitWidth)
+        return V;
+
+      Offset += GEPOffset.sextOrTrunc(BitWidth);
+      V = GEP->getPointerOperand();
+    } else if (Operator::getOpcode(V) == Instruction::BitCast ||
+               Operator::getOpcode(V) == Instruction::AddrSpaceCast) {
+      V = cast<Operator>(V)->getOperand(0);
+    } else if (auto *GA = dyn_cast<GlobalAlias>(V)) {
+      if (!GA->isInterposable())
+        V = GA->getAliasee();
+    } else if (const auto *Call = dyn_cast<CallBase>(V)) {
+      if (const Value *RV = Call->getReturnedArgOperand())
+        V = RV;
+    }
+    assert(V->getType()->isPtrOrPtrVectorTy() && "Unexpected operand type!");
+  } while (Visited.insert(V).second);
+
+  return V;
+}
+
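+// Usage sketch (hypothetical IR): when walking
+//   %c = bitcast i32* %q to float*
+//   %g = getelementptr inbounds float, float* %c, i64 %i
+// upwards from %g, both the GEP and the bitcast are stripped; if %i is known
+// to be at least 3, the base %q is reached with a minimal accumulated offset
+// of 3 * 4 = 12 bytes.
+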
+static const Value *getMinimalBaseOfAccessPointerOperand(
+    Attributor &A, const AbstractAttribute &QueryingAA, const Instruction *I,
+    int64_t &BytesOffset, const DataLayout &DL,
+    bool AllowNonInbounds = false) {
+  const Value *Ptr = getPointerOperand(I, /* AllowVolatile */ false);
+  if (!Ptr)
+    return nullptr;
+  APInt OffsetAPInt(DL.getIndexTypeSizeInBits(Ptr->getType()), 0);
+  const Value *Base = stripAndAccumulateMinimalOffsets(
+      A, QueryingAA, Ptr, DL, OffsetAPInt, AllowNonInbounds);
+
+  BytesOffset = OffsetAPInt.getSExtValue();
+  return Base;
+}
+
 static const Value *
 getBasePointerOfAccessPointerOperand(const Instruction *I, int64_t &BytesOffset,
                                      const DataLayout &DL,
@@ -1890,14 +2011,15 @@
     TrackUse = true;
     return 0;
   }
-  if (auto *GEP = dyn_cast<GetElementPtrInst>(I))
-    if (GEP->hasAllConstantIndices()) {
-      TrackUse = true;
-      return 0;
-    }
+  if (isa<GetElementPtrInst>(I)) {
+    TrackUse = true;
+    return 0;
+  }
 
   int64_t Offset;
-  if (const Value *Base = getBasePointerOfAccessPointerOperand(I, Offset, DL)) {
+  const Value *Base =
+      getMinimalBaseOfAccessPointerOperand(A, QueryingAA, I, Offset, DL);
+  if (Base) {
     if (Base == &AssociatedValue &&
         getPointerOperand(I, /* AllowVolatile */ false) == UseV) {
       int64_t DerefBytes =
@@ -1909,8 +2031,9 @@
   }
 
   /// Corner case when an offset is 0.
-  if (const Value *Base = getBasePointerOfAccessPointerOperand(
-          I, Offset, DL, /*AllowNonInbounds*/ true)) {
+  Base = getBasePointerOfAccessPointerOperand(I, Offset, DL,
+                                              /*AllowNonInbounds*/ true);
+  if (Base) {
     if (Offset == 0 && Base == &AssociatedValue &&
         getPointerOperand(I, /* AllowVolatile */ false) == UseV) {
       int64_t DerefBytes =
@@ -3437,6 +3560,10 @@
     int64_t DerefBytes = getKnownNonNullAndDerefBytesForUse(
         A, *this, getAssociatedValue(), U, I, IsNonNull, TrackUse);
 
+    LLVM_DEBUG(dbgs() << "[AADereferenceable] follow use called on " << *I
+                      << "\n");
+    LLVM_DEBUG(dbgs() << "[AADereferenceable] Deref bytes: " << DerefBytes
+                      << "\n");
     addAccessedBytesForUse(A, U, I);
     takeKnownDerefBytesMaximum(DerefBytes);
     return TrackUse;
@@ -3487,13 +3614,18 @@
     ChangeStatus Change = Base::updateImpl(A);
 
     const DataLayout &DL = A.getDataLayout();
-
-    auto VisitValueCB = [&](Value &V, DerefState &T, bool Stripped) -> bool {
+    LLVM_DEBUG(
+        dbgs()
+        << "\n[AADereferenceableFloating] Trying to merge floating values");
+    auto VisitValueCB = [&](const Value &V, DerefState &T,
+                            bool Stripped) -> bool {
+      LLVM_DEBUG(dbgs() << "\n[AADereferenceableFloating] Looking at value "
+                        << V);
       unsigned IdxWidth =
           DL.getIndexSizeInBits(V.getType()->getPointerAddressSpace());
       APInt Offset(IdxWidth, 0);
       const Value *Base =
-          V.stripAndAccumulateInBoundsConstantOffsets(DL, Offset);
+          stripAndAccumulateMinimalOffsets(A, *this, &V, DL, Offset, false);
 
       const auto &AA =
           A.getAAFor<AADereferenceable>(*this, IRPosition::value(*Base));
Index: llvm/test/Transforms/Attributor/dereferenceable-1.ll
===================================================================
--- llvm/test/Transforms/Attributor/dereferenceable-1.ll
+++ llvm/test/Transforms/Attributor/dereferenceable-1.ll
@@ -1,5 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt -attributor -attributor-manifest-internal --attributor-disable=false -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=16 -S < %s | FileCheck %s --check-prefix=ATTRIBUTOR
+; RUN: opt -passes=attributor-cgscc --attributor-disable=false -attributor-manifest-internal -S < %s | FileCheck %s --check-prefixes=ATTRIBUTOR_NPM
 ; FIXME: Figure out why we need 16 iterations here.
 
 declare void @deref_phi_user(i32* %a);
@@ -210,18 +211,25 @@
 }
 
 ; TEST 8
-; Use Constant range in deereferenceable
-; void g(int *p, long long int *range){
-;   int r = *range ; // [10, 99]
-;   fill_range(p, *range);
-; }
-
-; void fill_range(int* p, long long int start){
-;   for(long long int i = start;i