diff --git a/llvm/include/llvm/Analysis/ValueTracking.h b/llvm/include/llvm/Analysis/ValueTracking.h
--- a/llvm/include/llvm/Analysis/ValueTracking.h
+++ b/llvm/include/llvm/Analysis/ValueTracking.h
@@ -19,6 +19,7 @@
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/IR/CallSite.h"
 #include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
 #include "llvm/IR/Instruction.h"
 #include "llvm/IR/Intrinsics.h"
 #include <cassert>
@@ -29,7 +30,6 @@
 class AddOperator;
 class APInt;
 class AssumptionCache;
-class DataLayout;
 class DominatorTree;
 class GEPOperator;
 class IntrinsicInst;
@@ -238,8 +238,18 @@
   /// Analyze the specified pointer to see if it can be expressed as a base
   /// pointer plus a constant offset. Return the base and offset to the caller.
-  Value *GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset,
-                                          const DataLayout &DL);
+  ///
+  /// This is a wrapper around Value::stripAndAccumulateConstantOffsets that
+  /// creates and later unpacks the required APInt.
+  inline Value *GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset,
+                                                 const DataLayout &DL) {
+    APInt OffsetAPInt(DL.getIndexTypeSizeInBits(Ptr->getType()), 0);
+    Value *Base =
+        Ptr->stripAndAccumulateConstantOffsets(DL, OffsetAPInt,
+                                               /* AllowNonInbounds */ true);
+    Offset = OffsetAPInt.getSExtValue();
+    return Base;
+  }
 
   inline const Value *GetPointerBaseWithConstantOffset(const Value *Ptr,
                                                        int64_t &Offset,
                                                        const DataLayout &DL) {
diff --git a/llvm/include/llvm/IR/Value.h b/llvm/include/llvm/IR/Value.h
--- a/llvm/include/llvm/IR/Value.h
+++ b/llvm/include/llvm/IR/Value.h
@@ -546,19 +546,48 @@
         static_cast<const Value *>(this)->stripInBoundsConstantOffsets());
   }
 
-  /// Accumulate offsets from \a stripInBoundsConstantOffsets().
-  ///
-  /// Stores the resulting constant offset stripped into the APInt provided.
-  /// The provided APInt will be extended or truncated as needed to be the
-  /// correct bitwidth for an offset of this pointer type.
-  ///
-  /// If this is called on a non-pointer value, it returns 'this'.
+  /// Accumulate the constant offset this value has compared to a base pointer.
+  /// Only 'getelementptr' instructions (GEPs) with constant indices are
+  /// accumulated but other instructions, e.g., casts, are stripped away as
+  /// well. The accumulated constant offset is added to \p Offset and the base
+  /// pointer is returned.
+  ///
+  /// The APInt \p Offset has to have a bit-width equal to the IntPtr type for
+  /// the address space of 'this' pointer value, e.g., use
+  /// DataLayout::getIndexTypeSizeInBits(Ty).
+  ///
+  /// If \p AllowNonInbounds is true, constant offsets in GEPs are stripped and
+  /// accumulated even if the GEP is not "inbounds".
+  ///
+  /// If this is called on a non-pointer value, it returns 'this' and the
+  /// \p Offset is not modified.
+  ///
+  /// Note that this function will never return a nullptr. It will also never
+  /// manipulate the \p Offset in a way that would not match the difference
+  /// between the underlying value and the returned one. Thus, if no constant
+  /// offset was found, the returned value is the underlying one and \p Offset
+  /// is unchanged.
+  const Value *stripAndAccumulateConstantOffsets(const DataLayout &DL,
+                                                 APInt &Offset,
+                                                 bool AllowNonInbounds) const;
+  Value *stripAndAccumulateConstantOffsets(const DataLayout &DL, APInt &Offset,
+                                           bool AllowNonInbounds) {
+    return const_cast<Value *>(
+        static_cast<const Value *>(this)->stripAndAccumulateConstantOffsets(
+            DL, Offset, AllowNonInbounds));
+  }
+
+  /// This is a wrapper around stripAndAccumulateConstantOffsets with the
+  /// in-bounds requirement set to false.
   const Value *stripAndAccumulateInBoundsConstantOffsets(const DataLayout &DL,
-                                                         APInt &Offset) const;
+                                                         APInt &Offset) const {
+    return stripAndAccumulateConstantOffsets(DL, Offset,
+                                             /* AllowNonInbounds */ false);
+  }
   Value *stripAndAccumulateInBoundsConstantOffsets(const DataLayout &DL,
                                                    APInt &Offset) {
-    return const_cast<Value *>(static_cast<const Value *>(this)
-        ->stripAndAccumulateInBoundsConstantOffsets(DL, Offset));
+    return stripAndAccumulateConstantOffsets(DL, Offset,
+                                             /* AllowNonInbounds */ false);
   }
 
   /// Strip off pointer casts and inbounds GEPs.
diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp
--- a/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -659,32 +659,7 @@
   Type *IntPtrTy = DL.getIntPtrType(V->getType())->getScalarType();
   APInt Offset = APInt::getNullValue(IntPtrTy->getIntegerBitWidth());
 
-  // Even though we don't look through PHI nodes, we could be called on an
-  // instruction in an unreachable block, which may be on a cycle.
-  SmallPtrSet<Value *, 4> Visited;
-  Visited.insert(V);
-  do {
-    if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
-      if ((!AllowNonInbounds && !GEP->isInBounds()) ||
-          !GEP->accumulateConstantOffset(DL, Offset))
-        break;
-      V = GEP->getPointerOperand();
-    } else if (Operator::getOpcode(V) == Instruction::BitCast) {
-      V = cast<Operator>(V)->getOperand(0);
-    } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
-      if (GA->isInterposable())
-        break;
-      V = GA->getAliasee();
-    } else {
-      if (auto *Call = dyn_cast<CallBase>(V))
-        if (Value *RV = Call->getReturnedArgOperand()) {
-          V = RV;
-          continue;
-        }
-      break;
-    }
-    assert(V->getType()->isPtrOrPtrVectorTy() && "Unexpected operand type!");
-  } while (Visited.insert(V).second);
+  V = V->stripAndAccumulateConstantOffsets(DL, Offset, AllowNonInbounds);
 
   Constant *OffsetIntPtr = ConstantInt::get(IntPtrTy, Offset);
   if (V->getType()->isVectorTy())
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -38,7 +38,6 @@
 #include "llvm/IR/Constant.h"
 #include "llvm/IR/ConstantRange.h"
 #include "llvm/IR/Constants.h"
-#include "llvm/IR/DataLayout.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/DiagnosticInfo.h"
 #include "llvm/IR/Dominators.h"
@@ -3422,57 +3421,6 @@
   return nullptr;
 }
 
-/// Analyze the specified pointer to see if it can be expressed as a base
-/// pointer plus a constant offset. Return the base and offset to the caller.
-Value *llvm::GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset,
-                                              const DataLayout &DL) {
-  unsigned BitWidth = DL.getIndexTypeSizeInBits(Ptr->getType());
-  APInt ByteOffset(BitWidth, 0);
-
-  // We walk up the defs but use a visited set to handle unreachable code. In
-  // that case, we stop after accumulating the cycle once (not that it
-  // matters).
-  SmallPtrSet<Value *, 4> Visited;
-  while (Visited.insert(Ptr).second) {
-    if (Ptr->getType()->isVectorTy())
-      break;
-
-    if (GEPOperator *GEP = dyn_cast<GEPOperator>(Ptr)) {
-      // If one of the values we have visited is an addrspacecast, then
-      // the pointer type of this GEP may be different from the type
-      // of the Ptr parameter which was passed to this function. This
-      // means when we construct GEPOffset, we need to use the size
-      // of GEP's pointer type rather than the size of the original
-      // pointer type.
-      APInt GEPOffset(DL.getIndexTypeSizeInBits(Ptr->getType()), 0);
-      if (!GEP->accumulateConstantOffset(DL, GEPOffset))
-        break;
-
-      APInt OrigByteOffset(ByteOffset);
-      ByteOffset += GEPOffset.sextOrTrunc(ByteOffset.getBitWidth());
-      if (ByteOffset.getMinSignedBits() > 64) {
-        // Stop traversal if the pointer offset wouldn't fit into int64_t
-        // (this should be removed if Offset is updated to an APInt)
-        ByteOffset = OrigByteOffset;
-        break;
-      }
-
-      Ptr = GEP->getPointerOperand();
-    } else if (Operator::getOpcode(Ptr) == Instruction::BitCast ||
-               Operator::getOpcode(Ptr) == Instruction::AddrSpaceCast) {
-      Ptr = cast<Operator>(Ptr)->getOperand(0);
-    } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(Ptr)) {
-      if (GA->isInterposable())
-        break;
-      Ptr = GA->getAliasee();
-    } else {
-      break;
-    }
-  }
-
-  Offset = ByteOffset.getSExtValue();
-  return Ptr;
-}
-
 bool llvm::isGEPBasedOnPointerToString(const GEPOperator *GEP,
                                        unsigned CharSize) {
   // Make sure the GEP has exactly three arguments.
@@ -4400,7 +4348,7 @@
       // Note: It's really tempting to think that a conditional branch or
       // switch should be listed here, but that's incorrect. It's not
      // branching off of poison which is UB, it is executing a side effecting
-      // instruction which follows the branch.
+      // instruction which follows the branch.
       return nullptr;
   }
 }
diff --git a/llvm/lib/IR/Value.cpp b/llvm/lib/IR/Value.cpp
--- a/llvm/lib/IR/Value.cpp
+++ b/llvm/lib/IR/Value.cpp
@@ -555,13 +555,13 @@
 }
 
 const Value *
-Value::stripAndAccumulateInBoundsConstantOffsets(const DataLayout &DL,
-                                                 APInt &Offset) const {
-  if (!getType()->isPointerTy())
+Value::stripAndAccumulateConstantOffsets(const DataLayout &DL, APInt &Offset,
+                                         bool AllowNonInbounds) const {
+  if (!getType()->isPtrOrPtrVectorTy())
     return this;
 
-  assert(Offset.getBitWidth() == DL.getIndexSizeInBits(cast<PointerType>(
-             getType())->getAddressSpace()) &&
+  unsigned BitWidth = Offset.getBitWidth();
+  assert(BitWidth == DL.getIndexTypeSizeInBits(getType()) &&
          "The offset bit width does not match the DL specification.");
 
   // Even though we don't look through PHI nodes, we could be called on an
@@ -571,27 +571,39 @@
   const Value *V = this;
   do {
     if (auto *GEP = dyn_cast<GEPOperator>(V)) {
-      if (!GEP->isInBounds())
+      // If in-bounds was requested, we do not strip non-in-bounds GEPs.
+      if (!AllowNonInbounds && !GEP->isInBounds())
         return V;
-      APInt GEPOffset(Offset);
+
+      // If one of the values we have visited is an addrspacecast, then
+      // the pointer type of this GEP may be different from the type
+      // of the Ptr parameter which was passed to this function. This
+      // means when we construct GEPOffset, we need to use the size
+      // of GEP's pointer type rather than the size of the original
+      // pointer type.
+      APInt GEPOffset(DL.getIndexTypeSizeInBits(V->getType()), 0);
       if (!GEP->accumulateConstantOffset(DL, GEPOffset))
         return V;
-      Offset = GEPOffset;
+
+      // Stop traversal if the pointer offset wouldn't fit in the bit-width
+      // provided by the Offset argument. This can happen due to AddrSpaceCast
+      // stripping.
+      if (GEPOffset.getMinSignedBits() > BitWidth)
+        return V;
+
+      Offset += GEPOffset.sextOrTrunc(BitWidth);
       V = GEP->getPointerOperand();
-    } else if (Operator::getOpcode(V) == Instruction::BitCast) {
+    } else if (Operator::getOpcode(V) == Instruction::BitCast ||
+               Operator::getOpcode(V) == Instruction::AddrSpaceCast) {
       V = cast<Operator>(V)->getOperand(0);
     } else if (auto *GA = dyn_cast<GlobalAlias>(V)) {
-      V = GA->getAliasee();
-    } else {
-      if (const auto *Call = dyn_cast<CallBase>(V))
-        if (const Value *RV = Call->getReturnedArgOperand()) {
+      if (!GA->isInterposable())
+        V = GA->getAliasee();
+    } else if (const auto *Call = dyn_cast<CallBase>(V)) {
+      if (const Value *RV = Call->getReturnedArgOperand())
          V = RV;
-          continue;
-        }
-
-      return V;
     }
-    assert(V->getType()->isPointerTy() && "Unexpected operand type!");
+    assert(V->getType()->isPtrOrPtrVectorTy() && "Unexpected operand type!");
   } while (Visited.insert(V).second);
 
   return V;
diff --git a/llvm/test/Transforms/InstCombine/gep-addrspace.ll b/llvm/test/Transforms/InstCombine/gep-addrspace.ll
--- a/llvm/test/Transforms/InstCombine/gep-addrspace.ll
+++ b/llvm/test/Transforms/InstCombine/gep-addrspace.ll
@@ -42,15 +42,14 @@
 
 declare void @escape_alloca(i16*)
 
-; check that addrspacecast is not ignored (leading to an assertion failure)
-; when trying to mark a GEP as inbounds
+; check that addrspacecast is stripped when trying to mark a GEP as inbounds
 define { i8, i8 } @inbounds_after_addrspacecast() {
 ; CHECK-LABEL: @inbounds_after_addrspacecast(
 ; CHECK-NEXT:    [[T0:%.*]] = alloca i16, align 2
 ; CHECK-NEXT:    call void @escape_alloca(i16* nonnull [[T0]])
 ; CHECK-NEXT:    [[TMPCAST:%.*]] = bitcast i16* [[T0]] to [2 x i8]*
 ; CHECK-NEXT:    [[T1:%.*]] = addrspacecast [2 x i8]* [[TMPCAST]] to [2 x i8] addrspace(11)*
-; CHECK-NEXT:    [[T2:%.*]] = getelementptr [2 x i8], [2 x i8] addrspace(11)* [[T1]], i64 0, i64 1
+; CHECK-NEXT:    [[T2:%.*]] = getelementptr inbounds [2 x i8], [2 x i8] addrspace(11)* [[T1]], i64 0, i64 1
 ; CHECK-NEXT:    [[T3:%.*]] = load i8, i8 addrspace(11)* [[T2]], align 1
 ; CHECK-NEXT:    [[INSERT:%.*]] = insertvalue { i8, i8 } zeroinitializer, i8 [[T3]], 1
 ; CHECK-NEXT:    ret { i8, i8 } [[INSERT]]
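
Usage note (a minimal sketch, not part of the patch): the snippet below shows how a
caller might use the new interface; it mirrors the inline
GetPointerBaseWithConstantOffset wrapper added above. The helper name
getBaseAndByteOffset is illustrative only and not an existing LLVM API.

// Sketch only; assumes the usual headers are available, e.g.:
//   #include "llvm/IR/DataLayout.h"
//   #include "llvm/IR/Value.h"
using namespace llvm;

// Walks V back to its base pointer while accumulating the constant byte
// offset, looking through non-inbounds GEPs as well. V is assumed to be a
// pointer-typed value and DL the module's DataLayout.
static const Value *getBaseAndByteOffset(const Value *V, const DataLayout &DL,
                                         int64_t &ByteOffset) {
  // Size the APInt to the index width of V's address space, as required by
  // stripAndAccumulateConstantOffsets.
  APInt Offset(DL.getIndexTypeSizeInBits(V->getType()), 0);
  const Value *Base =
      V->stripAndAccumulateConstantOffsets(DL, Offset,
                                           /* AllowNonInbounds */ true);
  ByteOffset = Offset.getSExtValue();
  return Base;
}

Passing /* AllowNonInbounds */ false instead reproduces the behavior of the old
stripAndAccumulateInBoundsConstantOffsets entry point, which only strips inbounds
GEPs.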