Index: llvm/include/llvm/IR/DataLayout.h =================================================================== --- llvm/include/llvm/IR/DataLayout.h +++ llvm/include/llvm/IR/DataLayout.h @@ -579,6 +579,10 @@ /// This is used to implement getelementptr. int64_t getIndexedOffsetInType(Type *ElemTy, ArrayRef Indices) const; + /// Get GEP indices to access Offset inside ElemTy. ElemTy is updated to be + /// the result element type and Offset to be the residual offset. + SmallVector getIndicesForOffset(Type *&ElemTy, APInt &Offset) const; + /// Returns a StructLayout object, indicating the alignment of the /// struct, its size, and the offsets of its fields. /// Index: llvm/lib/Analysis/ConstantFolding.cpp =================================================================== --- llvm/lib/Analysis/ConstantFolding.cpp +++ llvm/lib/Analysis/ConstantFolding.cpp @@ -985,8 +985,6 @@ // we eliminate over-indexing of the notional static type array bounds. // This makes it easy to determine if the getelementptr is "inbounds". // Also, this helps GlobalOpt do SROA on GlobalVariables. - SmallVector NewIdxs; - Type *Ty = PTy; // For GEPs of GlobalValues, use the value type even for opaque pointers. // Otherwise use an i8 GEP. @@ -997,68 +995,31 @@ else SrcElemTy = Type::getInt8Ty(Ptr->getContext()); - do { - if (!Ty->isStructTy()) { - if (Ty->isPointerTy()) { - // The only pointer indexing we'll do is on the first index of the GEP. - if (!NewIdxs.empty()) - break; - - Ty = SrcElemTy; + if (!SrcElemTy->isSized()) + return nullptr; - // Only handle pointers to sized types, not pointers to functions. - if (!Ty->isSized()) - return nullptr; - } else { - Type *NextTy = GetElementPtrInst::getTypeAtIndex(Ty, (uint64_t)0); - if (!NextTy) - break; - Ty = NextTy; - } + Type *ElemTy = SrcElemTy; + SmallVector Indices = DL.getIndicesForOffset(ElemTy, Offset); + if (Offset != 0) + return nullptr; - // Determine which element of the array the offset points into. 
- APInt ElemSize(BitWidth, DL.getTypeAllocSize(Ty)); - if (ElemSize == 0) { - // The element size is 0. This may be [0 x Ty]*, so just use a zero - // index for this level and proceed to the next level to see if it can - // accommodate the offset. - NewIdxs.push_back(ConstantInt::get(IntIdxTy, 0)); - } else { - // The element size is non-zero divide the offset by the element - // size (rounding down), to compute the index at this level. - bool Overflow; - APInt NewIdx = Offset.sdiv_ov(ElemSize, Overflow); - if (Overflow) - break; - Offset -= NewIdx * ElemSize; - NewIdxs.push_back(ConstantInt::get(IntIdxTy, NewIdx)); - } - } else { - auto *STy = cast(Ty); - // If we end up with an offset that isn't valid for this struct type, we - // can't re-form this GEP in a regular form, so bail out. The pointer - // operand likely went through casts that are necessary to make the GEP - // sensible. - const StructLayout &SL = *DL.getStructLayout(STy); - if (Offset.isNegative() || Offset.uge(SL.getSizeInBytes())) - break; + // Try to add additional zero indices to reach the desired result element + // type. + // TODO: Should we avoid extra zero indices if it can't be reached anyway? + while (ElemTy != ResElemTy) { + Type *NextTy = GetElementPtrInst::getTypeAtIndex(ElemTy, (uint64_t)0); + if (!NextTy) + break; - // Determine which field of the struct the offset points into. The - // getZExtValue is fine as we've already ensured that the offset is - // within the range representable by the StructLayout API. - unsigned ElIdx = SL.getElementContainingOffset(Offset.getZExtValue()); - NewIdxs.push_back(ConstantInt::get(Type::getInt32Ty(Ty->getContext()), - ElIdx)); - Offset -= APInt(BitWidth, SL.getElementOffset(ElIdx)); - Ty = STy->getTypeAtIndex(ElIdx); - } - } while (Ty != ResElemTy); + Indices.push_back(APInt::getZero(isa(ElemTy) ? 
32 : BitWidth)); + ElemTy = NextTy; + } - // If we haven't used up the entire offset by descending the static - // type, then the offset is pointing into the middle of an indivisible - // member, so we can't simplify it. - if (Offset != 0) - return nullptr; + SmallVector NewIdxs; + for (const APInt &Index : Indices) + NewIdxs.push_back(ConstantInt::get(Type::getIntNTy(Ptr->getContext(), + Index.getBitWidth()), + Index)); // Preserve the inrange index from the innermost GEP if possible. We must // have calculated the same indices up to and including the inrange index. @@ -1075,7 +1036,8 @@ // Create a GEP. Constant *C = ConstantExpr::getGetElementPtr(SrcElemTy, Ptr, NewIdxs, InBounds, InRangeIndex); - assert(cast(C->getType())->isOpaqueOrPointeeTypeMatches(Ty) && + assert(cast(C->getType()) + ->isOpaqueOrPointeeTypeMatches(ElemTy) && "Computed GetElementPtr has unexpected type!"); // If we ended up indexing a member with a type that doesn't match Index: llvm/lib/IR/DataLayout.cpp =================================================================== --- llvm/lib/IR/DataLayout.cpp +++ llvm/lib/IR/DataLayout.cpp @@ -896,6 +896,67 @@ return Result; } +static void addElementIndex(SmallVectorImpl &Indices, + TypeSize ElemSize, APInt &Offset) { + // Skip over scalable or zero size elements. + if (ElemSize.isScalable() || ElemSize == 0) { + Indices.push_back(APInt::getZero(Offset.getBitWidth())); + return; + } + + APInt Index = Offset.sdiv(ElemSize); + Offset -= Index * ElemSize; + if (Offset.isNegative()) { + // Prefer a positive remaining offset to allow struct indexing. 
+ --Index; + Offset += ElemSize; + } + Indices.push_back(Index); +} + +SmallVector DataLayout::getIndicesForOffset(Type *&ElemTy, + APInt &Offset) const { + assert(ElemTy->isSized() && "Element type must be sized"); + SmallVector Indices; + addElementIndex(Indices, getTypeAllocSize(ElemTy), Offset); + while (Offset != 0) { + if (auto *ArrTy = dyn_cast(ElemTy)) { + ElemTy = ArrTy->getElementType(); + addElementIndex(Indices, getTypeAllocSize(ElemTy), Offset); + continue; + } + + if (auto *VecTy = dyn_cast(ElemTy)) { + ElemTy = VecTy->getElementType(); + unsigned ElemSizeInBits = getTypeSizeInBits(ElemTy).getFixedSize(); + // GEPs over non-multiple of 8 size vector elements are invalid. + if (ElemSizeInBits % 8 != 0) + break; + + addElementIndex(Indices, TypeSize::Fixed(ElemSizeInBits / 8), Offset); + continue; + } + + if (auto *STy = dyn_cast(ElemTy)) { + const StructLayout *SL = getStructLayout(STy); + uint64_t IntOffset = Offset.getZExtValue(); + if (IntOffset >= SL->getSizeInBytes()) + break; + + unsigned Index = SL->getElementContainingOffset(IntOffset); + Offset -= SL->getElementOffset(Index); + ElemTy = STy->getElementType(Index); + Indices.push_back(APInt(32, Index)); + continue; + } + + // Can't index into non-aggregate type. + break; + } + + return Indices; +} + /// getPreferredAlign - Return the preferred alignment of the specified global. /// This includes an explicitly requested alignment (if the global has one). Align DataLayout::getPreferredAlign(const GlobalVariable *GV) const { Index: llvm/lib/Transforms/InstCombine/InstructionCombining.cpp =================================================================== --- llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -1269,61 +1269,19 @@ /// specified offset. If so, fill them into NewIndices and return the resultant /// element type, otherwise return null. 
Type * -InstCombinerImpl::FindElementAtOffset(PointerType *PtrTy, int64_t Offset, +InstCombinerImpl::FindElementAtOffset(PointerType *PtrTy, int64_t IntOffset, SmallVectorImpl &NewIndices) { Type *Ty = PtrTy->getElementType(); if (!Ty->isSized()) return nullptr; - // Start with the index over the outer type. Note that the type size - // might be zero (even if the offset isn't zero) if the indexed type - // is something like [0 x {int, int}] - Type *IndexTy = DL.getIndexType(PtrTy); - int64_t FirstIdx = 0; - if (int64_t TySize = DL.getTypeAllocSize(Ty)) { - FirstIdx = Offset/TySize; - Offset -= FirstIdx*TySize; - - // Handle hosts where % returns negative instead of values [0..TySize). - if (Offset < 0) { - --FirstIdx; - Offset += TySize; - assert(Offset >= 0); - } - assert((uint64_t)Offset < (uint64_t)TySize && "Out of range offset"); - } - - NewIndices.push_back(ConstantInt::get(IndexTy, FirstIdx)); - - // Index into the types. If we fail, set OrigBase to null. - while (Offset) { - // Indexing into tail padding between struct/array elements. - if (uint64_t(Offset * 8) >= DL.getTypeSizeInBits(Ty)) - return nullptr; - - if (StructType *STy = dyn_cast(Ty)) { - const StructLayout *SL = DL.getStructLayout(STy); - assert(Offset < (int64_t)SL->getSizeInBytes() && - "Offset must stay within the indexed type"); - - unsigned Elt = SL->getElementContainingOffset(Offset); - NewIndices.push_back(ConstantInt::get(Type::getInt32Ty(Ty->getContext()), - Elt)); - - Offset -= SL->getElementOffset(Elt); - Ty = STy->getElementType(Elt); - } else if (ArrayType *AT = dyn_cast(Ty)) { - uint64_t EltSize = DL.getTypeAllocSize(AT->getElementType()); - assert(EltSize && "Cannot index into a zero-sized array"); - NewIndices.push_back(ConstantInt::get(IndexTy,Offset/EltSize)); - Offset %= EltSize; - Ty = AT->getElementType(); - } else { - // Otherwise, we can't index into the middle of this atomic type, bail. 
- return nullptr; - } - } + APInt Offset(DL.getIndexTypeSizeInBits(PtrTy), IntOffset); + SmallVector Indices = DL.getIndicesForOffset(Ty, Offset); + if (!Offset.isZero()) + return nullptr; + for (const APInt &Index : Indices) + NewIndices.push_back(Builder.getInt(Index)); return Ty; } Index: llvm/lib/Transforms/Scalar/SROA.cpp =================================================================== --- llvm/lib/Transforms/Scalar/SROA.cpp +++ llvm/lib/Transforms/Scalar/SROA.cpp @@ -1483,76 +1483,6 @@ return buildGEP(IRB, BasePtr, Indices, NamePrefix); } -/// Recursively compute indices for a natural GEP. -/// -/// This is the recursive step for getNaturalGEPWithOffset that walks down the -/// element types adding appropriate indices for the GEP. -static Value *getNaturalGEPRecursively(IRBuilderTy &IRB, const DataLayout &DL, - Value *Ptr, Type *Ty, APInt &Offset, - Type *TargetTy, - SmallVectorImpl &Indices, - const Twine &NamePrefix) { - if (Offset == 0) - return getNaturalGEPWithType(IRB, DL, Ptr, Ty, TargetTy, Indices, - NamePrefix); - - // We can't recurse through pointer types. - if (Ty->isPointerTy()) - return nullptr; - - // We try to analyze GEPs over vectors here, but note that these GEPs are - // extremely poorly defined currently. The long-term goal is to remove GEPing - // over a vector from the IR completely. - if (VectorType *VecTy = dyn_cast(Ty)) { - unsigned ElementSizeInBits = - DL.getTypeSizeInBits(VecTy->getScalarType()).getFixedSize(); - if (ElementSizeInBits % 8 != 0) { - // GEPs over non-multiple of 8 size vector elements are invalid. 
- return nullptr; - } - APInt ElementSize(Offset.getBitWidth(), ElementSizeInBits / 8); - APInt NumSkippedElements = Offset.sdiv(ElementSize); - if (NumSkippedElements.ugt(cast(VecTy)->getNumElements())) - return nullptr; - Offset -= NumSkippedElements * ElementSize; - Indices.push_back(IRB.getInt(NumSkippedElements)); - return getNaturalGEPRecursively(IRB, DL, Ptr, VecTy->getElementType(), - Offset, TargetTy, Indices, NamePrefix); - } - - if (ArrayType *ArrTy = dyn_cast(Ty)) { - Type *ElementTy = ArrTy->getElementType(); - APInt ElementSize(Offset.getBitWidth(), - DL.getTypeAllocSize(ElementTy).getFixedSize()); - APInt NumSkippedElements = Offset.sdiv(ElementSize); - if (NumSkippedElements.ugt(ArrTy->getNumElements())) - return nullptr; - - Offset -= NumSkippedElements * ElementSize; - Indices.push_back(IRB.getInt(NumSkippedElements)); - return getNaturalGEPRecursively(IRB, DL, Ptr, ElementTy, Offset, TargetTy, - Indices, NamePrefix); - } - - StructType *STy = dyn_cast(Ty); - if (!STy) - return nullptr; - - const StructLayout *SL = DL.getStructLayout(STy); - uint64_t StructOffset = Offset.getZExtValue(); - if (StructOffset >= SL->getSizeInBytes()) - return nullptr; - unsigned Index = SL->getElementContainingOffset(StructOffset); - Offset -= APInt(Offset.getBitWidth(), SL->getElementOffset(Index)); - Type *ElementTy = STy->getElementType(Index); - if (Offset.uge(DL.getTypeAllocSize(ElementTy).getFixedSize())) - return nullptr; // The offset points into alignment padding. - - Indices.push_back(IRB.getInt32(Index)); - return getNaturalGEPRecursively(IRB, DL, Ptr, ElementTy, Offset, TargetTy, - Indices, NamePrefix); -} - /// Get a natural GEP from a base pointer to a particular offset and /// resulting in a particular type. /// @@ -1577,18 +1507,15 @@ Type *ElementTy = Ty->getElementType(); if (!ElementTy->isSized()) return nullptr; // We can't GEP through an unsized element. 
- if (isa(ElementTy)) + + SmallVector IntIndices = DL.getIndicesForOffset(ElementTy, Offset); + if (Offset != 0) return nullptr; - APInt ElementSize(Offset.getBitWidth(), - DL.getTypeAllocSize(ElementTy).getFixedSize()); - if (ElementSize == 0) - return nullptr; // Zero-length arrays can't help us build a natural GEP. - APInt NumSkippedElements = Offset.sdiv(ElementSize); - - Offset -= NumSkippedElements * ElementSize; - Indices.push_back(IRB.getInt(NumSkippedElements)); - return getNaturalGEPRecursively(IRB, DL, Ptr, ElementTy, Offset, TargetTy, - Indices, NamePrefix); + + for (const APInt &Index : IntIndices) + Indices.push_back(IRB.getInt(Index)); + return getNaturalGEPWithType(IRB, DL, Ptr, ElementTy, TargetTy, Indices, + NamePrefix); } /// Compute an adjusted pointer from Ptr by Offset bytes where the Index: llvm/test/Transforms/InstCombine/getelementptr.ll =================================================================== --- llvm/test/Transforms/InstCombine/getelementptr.ll +++ llvm/test/Transforms/InstCombine/getelementptr.ll @@ -794,7 +794,7 @@ ; Don't treat signed offsets as unsigned. 
define i8* @test36() nounwind { ; CHECK-LABEL: @test36( -; CHECK-NEXT: ret i8* getelementptr ([11 x i8], [11 x i8]* @array, i64 0, i64 -1) +; CHECK-NEXT: ret i8* getelementptr ([11 x i8], [11 x i8]* @array, i64 -1, i64 10) ; ret i8* getelementptr ([11 x i8], [11 x i8]* @array, i32 0, i64 -1) } Index: llvm/test/Transforms/SROA/scalable-vectors.ll =================================================================== --- llvm/test/Transforms/SROA/scalable-vectors.ll +++ llvm/test/Transforms/SROA/scalable-vectors.ll @@ -71,11 +71,10 @@ define @cast_alloca_from_svint32_t() { ; CHECK-LABEL: @cast_alloca_from_svint32_t( ; CHECK-NEXT: [[RETVAL_COERCE:%.*]] = alloca , align 16 -; CHECK-NEXT: [[TMP1:%.*]] = bitcast * [[RETVAL_COERCE]] to i8* -; CHECK-NEXT: [[RETVAL_0__SROA_CAST:%.*]] = bitcast i8* [[TMP1]] to <16 x i32>* +; CHECK-NEXT: [[RETVAL_0__SROA_CAST:%.*]] = bitcast * [[RETVAL_COERCE]] to <16 x i32>* ; CHECK-NEXT: store <16 x i32> undef, <16 x i32>* [[RETVAL_0__SROA_CAST]], align 16 -; CHECK-NEXT: [[TMP2:%.*]] = load , * [[RETVAL_COERCE]], align 16 -; CHECK-NEXT: ret [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = load , * [[RETVAL_COERCE]], align 16 +; CHECK-NEXT: ret [[TMP1]] ; %retval = alloca <16 x i32> %retval.coerce = alloca