diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -3627,11 +3627,10 @@ In general vector elements are laid out in memory in the same way as :ref:`array types <t_array>`. Such an analogy works fine as long as the vector -elements are byte sized. However, when the elements of the vector aren't byte -sized it gets a bit more complicated. One way to describe the layout is by -describing what happens when a vector such as <N x iM> is bitcasted to an -integer type with N*M bits, and then following the rules for storing such an -integer to memory. +elements are byte sized and naturally aligned. Otherwise, it gets a bit more +complicated. One way to describe the layout is by describing what happens +when a vector such as <N x iM> is bitcasted to an integer type with N*M bits, +and then following the rules for storing such an integer to memory. A bitcast from a vector type to a scalar integer type will see the elements being packed together (without padding). The order in which elements are diff --git a/llvm/include/llvm/IR/GetElementPtrTypeIterator.h b/llvm/include/llvm/IR/GetElementPtrTypeIterator.h --- a/llvm/include/llvm/IR/GetElementPtrTypeIterator.h +++ b/llvm/include/llvm/IR/GetElementPtrTypeIterator.h @@ -31,7 +31,7 @@ class generic_gep_type_iterator { ItTy OpIt; - PointerUnion<StructType *, Type *> CurTy; + PointerUnion<StructType *, VectorType *, Type *> CurTy; generic_gep_type_iterator() = default; @@ -70,7 +70,9 @@ Type *getIndexedType() const { if (auto *T = CurTy.dyn_cast<Type *>()) return T; - return CurTy.get<StructType *>()->getTypeAtIndex(getOperand()); + if (auto *STy = CurTy.dyn_cast<StructType *>()) + return STy->getTypeAtIndex(getOperand()); + return CurTy.get<VectorType *>()->getElementType(); } Value *getOperand() const { return const_cast<Value *>(&**OpIt); } @@ -80,7 +82,7 @@ if (auto *ATy = dyn_cast<ArrayType>(Ty)) CurTy = ATy->getElementType(); else if (auto *VTy = dyn_cast<VectorType>(Ty)) - CurTy = VTy->getElementType(); + CurTy = VTy; else CurTy = dyn_cast<StructType>(Ty); ++OpIt; @@ -109,7 +111,8 @@ // that. 
bool isStruct() const { return CurTy.is<StructType *>(); } - bool isSequential() const { return CurTy.is<Type *>(); } + bool isVector() const { return CurTy.is<VectorType *>(); } + bool isSequential() const { return !isStruct(); } StructType *getStructType() const { return CurTy.get<StructType *>(); } diff --git a/llvm/include/llvm/IR/Operator.h b/llvm/include/llvm/IR/Operator.h --- a/llvm/include/llvm/IR/Operator.h +++ b/llvm/include/llvm/IR/Operator.h @@ -387,6 +387,17 @@ (SubclassOptionalData & ~IsInBounds) | (B * IsInBounds); } + // Get the size of the indexed element in its containing outer type. + // + // If OuterType is a VectorType, the unpadded element size is returned, + // which must be byte-aligned. + // Otherwise (if OuterType is a StructType or ArrayType), the indexed + // element's AllocSize is returned. + // + // Useful to compute byte-based offsets of elements within the outer type. + static TypeSize getElementSize(const DataLayout &DL, Type *ElementTy, + bool OuterIsVector); + public: /// Test whether this is an inbounds GEP, as defined by LangRef.html. bool isInBounds() const { diff --git a/llvm/lib/IR/Operator.cpp b/llvm/lib/IR/Operator.cpp --- a/llvm/lib/IR/Operator.cpp +++ b/llvm/lib/IR/Operator.cpp @@ -56,6 +56,16 @@ return cast<GEPOperator>(this)->getResultElementType(); } +TypeSize GEPOperator::getElementSize(const DataLayout &DL, Type *ElementTy, + bool OuterIsVector) { + if (!OuterIsVector) + return DL.getTypeAllocSize(ElementTy); + + auto BitSize = DL.getTypeSizeInBits(ElementTy); + assert(BitSize % 8 == 0 && "GEP element size must be byte-aligned!"); + return {BitSize / 8, BitSize.isScalable()}; +} + Align GEPOperator::getMaxPreservedAlignment(const DataLayout &DL) const { /// compute the worse possible offset for every level of the GEP et accumulate /// the minimum alignment into Result. 
@@ -76,7 +86,8 @@ int64_t ElemCount = 1; if (OpC) ElemCount = OpC->getZExtValue(); - Offset = DL.getTypeAllocSize(GTI.getIndexedType()) * ElemCount; + Offset = + getElementSize(DL, GTI.getIndexedType(), GTI.isVector()) * ElemCount; } Result = Align(MinAlign(Offset, Result.value())); } @@ -147,8 +158,9 @@ return false; continue; } - if (!AccumulateOffset(ConstOffset->getValue(), - DL.getTypeAllocSize(GTI.getIndexedType()))) + if (!AccumulateOffset( + ConstOffset->getValue(), + getElementSize(DL, GTI.getIndexedType(), GTI.isVector()))) return false; continue; } @@ -161,8 +173,9 @@ if (!ExternalAnalysis(*V, AnalysisIndex)) return false; UsedExternalAnalysis = true; - if (!AccumulateOffset(AnalysisIndex, - DL.getTypeAllocSize(GTI.getIndexedType()))) + if (!AccumulateOffset( + AnalysisIndex, + getElementSize(DL, GTI.getIndexedType(), GTI.isVector()))) return false; } return true; @@ -208,15 +221,16 @@ 1); continue; } - CollectConstantOffset(ConstOffset->getValue(), - DL.getTypeAllocSize(GTI.getIndexedType())); + CollectConstantOffset( + ConstOffset->getValue(), + getElementSize(DL, GTI.getIndexedType(), GTI.isVector())); continue; } if (STy || ScalableType) return false; - APInt IndexedSize = - APInt(BitWidth, DL.getTypeAllocSize(GTI.getIndexedType())); + APInt IndexedSize = APInt( + BitWidth, getElementSize(DL, GTI.getIndexedType(), GTI.isVector())); // Insert an initial offset of 0 for V iff none exists already, then // increment the offset by IndexedSize. 
if (!IndexedSize.isZero()) { diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp --- a/llvm/lib/Transforms/Scalar/SROA.cpp +++ b/llvm/lib/Transforms/Scalar/SROA.cpp @@ -1508,6 +1508,9 @@ Value *Ptr, APInt Offset, Type *TargetTy, SmallVectorImpl<Value *> &Indices, const Twine &NamePrefix) { +#ifndef NDEBUG + APInt OrigOffset = Offset; +#endif PointerType *Ty = cast<PointerType>(Ptr->getType()); // Don't consider any GEPs through an i8* as natural unless the TargetTy is @@ -1525,8 +1528,20 @@ for (const APInt &Index : IntIndices) Indices.push_back(IRB.getInt(Index)); - return getNaturalGEPWithType(IRB, DL, Ptr, ElementTy, TargetTy, Indices, - NamePrefix); + Value *Result = getNaturalGEPWithType(IRB, DL, Ptr, ElementTy, TargetTy, + Indices, NamePrefix); +#ifndef NDEBUG + auto *GEP = dyn_cast<GEPOperator>(Result); + if (GEP && GEP->getPointerOperand() == Ptr) { + APInt GEPOffset(DL.getIndexTypeSizeInBits(GEP->getType()), 0); + assert(GEP->accumulateConstantOffset(DL, GEPOffset) && + "Expected GEP with constant offset!"); + assert(APInt::isSameValue(GEPOffset, OrigOffset) && + "GEP has incorrect offset!"); + } +#endif + + return Result; } /// Compute an adjusted pointer from Ptr by Offset bytes where the diff --git a/llvm/test/Transforms/SROA/overaligned-datalayout.ll b/llvm/test/Transforms/SROA/overaligned-datalayout.ll --- a/llvm/test/Transforms/SROA/overaligned-datalayout.ll +++ b/llvm/test/Transforms/SROA/overaligned-datalayout.ll @@ -36,7 +36,12 @@ %VecStruct = type { <4 x i16> } define i8 @test_vector_bitcast_i8() { ; OVERALIGNED-LABEL: @test_vector_bitcast_i8( -; OVERALIGNED-NEXT: ret i8 poison +; OVERALIGNED-NEXT: [[ALLOCA_SROA_0:%.*]] = alloca <4 x i16>, align 8 +; OVERALIGNED-NEXT: store <4 x i16> <i16 0, i16 1, i16 2, i16 3>, <4 x i16>* [[ALLOCA_SROA_0]], align 8 +; OVERALIGNED-NEXT: [[ALLOCA_SROA_0_6_I8PTROFFSET_SROA_IDX1:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[ALLOCA_SROA_0]], i64 0, i64 3 +; OVERALIGNED-NEXT: [[ALLOCA_SROA_0_6_I8PTROFFSET_SROA_CAST2:%.*]] = bitcast 
i16* [[ALLOCA_SROA_0_6_I8PTROFFSET_SROA_IDX1]] to i8* +; OVERALIGNED-NEXT: [[ALLOCA_SROA_0_6_ALLOCA_SROA_0_6_RES:%.*]] = load i8, i8* [[ALLOCA_SROA_0_6_I8PTROFFSET_SROA_CAST2]], align 2 +; OVERALIGNED-NEXT: ret i8 [[ALLOCA_SROA_0_6_ALLOCA_SROA_0_6_RES]] ; ; NATURAL-LABEL: @test_vector_bitcast_i8( ; NATURAL-NEXT: [[ALLOCA_SROA_0:%.*]] = alloca <4 x i16>, align 8 diff --git a/llvm/unittests/IR/InstructionsTest.cpp b/llvm/unittests/IR/InstructionsTest.cpp --- a/llvm/unittests/IR/InstructionsTest.cpp +++ b/llvm/unittests/IR/InstructionsTest.cpp @@ -556,6 +556,65 @@ delete PtrVecB; } +TEST(InstructionsTest, GepOffsets) { + // Test byte-based offsets of GEPs into vectors and arrays, + // including the case of overaligned element types. + LLVMContext C; + DataLayout DefaultDL( + "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3" + "2:32:32-f64:64:64-v64:64:64-v128:128:128-a:0:64-s:64:64-f80" + ":128:128-n8:16:32:64-S128"); + DataLayout AlignMin32DL( + "e-p:64:64:64-i1:8:8-i8:8:8-i16:32:32-i32:32:32-i64:64:64-f3" + "2:32:32-f64:64:64-v64:64:64-v128:128:128-a:0:64-s:64:64-f80" + ":128:128-n8:16:32:64-S128"); + DataLayout AlignMin64DL( + "e-p:64:64:64-i1:8:8-i8:8:8-i16:64:64-i32:64:64-i64:64:64-f3" + "2:64:64-f64:64:64-v64:64:64-v128:128:128-a:0:64-s:64:64-f80" + ":128:128-n8:16:32:64-S128"); + + for (uint64_t ElemBitWidth : {8, 16, 24, 32, 64}) { + IntegerType *ElemTy = IntegerType::get(C, ElemBitWidth); + EXPECT_EQ(DefaultDL.getTypeSizeInBits(ElemTy), ElemBitWidth); + + { + // Check GEP into vector + VectorType *VecTy = FixedVectorType::get(ElemTy, 8); + Constant *VectorNullPtr = Constant::getNullValue(VecTy->getPointerTo()); + std::unique_ptr<GetElementPtrInst> Gep(GetElementPtrInst::Create( + VecTy, VectorNullPtr, + {ConstantInt::get(Type::getInt32Ty(C), 0), + ConstantInt::get(Type::getInt32Ty(C), 1)})); + + for (const DataLayout *DL : {&DefaultDL, &AlignMin32DL, &AlignMin64DL}) { + EXPECT_EQ(DL->getTypeSizeInBits(VecTy), ElemBitWidth * 8); + APInt 
GEPOffset(DL->getIndexTypeSizeInBits(Gep->getType()), 0); + EXPECT_TRUE(Gep->accumulateConstantOffset(*DL, GEPOffset)); + EXPECT_EQ(DL->getTypeSizeInBits(ElemTy), ElemBitWidth); + EXPECT_EQ(GEPOffset.getZExtValue(), ElemBitWidth / 8); + } + } + { + // Check GEP into array + ArrayType *ArrTy = ArrayType::get(ElemTy, 8); + Constant *ArrayNullPtr = Constant::getNullValue(ArrTy->getPointerTo()); + std::unique_ptr<GetElementPtrInst> Gep(GetElementPtrInst::Create( + ArrTy, ArrayNullPtr, + {ConstantInt::get(Type::getInt32Ty(C), 0), + ConstantInt::get(Type::getInt32Ty(C), 1)})); + + for (const DataLayout *DL : {&DefaultDL, &AlignMin32DL, &AlignMin64DL}) { + EXPECT_EQ(DL->getTypeSizeInBits(ArrTy), + DL->getTypeAllocSizeInBits(ElemTy) * 8); + APInt GEPOffset(DL->getIndexTypeSizeInBits(Gep->getType()), 0); + EXPECT_TRUE(Gep->accumulateConstantOffset(*DL, GEPOffset)); + EXPECT_GE(DL->getTypeAllocSizeInBits(ElemTy), ElemBitWidth); + EXPECT_EQ(GEPOffset.getZExtValue(), DL->getTypeAllocSize(ElemTy)); + } + } + } +} + TEST(InstructionsTest, FPMathOperator) { LLVMContext Context; IRBuilder<> Builder(Context);