diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -744,8 +744,14 @@
 :ref:`Scalable vectors <t_vector>` cannot be global variables or members of
 arrays because their size is unknown at compile time. They are allowed in
-structs to facilitate intrinsics returning multiple values. Structs containing
-scalable vectors cannot be used in loads, stores, allocas, or GEPs.
+structs to facilitate intrinsics returning multiple values. Generally, structs
+containing scalable vectors are not considered "sized" and cannot be used in
+loads, stores, allocas, or GEPs. The only exception to this rule is for structs
+that contain scalable vectors of the same type (e.g. ``{<vscale x 2 x i32>,
+<vscale x 2 x i32>}`` contains the same type while ``{<vscale x 2 x i32>,
+<vscale x 2 x i64>}`` doesn't). These kinds of structs (we may call them
+homogeneous scalable vector structs) are considered sized and can be used in
+loads, stores, and allocas, but not GEPs.
 
 Syntax::
 
@@ -10287,6 +10293,11 @@
 
 '``type``' may be any sized type.
 
+Structs containing scalable vectors cannot be used in allocas unless all
+fields are the same scalable vector type (e.g. ``{<vscale x 2 x i32>,
+<vscale x 2 x i32>}`` contains the same type while ``{<vscale x 2 x i32>,
+<vscale x 2 x i64>}`` doesn't).
+
 Semantics:
 """"""""""
 
diff --git a/llvm/include/llvm/CodeGen/Analysis.h b/llvm/include/llvm/CodeGen/Analysis.h
--- a/llvm/include/llvm/CodeGen/Analysis.h
+++ b/llvm/include/llvm/CodeGen/Analysis.h
@@ -64,15 +64,33 @@
 ///
 void ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL, Type *Ty,
                      SmallVectorImpl<EVT> &ValueVTs,
-                     SmallVectorImpl<uint64_t> *Offsets = nullptr,
+                     SmallVectorImpl<TypeSize> *Offsets,
+                     TypeSize StartingOffset);
+void ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL, Type *Ty,
+                     SmallVectorImpl<EVT> &ValueVTs,
+                     SmallVectorImpl<TypeSize> *Offsets = nullptr,
                      uint64_t StartingOffset = 0);
+void ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL, Type *Ty,
+                     SmallVectorImpl<EVT> &ValueVTs,
+                     SmallVectorImpl<uint64_t> *FixedOffsets,
+                     uint64_t StartingOffset);
 
 /// Variant of ComputeValueVTs that also produces the memory VTs.
 void ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL, Type *Ty,
                      SmallVectorImpl<EVT> &ValueVTs,
                      SmallVectorImpl<EVT> *MemVTs,
-                     SmallVectorImpl<uint64_t> *Offsets = nullptr,
+                     SmallVectorImpl<TypeSize> *Offsets,
+                     TypeSize StartingOffset);
+void ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL, Type *Ty,
+                     SmallVectorImpl<EVT> &ValueVTs,
+                     SmallVectorImpl<EVT> *MemVTs,
+                     SmallVectorImpl<TypeSize> *Offsets = nullptr,
                      uint64_t StartingOffset = 0);
+void ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL, Type *Ty,
+                     SmallVectorImpl<EVT> &ValueVTs,
+                     SmallVectorImpl<EVT> *MemVTs,
+                     SmallVectorImpl<uint64_t> *FixedOffsets,
+                     uint64_t StartingOffset);
 
 /// computeValueLLTs - Given an LLVM IR type, compute a sequence of
 /// LLTs that represent all the individual underlying
diff --git a/llvm/include/llvm/IR/DataLayout.h b/llvm/include/llvm/IR/DataLayout.h
--- a/llvm/include/llvm/IR/DataLayout.h
+++ b/llvm/include/llvm/IR/DataLayout.h
@@ -620,16 +620,16 @@
 /// Used to lazily calculate structure layout information for a target machine,
 /// based on the DataLayout structure.
-class StructLayout final : public TrailingObjects<StructLayout, uint64_t> {
-  uint64_t StructSize;
+class StructLayout final : public TrailingObjects<StructLayout, TypeSize> {
+  TypeSize StructSize;
   Align StructAlignment;
   unsigned IsPadded : 1;
   unsigned NumElements : 31;
 
 public:
-  uint64_t getSizeInBytes() const { return StructSize; }
+  TypeSize getSizeInBytes() const { return StructSize; }
 
-  uint64_t getSizeInBits() const { return 8 * StructSize; }
+  TypeSize getSizeInBits() const { return 8 * StructSize; }
 
   Align getAlignment() const { return StructAlignment; }
 
@@ -639,23 +639,22 @@
 
   /// Given a valid byte offset into the structure, returns the structure
   /// index that contains it.
-  unsigned getElementContainingOffset(uint64_t Offset) const;
+  unsigned getElementContainingOffset(uint64_t FixedOffset) const;
 
-  MutableArrayRef<uint64_t> getMemberOffsets() {
-    return llvm::MutableArrayRef(getTrailingObjects<uint64_t>(),
-                                 NumElements);
+  MutableArrayRef<TypeSize> getMemberOffsets() {
+    return llvm::MutableArrayRef(getTrailingObjects<TypeSize>(), NumElements);
   }
 
-  ArrayRef<uint64_t> getMemberOffsets() const {
-    return llvm::ArrayRef(getTrailingObjects<uint64_t>(), NumElements);
+  ArrayRef<TypeSize> getMemberOffsets() const {
+    return llvm::ArrayRef(getTrailingObjects<TypeSize>(), NumElements);
   }
 
-  uint64_t getElementOffset(unsigned Idx) const {
+  TypeSize getElementOffset(unsigned Idx) const {
     assert(Idx < NumElements && "Invalid element idx!");
     return getMemberOffsets()[Idx];
   }
 
-  uint64_t getElementOffsetInBits(unsigned Idx) const {
+  TypeSize getElementOffsetInBits(unsigned Idx) const {
     return getElementOffset(Idx) * 8;
   }
 
@@ -664,7 +663,7 @@
 
   StructLayout(StructType *ST, const DataLayout &DL);
 
-  size_t numTrailingObjects(OverloadToken<uint64_t>) const {
+  size_t numTrailingObjects(OverloadToken<TypeSize>) const {
     return NumElements;
   }
 };
@@ -685,8 +684,7 @@
   }
   case Type::StructTyID:
     // Get the layout annotation... which is lazily created on demand.
-    return TypeSize::Fixed(
-        getStructLayout(cast<StructType>(Ty))->getSizeInBits());
+    return getStructLayout(cast<StructType>(Ty))->getSizeInBits();
   case Type::IntegerTyID:
     return TypeSize::Fixed(Ty->getIntegerBitWidth());
   case Type::HalfTyID:
diff --git a/llvm/include/llvm/IR/DerivedTypes.h b/llvm/include/llvm/IR/DerivedTypes.h
--- a/llvm/include/llvm/IR/DerivedTypes.h
+++ b/llvm/include/llvm/IR/DerivedTypes.h
@@ -218,7 +218,9 @@
     SCDB_HasBody = 1,
     SCDB_Packed = 2,
     SCDB_IsLiteral = 4,
-    SCDB_IsSized = 8
+    SCDB_IsSized = 8,
+    SCDB_ContainsScalableVector = 16,
+    SCDB_NotContainsScalableVector = 32
   };
 
   /// For a named struct that actually has a name, this is a pointer to the
@@ -284,7 +286,16 @@
   bool isSized(SmallPtrSetImpl<Type *> *Visited = nullptr) const;
 
   /// Returns true if this struct contains a scalable vector.
-  bool containsScalableVectorType() const;
+  bool
+  containsScalableVectorType(SmallPtrSetImpl<Type *> *Visited = nullptr) const;
+
+  /// Returns true if this struct contains homogeneous scalable vector types.
+  /// Note that the definition of homogeneous scalable vector type is not
+  /// recursive here. That means the following structure will return false
+  /// when calling this function.
+  /// {{<vscale x 2 x i32>, <vscale x 4 x i64>},
+  ///  {<vscale x 2 x i32>, <vscale x 4 x i64>}}
+  bool containsHomogeneousScalableVectorTypes() const;
 
   /// Return true if this is a named struct that has a non-empty name.
   bool hasName() const { return SymbolTableEntry != nullptr; }
diff --git a/llvm/include/llvm/IR/Type.h b/llvm/include/llvm/IR/Type.h
--- a/llvm/include/llvm/IR/Type.h
+++ b/llvm/include/llvm/IR/Type.h
@@ -211,9 +211,7 @@
 
   /// Return true if this is a scalable vector type or a target extension type
   /// with a scalable layout.
-  bool isScalableTy() const {
-    return getTypeID() == ScalableVectorTyID || isScalableTargetExtTy();
-  }
+  bool isScalableTy() const;
 
   /// Return true if this is a FP type or a vector of FP.
   bool isFPOrFPVectorTy() const { return getScalarType()->isFloatingPointTy(); }
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -4345,8 +4345,10 @@
   // We can bypass creating a target-independent constant expression and then
   // folding it back into a ConstantInt. This is just a compile-time
   // optimization.
-  return getConstant(
-      IntTy, getDataLayout().getStructLayout(STy)->getElementOffset(FieldNo));
+  const StructLayout *SL = getDataLayout().getStructLayout(STy);
+  assert(!SL->getSizeInBits().isScalable() &&
+         "Cannot get offset for structure containing scalable vector types");
+  return getConstant(IntTy, SL->getElementOffset(FieldNo));
 }
 
 const SCEV *ScalarEvolution::getUnknown(Value *V) {
diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp
--- a/llvm/lib/AsmParser/LLParser.cpp
+++ b/llvm/lib/AsmParser/LLParser.cpp
@@ -7990,6 +7990,11 @@
   if (!Indices.empty() && !Ty->isSized(&Visited))
     return error(Loc, "base element of getelementptr must be sized");
 
+  auto *STy = dyn_cast<StructType>(Ty);
+  if (STy && STy->containsScalableVectorType())
+    return error(Loc, "getelementptr cannot target structure that contains "
+                      "scalable vector type");
+
   if (!GetElementPtrInst::getIndexedType(Ty, Indices))
     return error(Loc, "invalid getelementptr indices");
   Inst = GetElementPtrInst::Create(Ty, Ptr, Indices);
diff --git a/llvm/lib/CodeGen/Analysis.cpp b/llvm/lib/CodeGen/Analysis.cpp
--- a/llvm/lib/CodeGen/Analysis.cpp
+++ b/llvm/lib/CodeGen/Analysis.cpp
@@ -79,8 +79,8 @@
 void llvm::ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL,
                            Type *Ty, SmallVectorImpl<EVT> &ValueVTs,
                            SmallVectorImpl<EVT> *MemVTs,
-                           SmallVectorImpl<uint64_t> *Offsets,
-                           uint64_t StartingOffset) {
+                           SmallVectorImpl<TypeSize> *Offsets,
+                           TypeSize StartingOffset) {
   // Given a struct type, recursively traverse the elements.
   if (StructType *STy = dyn_cast<StructType>(Ty)) {
     // If the Offsets aren't needed, don't query the struct layout. This allows
@@ -92,7 +92,8 @@
                                       EE = STy->element_end();
          EI != EE; ++EI) {
       // Don't compute the element offset if we didn't get a StructLayout above.
-      uint64_t EltOffset = SL ? SL->getElementOffset(EI - EB) : 0;
+      TypeSize EltOffset = SL ? SL->getElementOffset(EI - EB)
+                              : TypeSize::get(0, StartingOffset.isScalable());
       ComputeValueVTs(TLI, DL, *EI, ValueVTs, MemVTs, Offsets,
                       StartingOffset + EltOffset);
     }
@@ -101,7 +102,7 @@
   // Given an array type, recursively traverse the elements.
   if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
     Type *EltTy = ATy->getElementType();
-    uint64_t EltSize = DL.getTypeAllocSize(EltTy).getFixedValue();
+    TypeSize EltSize = DL.getTypeAllocSize(EltTy);
     for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i)
       ComputeValueVTs(TLI, DL, EltTy, ValueVTs, MemVTs, Offsets,
                       StartingOffset + i * EltSize);
@@ -120,12 +121,62 @@
 
 void llvm::ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL,
                            Type *Ty, SmallVectorImpl<EVT> &ValueVTs,
-                           SmallVectorImpl<uint64_t> *Offsets,
-                           uint64_t StartingOffset) {
+                           SmallVectorImpl<TypeSize> *Offsets,
+                           TypeSize StartingOffset) {
   return ComputeValueVTs(TLI, DL, Ty, ValueVTs, /*MemVTs=*/nullptr, Offsets,
                          StartingOffset);
 }
 
+void llvm::ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL,
+                           Type *Ty, SmallVectorImpl<EVT> &ValueVTs,
+                           SmallVectorImpl<TypeSize> *Offsets,
+                           uint64_t StartingOffset) {
+  TypeSize Offset = TypeSize::get(StartingOffset, Ty->isScalableTy());
+  return ComputeValueVTs(TLI, DL, Ty, ValueVTs, Offsets, Offset);
+}
+
+void llvm::ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL,
+                           Type *Ty, SmallVectorImpl<EVT> &ValueVTs,
+                           SmallVectorImpl<uint64_t> *FixedOffsets,
+                           uint64_t StartingOffset) {
+  TypeSize Offset = TypeSize::get(StartingOffset, Ty->isScalableTy());
+  SmallVector<TypeSize, 4> Offsets;
+  if (FixedOffsets)
+    ComputeValueVTs(TLI, DL, Ty, ValueVTs, &Offsets, Offset);
+  else
+    ComputeValueVTs(TLI, DL, Ty, ValueVTs, nullptr, Offset);
+
+  if (FixedOffsets)
+    for (TypeSize Offset : Offsets)
+      FixedOffsets->push_back(Offset.getKnownMinValue());
+}
+
+void llvm::ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL,
+                           Type *Ty, SmallVectorImpl<EVT> &ValueVTs,
+                           SmallVectorImpl<EVT> *MemVTs,
+                           SmallVectorImpl<TypeSize> *Offsets,
+                           uint64_t StartingOffset) {
+  TypeSize Offset = TypeSize::get(StartingOffset, Ty->isScalableTy());
+  return ComputeValueVTs(TLI, DL, Ty, ValueVTs, MemVTs, Offsets, Offset);
+}
+
+void llvm::ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL,
+                           Type *Ty, SmallVectorImpl<EVT> &ValueVTs,
+                           SmallVectorImpl<EVT> *MemVTs,
+                           SmallVectorImpl<uint64_t> *FixedOffsets,
+                           uint64_t StartingOffset) {
+  TypeSize Offset = TypeSize::get(StartingOffset, Ty->isScalableTy());
+  SmallVector<TypeSize, 4> Offsets;
+  if (FixedOffsets)
+    ComputeValueVTs(TLI, DL, Ty, ValueVTs, MemVTs, &Offsets, Offset);
+  else
+    ComputeValueVTs(TLI, DL, Ty, ValueVTs, MemVTs, nullptr, Offset);
+
+  if (FixedOffsets)
+    for (TypeSize Offset : Offsets)
+      FixedOffsets->push_back(Offset.getKnownMinValue());
+}
+
 void llvm::computeValueLLTs(const DataLayout &DL, Type &Ty,
                             SmallVectorImpl<LLT> &ValueTys,
                             SmallVectorImpl<uint64_t> *Offsets,
diff --git a/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
@@ -152,9 +152,10 @@
                       false, AI);
     }
 
-    // Scalable vectors may need a special StackID to distinguish
-    // them from other (fixed size) stack objects.
-    if (isa<ScalableVectorType>(Ty))
+    // Scalable vectors and structures that contain scalable vectors may
+    // need a special StackID to distinguish them from other (fixed size)
+    // stack objects.
+    if (Ty->isScalableTy())
       MF->getFrameInfo().setStackID(FrameIndex,
                                     TFI->getStackIDForScalableVectors());
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -2025,7 +2025,7 @@
   SmallVector<EVT, 4> ValueVTs, MemVTs;
   SmallVector<uint64_t, 4> Offsets;
   ComputeValueVTs(TLI, DL, I.getOperand(0)->getType(), ValueVTs, &MemVTs,
-                  &Offsets);
+                  &Offsets, 0);
   unsigned NumValues = ValueVTs.size();
 
   SmallVector<SDValue, 4> Chains(NumValues);
@@ -4161,7 +4161,7 @@
   Type *Ty = I.getType();
   SmallVector<EVT, 4> ValueVTs, MemVTs;
   SmallVector<uint64_t, 4> Offsets;
-  ComputeValueVTs(TLI, DAG.getDataLayout(), Ty, ValueVTs, &MemVTs, &Offsets);
+  ComputeValueVTs(TLI, DAG.getDataLayout(), Ty, ValueVTs, &MemVTs, &Offsets, 0);
   unsigned NumValues = ValueVTs.size();
   if (NumValues == 0)
     return;
@@ -4260,7 +4260,7 @@
   SmallVector<uint64_t, 4> Offsets;
   const Value *SrcV = I.getOperand(0);
   ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(),
-                  SrcV->getType(), ValueVTs, &Offsets);
+                  SrcV->getType(), ValueVTs, &Offsets, 0);
   assert(ValueVTs.size() == 1 && Offsets[0] == 0 &&
          "expect a single EVT for swifterror");
 
@@ -4296,7 +4296,7 @@
   SmallVector<EVT, 4> ValueVTs;
   SmallVector<uint64_t, 4> Offsets;
   ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(), Ty,
-                  ValueVTs, &Offsets);
+                  ValueVTs, &Offsets, 0);
   assert(ValueVTs.size() == 1 && Offsets[0] == 0 &&
          "expect a single EVT for swifterror");
 
@@ -4333,7 +4333,7 @@
   SmallVector<EVT, 4> ValueVTs, MemVTs;
   SmallVector<uint64_t, 4> Offsets;
   ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(),
-                  SrcV->getType(), ValueVTs, &MemVTs, &Offsets);
+                  SrcV->getType(), ValueVTs, &MemVTs, &Offsets, 0);
   unsigned NumValues = ValueVTs.size();
   if (NumValues == 0)
     return;
@@ -9903,7 +9903,7 @@
   SmallVector<EVT, 4> RetTys;
   SmallVector<uint64_t, 4> Offsets;
   auto &DL = CLI.DAG.getDataLayout();
-  ComputeValueVTs(*this, DL, CLI.RetTy, RetTys, &Offsets);
+  ComputeValueVTs(*this, DL, CLI.RetTy, RetTys, &Offsets, 0);
 
   if (CLI.IsPostTypeLegalization) {
     // If we are lowering a libcall after legalization, split the return type.
diff --git a/llvm/lib/IR/DataLayout.cpp b/llvm/lib/IR/DataLayout.cpp
--- a/llvm/lib/IR/DataLayout.cpp
+++ b/llvm/lib/IR/DataLayout.cpp
@@ -45,21 +45,30 @@
 // Support for StructLayout
 //===----------------------------------------------------------------------===//
 
-StructLayout::StructLayout(StructType *ST, const DataLayout &DL) {
+StructLayout::StructLayout(StructType *ST, const DataLayout &DL)
+    : StructSize(TypeSize::Fixed(0)) {
   assert(!ST->isOpaque() && "Cannot get layout of opaque structs");
-  StructSize = 0;
   IsPadded = false;
   NumElements = ST->getNumElements();
 
   // Loop over each of the elements, placing them in memory.
   for (unsigned i = 0, e = NumElements; i != e; ++i) {
     Type *Ty = ST->getElementType(i);
+    if (i == 0 && Ty->isScalableTy())
+      StructSize = TypeSize::Scalable(0);
+
     const Align TyAlign = ST->isPacked() ? Align(1) : DL.getABITypeAlign(Ty);
 
     // Add padding if necessary to align the data element properly.
-    if (!isAligned(TyAlign, StructSize)) {
+    // Currently the only structs with a scalable size are homogeneous
+    // scalable vector structs. Because all of their members have the same
+    // type, no alignment padding is needed between members. The condition
+    // below assumes this and needs to be adjusted if that assumption changes
+    // (e.g. if we ever support structs with arbitrary scalable member types,
+    // or structs that mix fixed-size and scalable members).
+    if (!StructSize.isScalable() && !isAligned(TyAlign, StructSize)) {
       IsPadded = true;
-      StructSize = alignTo(StructSize, TyAlign);
+      StructSize = TypeSize::Fixed(alignTo(StructSize, TyAlign));
     }
 
     // Keep track of maximum alignment constraint.
@@ -67,28 +76,39 @@
     getMemberOffsets()[i] = StructSize;
 
     // Consume space for this data item
-    StructSize += DL.getTypeAllocSize(Ty).getFixedValue();
+    StructSize += DL.getTypeAllocSize(Ty);
   }
 
   // Add padding to the end of the struct so that it could be put in an array
   // and all array elements would be aligned correctly.
-  if (!isAligned(StructAlignment, StructSize)) {
+  if (!StructSize.isScalable() && !isAligned(StructAlignment, StructSize)) {
     IsPadded = true;
-    StructSize = alignTo(StructSize, StructAlignment);
+    StructSize = TypeSize::Fixed(alignTo(StructSize, StructAlignment));
  }
 }
 
 /// getElementContainingOffset - Given a valid offset into the structure,
 /// return the structure index that contains it.
-unsigned StructLayout::getElementContainingOffset(uint64_t Offset) const {
-  ArrayRef<uint64_t> MemberOffsets = getMemberOffsets();
-  auto SI = llvm::upper_bound(MemberOffsets, Offset);
+unsigned StructLayout::getElementContainingOffset(uint64_t FixedOffset) const {
+  assert(!StructSize.isScalable() &&
+         "Cannot get element at offset for structure containing scalable "
+         "vector types");
+  TypeSize Offset = TypeSize::Fixed(FixedOffset);
+  ArrayRef<TypeSize> MemberOffsets = getMemberOffsets();
+
+  const auto *SI =
+      std::upper_bound(MemberOffsets.begin(), MemberOffsets.end(), Offset,
+                       [](TypeSize LHS, TypeSize RHS) -> bool {
+                         return TypeSize::isKnownLT(LHS, RHS);
+                       });
   assert(SI != MemberOffsets.begin() && "Offset not in structure type!");
   --SI;
-  assert(*SI <= Offset && "upper_bound didn't work");
-  assert((SI == MemberOffsets.begin() || *(SI - 1) <= Offset) &&
-         (SI + 1 == MemberOffsets.end() || *(SI + 1) > Offset) &&
-         "Upper bound didn't work!");
+  assert(TypeSize::isKnownLE(*SI, Offset) && "upper_bound didn't work");
+  assert(
+      (SI == MemberOffsets.begin() || TypeSize::isKnownLE(*(SI - 1), Offset)) &&
+      (SI + 1 == MemberOffsets.end() ||
+       TypeSize::isKnownGT(*(SI + 1), Offset)) &&
+      "Upper bound didn't work!");
 
   // Multiple fields can have the same offset if any of them are zero sized.
   // For example, in { i32, [0 x i32], i32 }, searching for offset 4 will stop
@@ -706,7 +726,7 @@
   // Otherwise, create the struct layout.  Because it is variable length, we
   // malloc it, then use placement new.
   StructLayout *L = (StructLayout *)safe_malloc(
-      StructLayout::totalSizeToAlloc<uint64_t>(Ty->getNumElements()));
+      StructLayout::totalSizeToAlloc<TypeSize>(Ty->getNumElements()));
 
   // Set SL before calling StructLayout's ctor.  The ctor could cause other
   // entries to be added to TheMap, invalidating our reference.
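
Not part of the patch itself: a minimal standalone C++ sketch of how the TypeSize-based StructLayout above can be queried for a homogeneous scalable vector struct. It assumes the post-patch API (TypeSize-returning getSizeInBits/getElementOffset); the empty datalayout string, the struct name, and the expected values in the comments are illustrative assumptions only.

// Sketch (assumption-based, not from the patch): query StructLayout for a
// homogeneous scalable vector struct.
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

int main() {
  LLVMContext Ctx;
  // %struct.test = type { <vscale x 2 x i32>, <vscale x 2 x i32> }
  Type *VTy = ScalableVectorType::get(Type::getInt32Ty(Ctx), 2);
  StructType *STy = StructType::create(Ctx, {VTy, VTy}, "struct.test");

  DataLayout DL("");  // default (empty) layout string, chosen for the example
  const StructLayout *SL = DL.getStructLayout(STy);

  // With this patch the struct is sized and its layout is scalable: the total
  // size should be vscale x 128 bits and field 1 should sit at byte offset
  // vscale x 8.
  TypeSize SizeInBits = SL->getSizeInBits();
  TypeSize Off1 = SL->getElementOffset(1);
  outs() << "sized: " << (STy->isSized() ? "yes" : "no")
         << ", scalable size: " << (SizeInBits.isScalable() ? "yes" : "no")
         << ", min size bits: " << SizeInBits.getKnownMinValue()
         << ", field 1 min byte offset: " << Off1.getKnownMinValue() << "\n";
  return 0;
}

This is also why getElementContainingOffset keeps a fixed uint64_t interface: for a scalable layout an element's byte offset is not a plain integer, so the patch asserts rather than answer.
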
diff --git a/llvm/lib/IR/Type.cpp b/llvm/lib/IR/Type.cpp
--- a/llvm/lib/IR/Type.cpp
+++ b/llvm/lib/IR/Type.cpp
@@ -63,6 +63,14 @@
   return false;
 }
 
+bool Type::isScalableTy() const {
+  if (const auto *STy = dyn_cast<StructType>(this)) {
+    SmallPtrSet<Type *, 4> Visited;
+    return STy->containsScalableVectorType(&Visited);
+  }
+  return getTypeID() == ScalableVectorTyID || isScalableTargetExtTy();
+}
+
 const fltSemantics &Type::getFltSemantics() const {
   switch (getTypeID()) {
   case HalfTyID: return APFloat::IEEEhalf();
@@ -450,18 +458,51 @@
   return ST;
 }
 
-bool StructType::containsScalableVectorType() const {
+bool StructType::containsScalableVectorType(
+    SmallPtrSetImpl<Type *> *Visited) const {
+  if ((getSubclassData() & SCDB_ContainsScalableVector) != 0)
+    return true;
+
+  if ((getSubclassData() & SCDB_NotContainsScalableVector) != 0)
+    return false;
+
+  if (Visited && !Visited->insert(const_cast<StructType *>(this)).second)
+    return false;
+
   for (Type *Ty : elements()) {
-    if (isa<ScalableVectorType>(Ty))
+    if (isa<ScalableVectorType>(Ty)) {
+      const_cast<StructType *>(this)->setSubclassData(
+          getSubclassData() | SCDB_ContainsScalableVector);
       return true;
-    if (auto *STy = dyn_cast<StructType>(Ty))
-      if (STy->containsScalableVectorType())
+    }
+    if (auto *STy = dyn_cast<StructType>(Ty)) {
+      if (STy->containsScalableVectorType(Visited)) {
+        const_cast<StructType *>(this)->setSubclassData(
+            getSubclassData() | SCDB_ContainsScalableVector);
         return true;
+      }
+    }
   }
 
+  // For structures that are opaque, return false but do not set the
+  // SCDB_NotContainsScalableVector flag, since they may gain scalable vector
+  // types when they become non-opaque.
+  if (!isOpaque())
+    const_cast<StructType *>(this)->setSubclassData(
+        getSubclassData() | SCDB_NotContainsScalableVector);
   return false;
 }
 
+bool StructType::containsHomogeneousScalableVectorTypes() const {
+  Type *FirstTy = getNumElements() > 0 ? elements()[0] : nullptr;
+  if (!FirstTy || !isa<ScalableVectorType>(FirstTy))
+    return false;
+  for (Type *Ty : elements())
+    if (Ty != FirstTy)
+      return false;
+  return true;
+}
+
 void StructType::setBody(ArrayRef<Type *> Elements, bool isPacked) {
   assert(isOpaque() && "Struct body already set!");
 
@@ -581,10 +622,19 @@
   // Okay, our struct is sized if all of the elements are, but if one of the
   // elements is opaque, the struct isn't sized *yet*, but may become sized in
   // the future, so just bail out without caching.
+  // The ONLY exception is a struct whose elements are all the same scalable
+  // vector type; such a struct is considered sized.
+  if (containsHomogeneousScalableVectorTypes()) {
+    const_cast<StructType *>(this)->setSubclassData(getSubclassData() |
+                                                    SCDB_IsSized);
+    return true;
+  }
  for (Type *Ty : elements()) {
     // If the struct contains a scalable vector type, don't consider it sized.
-    // This prevents it from being used in loads/stores/allocas/GEPs.
-    if (isa<ScalableVectorType>(Ty))
+    // This prevents it from being used in loads/stores/allocas/GEPs. The ONLY
+    // special case right now is a structure of homogeneous scalable vector
+    // types, and it is handled by the if-statement before this for-loop.
+    if (Ty->isScalableTy())
       return false;
     if (!Ty->isSized(Visited))
       return false;
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -828,9 +828,11 @@
   Check(!isa<ScalableVectorType>(GV.getValueType()),
         "Globals cannot contain scalable vectors", &GV);
 
-  if (auto *STy = dyn_cast<StructType>(GV.getValueType()))
-    Check(!STy->containsScalableVectorType(),
+  if (auto *STy = dyn_cast<StructType>(GV.getValueType())) {
+    SmallPtrSet<Type *, 4> Visited;
+    Check(!STy->containsScalableVectorType(&Visited),
           "Globals cannot contain scalable vectors", &GV);
+  }
 
   // Check if it's a target extension type that disallows being used as a
   // global.
@@ -3836,6 +3838,14 @@
         "GEP base pointer is not a vector or a vector of pointers", &GEP);
   Check(GEP.getSourceElementType()->isSized(), "GEP into unsized type!", &GEP);
 
+  if (auto *STy = dyn_cast<StructType>(GEP.getSourceElementType())) {
+    SmallPtrSet<Type *, 4> Visited;
+    Check(!STy->containsScalableVectorType(&Visited),
+          "getelementptr cannot target structure that contains scalable "
+          "vector type",
+          &GEP);
+  }
+
   SmallVector<Value *, 16> Idxs(GEP.indices());
   Check(
       all_of(Idxs, [](Value *V) { return V->getType()->isIntOrIntVectorTy(); }),
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
--- a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -768,6 +768,11 @@
   // the knowledge that padding exists for the rest of the pipeline.
   const DataLayout &DL = IC.getDataLayout();
   auto *SL = DL.getStructLayout(ST);
+
+  // Don't unpack structures that contain scalable vectors.
+  if (SL->getSizeInBits().isScalable())
+    return nullptr;
+
   if (SL->hasPadding())
     return nullptr;
 
@@ -1291,6 +1296,11 @@
   // the knowledge that padding exists for the rest of the pipeline.
   const DataLayout &DL = IC.getDataLayout();
   auto *SL = DL.getStructLayout(ST);
+
+  // Don't unpack structures that contain scalable vectors.
+  if (SL->getSizeInBits().isScalable())
+    return false;
+
   if (SL->hasPadding())
     return false;
 
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -2787,6 +2787,11 @@
     return R;
 
   if (LoadInst *L = dyn_cast<LoadInst>(Agg)) {
+    // Bail out if the aggregate contains scalable vector types.
+    if (auto *STy = dyn_cast<StructType>(Agg->getType());
+        STy && STy->containsScalableVectorType())
+      return nullptr;
+
     // If the (non-volatile) load only has one use, we can rewrite this to a
     // load from a GEP. This reduces the size of the load. If a load is used
     // only by extractvalue instructions then this either must have been
diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp
--- a/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -3977,6 +3977,10 @@
     return nullptr;
 
   const StructLayout *SL = DL.getStructLayout(STy);
+
+  if (SL->getSizeInBits().isScalable())
+    return nullptr;
+
   if (Offset >= SL->getSizeInBytes())
     return nullptr;
   uint64_t EndOffset = Offset + Size;
diff --git a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
--- a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
+++ b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
@@ -522,6 +522,9 @@
       // the struct fields.
if (Ops.empty()) break; + assert( + !STy->containsScalableVectorType() && + "GEPs are not supported on structures containing scalable vectors"); if (const SCEVConstant *C = dyn_cast(Ops[0])) if (SE.getTypeSizeInBits(C->getType()) <= 64) { const StructLayout &SL = *DL.getStructLayout(STy); diff --git a/llvm/test/Assembler/scalable-vector-struct.ll b/llvm/test/Assembler/scalable-vector-struct.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Assembler/scalable-vector-struct.ll @@ -0,0 +1,30 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 +; RUN: opt -passes=verify -S < %s 2>&1 | FileCheck %s + +%struct.test = type { , } + +define @load(%struct.test* %x) { +; CHECK-LABEL: define @load +; CHECK-SAME: (ptr [[X:%.*]]) { +; CHECK-NEXT: [[A:%.*]] = load [[STRUCT_TEST:%.*]], ptr [[X]], align 4 +; CHECK-NEXT: [[B:%.*]] = extractvalue [[STRUCT_TEST]] [[A]], 1 +; CHECK-NEXT: ret [[B]] +; + %a = load %struct.test, %struct.test* %x + %b = extractvalue %struct.test %a, 1 + ret %b +} + +define void @store(%struct.test* %x, %y, %z) { +; CHECK-LABEL: define void @store +; CHECK-SAME: (ptr [[X:%.*]], [[Y:%.*]], [[Z:%.*]]) { +; CHECK-NEXT: [[A:%.*]] = insertvalue [[STRUCT_TEST:%.*]] undef, [[Y]], 0 +; CHECK-NEXT: [[B:%.*]] = insertvalue [[STRUCT_TEST]] [[A]], [[Z]], 1 +; CHECK-NEXT: store [[STRUCT_TEST]] [[B]], ptr [[X]], align 4 +; CHECK-NEXT: ret void +; + %a = insertvalue %struct.test undef, %y, 0 + %b = insertvalue %struct.test %a, %z, 1 + store %struct.test %b, %struct.test* %x + ret void +} diff --git a/llvm/test/CodeGen/AArch64/sme-aarch64-svcount.ll b/llvm/test/CodeGen/AArch64/sme-aarch64-svcount.ll --- a/llvm/test/CodeGen/AArch64/sme-aarch64-svcount.ll +++ b/llvm/test/CodeGen/AArch64/sme-aarch64-svcount.ll @@ -42,19 +42,21 @@ ; CHECKO3-NEXT: ret ; CHECK-O0-LABEL: test_alloca_store_reload: ; CHECK-O0: // %bb.0: -; CHECK-O0-NEXT: sub sp, sp, #16 -; CHECK-O0-NEXT: add x8, sp, #14 -; CHECK-O0-NEXT: str p0, [x8] -; CHECK-O0-NEXT: ldr p0, [x8] -; CHECK-O0-NEXT: add sp, sp, #16 +; CHECK-O0-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-O0-NEXT: addvl sp, sp, #-1 +; CHECK-O0-NEXT: str p0, [sp, #7, mul vl] +; CHECK-O0-NEXT: ldr p0, [sp, #7, mul vl] +; CHECK-O0-NEXT: addvl sp, sp, #1 +; CHECK-O0-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-O0-NEXT: ret ; ; CHECK-O3-LABEL: test_alloca_store_reload: ; CHECK-O3: // %bb.0: -; CHECK-O3-NEXT: sub sp, sp, #16 -; CHECK-O3-NEXT: add x8, sp, #14 -; CHECK-O3-NEXT: str p0, [x8] -; CHECK-O3-NEXT: add sp, sp, #16 +; CHECK-O3-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-O3-NEXT: addvl sp, sp, #-1 +; CHECK-O3-NEXT: str p0, [sp, #7, mul vl] +; CHECK-O3-NEXT: addvl sp, sp, #1 +; CHECK-O3-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-O3-NEXT: ret %ptr = alloca target("aarch64.svcount"), align 1 store target("aarch64.svcount") %val, ptr %ptr diff --git a/llvm/test/CodeGen/RISCV/rvv/alloca-load-store-scalable-struct.ll b/llvm/test/CodeGen/RISCV/rvv/alloca-load-store-scalable-struct.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/alloca-load-store-scalable-struct.ll @@ -0,0 +1,52 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 +; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+v -verify-machineinstrs \ +; RUN: --riscv-no-aliases < %s | FileCheck %s + +target triple = "riscv64-unknown-unknown-elf" + +%struct.test = type { , } + +define @test(%struct.test* %addr, i64 %vl) { +; CHECK-LABEL: test: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrrs a2, vlenb, zero +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, a0, 8 +; CHECK-NEXT: vl1re64.v v8, (a2) +; CHECK-NEXT: vl1re64.v v9, (a0) +; CHECK-NEXT: addi a0, sp, 24 +; CHECK-NEXT: vs1r.v v8, (a0) +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v9, (a2) +; CHECK-NEXT: vl1re64.v v8, (a0) +; CHECK-NEXT: vl1re64.v v9, (a2) +; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma +; CHECK-NEXT: vfadd.vv v8, v9, v8 +; CHECK-NEXT: csrrs a0, vlenb, zero +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %ret = alloca %struct.test, align 8 + %val = load %struct.test, %struct.test* %addr + store %struct.test %val, %struct.test* %ret, align 8 + %0 = load %struct.test, %struct.test* %ret, align 8 + %1 = extractvalue %struct.test %0, 0 + %2 = extractvalue %struct.test %0, 1 + %3 = call @llvm.riscv.vfadd.nxv1f64.nxv1f64.i64( + poison, + %1, + %2, i64 %vl) + ret %3 +} + +declare @llvm.riscv.vfadd.nxv1f64.nxv1f64.i64( + , + , + , + i64) diff --git a/llvm/test/Transforms/InstCombine/scalable-vector-struct.ll b/llvm/test/Transforms/InstCombine/scalable-vector-struct.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/scalable-vector-struct.ll @@ -0,0 +1,30 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 +; RUN: opt -passes=instcombine -S < %s 2>&1 | FileCheck %s + +%struct.test = type { , } + +define @load(%struct.test* %x) { +; CHECK-LABEL: define @load +; CHECK-SAME: (ptr [[X:%.*]]) { +; CHECK-NEXT: [[A:%.*]] = load [[STRUCT_TEST:%.*]], ptr [[X]], align 4 +; CHECK-NEXT: [[B:%.*]] = extractvalue [[STRUCT_TEST]] [[A]], 1 +; CHECK-NEXT: ret [[B]] +; + %a = load %struct.test, %struct.test* %x + %b = extractvalue %struct.test %a, 1 + ret %b +} + +define void @store(%struct.test* %x, %y, %z) { +; CHECK-LABEL: define void @store +; CHECK-SAME: (ptr [[X:%.*]], [[Y:%.*]], [[Z:%.*]]) { +; CHECK-NEXT: [[A:%.*]] = insertvalue [[STRUCT_TEST:%.*]] undef, [[Y]], 0 +; CHECK-NEXT: [[B:%.*]] = insertvalue [[STRUCT_TEST]] [[A]], [[Z]], 1 +; CHECK-NEXT: store [[STRUCT_TEST]] [[B]], ptr [[X]], align 4 +; CHECK-NEXT: ret void +; + %a = insertvalue %struct.test undef, %y, 0 + %b = insertvalue %struct.test %a, %z, 1 + store %struct.test %b, %struct.test* 
%x + ret void +} diff --git a/llvm/test/Transforms/SROA/scalable-vector-struct.ll b/llvm/test/Transforms/SROA/scalable-vector-struct.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/SROA/scalable-vector-struct.ll @@ -0,0 +1,22 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -passes='sroa' -S | FileCheck %s --check-prefixes=CHECK +; RUN: opt < %s -passes='sroa' -S | FileCheck %s --check-prefixes=CHECK + +; This test checks that SROA runs mem2reg on structure that contains +; homogeneous scalable vectors. + +%struct.test = type { , } + +define %struct.test @alloca( %x, %y) { +; CHECK-LABEL: @alloca( +; CHECK-NEXT: [[AGG0:%.*]] = insertvalue [[STRUCT_TEST:%.*]] undef, [[X:%.*]], 0 +; CHECK-NEXT: [[AGG1:%.*]] = insertvalue [[STRUCT_TEST]] [[AGG0]], [[Y:%.*]], 1 +; CHECK-NEXT: ret [[STRUCT_TEST]] [[AGG1]] +; + %addr = alloca %struct.test, align 4 + %agg0 = insertvalue %struct.test undef, %x, 0 + %agg1 = insertvalue %struct.test %agg0, %y, 1 + store %struct.test %agg1, %struct.test* %addr, align 4 + %val = load %struct.test, %struct.test* %addr, align 4 + ret %struct.test %val +} diff --git a/llvm/test/Verifier/scalable-vector-struct-gep.ll b/llvm/test/Verifier/scalable-vector-struct-gep.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Verifier/scalable-vector-struct-gep.ll @@ -0,0 +1,9 @@ +; RUN: not opt -S -passes=verify < %s 2>&1 | FileCheck %s + +%struct.test = type { , } + +define void @gep(ptr %a) { +; CHECK: error: getelementptr cannot target structure that contains scalable vector type + %a.addr = getelementptr %struct.test, ptr %a, i32 0 + ret void +}
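
Not part of the patch: a small C++ sketch, written against the post-patch IR API above, showing how the new predicates classify a homogeneous scalable vector struct versus a mixed one. It mirrors the LangRef wording and the assembler/verifier tests; the element types and output formatting are illustrative assumptions.

// Sketch (assumption-based, not from the patch): classify homogeneous vs.
// mixed scalable vector structs with the new predicates.
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

int main() {
  LLVMContext Ctx;
  Type *NxV2I32 = ScalableVectorType::get(Type::getInt32Ty(Ctx), 2);
  Type *NxV2I64 = ScalableVectorType::get(Type::getInt64Ty(Ctx), 2);

  // Homogeneous: sized with this patch, so load/store/alloca are accepted
  // (GEP still is not).
  StructType *Homogeneous = StructType::get(Ctx, {NxV2I32, NxV2I32});
  // Mixed element types: still unsized.
  StructType *Mixed = StructType::get(Ctx, {NxV2I32, NxV2I64});

  for (StructType *STy : {Homogeneous, Mixed}) {
    SmallPtrSet<Type *, 4> Visited;
    outs() << "contains scalable: "
           << (STy->containsScalableVectorType(&Visited) ? "yes" : "no")
           << ", homogeneous: "
           << (STy->containsHomogeneousScalableVectorTypes() ? "yes" : "no")
           << ", sized: " << (STy->isSized() ? "yes" : "no") << "\n";
  }
  return 0;
}

The sized/unsized split shown here is the same distinction the LLParser and Verifier changes enforce for GEP, and the reason SROA and InstCombine now check StructLayout::getSizeInBits().isScalable() before unpacking.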