diff --git a/llvm/include/llvm/CodeGen/Analysis.h b/llvm/include/llvm/CodeGen/Analysis.h
--- a/llvm/include/llvm/CodeGen/Analysis.h
+++ b/llvm/include/llvm/CodeGen/Analysis.h
@@ -64,14 +64,23 @@
 ///
 void ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL, Type *Ty,
                      SmallVectorImpl<EVT> &ValueVTs,
-                     SmallVectorImpl<uint64_t> *Offsets = nullptr,
+                     SmallVectorImpl<TypeSize> *Offsets,
+                     TypeSize StartingOffset);
+void ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL, Type *Ty,
+                     SmallVectorImpl<EVT> &ValueVTs,
+                     SmallVectorImpl<TypeSize> *Offsets = nullptr,
                      uint64_t StartingOffset = 0);
 
 /// Variant of ComputeValueVTs that also produces the memory VTs.
 void ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL, Type *Ty,
                      SmallVectorImpl<EVT> &ValueVTs,
                      SmallVectorImpl<EVT> *MemVTs,
-                     SmallVectorImpl<uint64_t> *Offsets = nullptr,
+                     SmallVectorImpl<TypeSize> *Offsets,
+                     TypeSize StartingOffset);
+void ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL, Type *Ty,
+                     SmallVectorImpl<EVT> &ValueVTs,
+                     SmallVectorImpl<EVT> *MemVTs,
+                     SmallVectorImpl<TypeSize> *Offsets = nullptr,
                      uint64_t StartingOffset = 0);
 
 /// computeValueLLTs - Given an LLVM IR type, compute a sequence of
diff --git a/llvm/include/llvm/IR/DataLayout.h b/llvm/include/llvm/IR/DataLayout.h
--- a/llvm/include/llvm/IR/DataLayout.h
+++ b/llvm/include/llvm/IR/DataLayout.h
@@ -615,16 +615,28 @@
 /// Used to lazily calculate structure layout information for a target machine,
 /// based on the DataLayout structure.
-class StructLayout final : public TrailingObjects<StructLayout, uint64_t> {
-  uint64_t StructSize;
+class StructLayout final : public TrailingObjects<StructLayout, TypeSize> {
+  TypeSize StructSize;
   Align StructAlignment;
   unsigned IsPadded : 1;
   unsigned NumElements : 31;
 
 public:
-  uint64_t getSizeInBytes() const { return StructSize; }
+  uint64_t getSizeInBytes() const { return StructSize.getFixedValue(); }
 
-  uint64_t getSizeInBits() const { return 8 * StructSize; }
+  uint64_t getSizeInBits() const { return 8 * StructSize.getFixedValue(); }
+
+  uint64_t getMinSizeInBytes() const {
+    assert(StructSize.isScalable() &&
+           "Caller of this function should be a scalable size");
+    return StructSize.getKnownMinValue();
+  }
+
+  uint64_t getMinSizeInBits() const {
+    assert(StructSize.isScalable() &&
+           "Caller of this function should be a scalable size");
+    return 8 * StructSize.getKnownMinValue();
+  }
 
   Align getAlignment() const { return StructAlignment; }
 
@@ -634,23 +646,22 @@
   /// Given a valid byte offset into the structure, returns the structure
   /// index that contains it.
-  unsigned getElementContainingOffset(uint64_t Offset) const;
+  unsigned getElementContainingOffset(uint64_t FixedOffset) const;
 
-  MutableArrayRef<uint64_t> getMemberOffsets() {
-    return llvm::MutableArrayRef(getTrailingObjects<uint64_t>(),
-                                 NumElements);
+  MutableArrayRef<TypeSize> getMemberOffsets() {
+    return llvm::MutableArrayRef(getTrailingObjects<TypeSize>(), NumElements);
   }
 
-  ArrayRef<uint64_t> getMemberOffsets() const {
-    return llvm::ArrayRef(getTrailingObjects<uint64_t>(), NumElements);
+  ArrayRef<TypeSize> getMemberOffsets() const {
+    return llvm::ArrayRef(getTrailingObjects<TypeSize>(), NumElements);
   }
 
-  uint64_t getElementOffset(unsigned Idx) const {
+  TypeSize getElementOffset(unsigned Idx) const {
     assert(Idx < NumElements && "Invalid element idx!");
     return getMemberOffsets()[Idx];
   }
 
-  uint64_t getElementOffsetInBits(unsigned Idx) const {
+  TypeSize getElementOffsetInBits(unsigned Idx) const {
     return getElementOffset(Idx) * 8;
   }
 
@@ -659,7 +670,7 @@
 
   StructLayout(StructType *ST, const DataLayout &DL);
 
-  size_t numTrailingObjects(OverloadToken<uint64_t>) const {
+  size_t numTrailingObjects(OverloadToken<TypeSize>) const {
     return NumElements;
   }
 };
@@ -667,7 +678,10 @@
 // The implementation of this method is provided inline as it is particularly
 // well suited to constant folding when called on a specific Type subclass.
 inline TypeSize DataLayout::getTypeSizeInBits(Type *Ty) const {
-  assert(Ty->isSized() && "Cannot getTypeInfo() on a type that is unsized!");
+  assert((Ty->isSized() ||
+          (isa<StructType>(Ty) &&
+           cast<StructType>(Ty)->isContainHomogeneousScalableVectorType())) &&
+         "Cannot getTypeInfo() on a type that is unsized!");
   switch (Ty->getTypeID()) {
   case Type::LabelTyID:
     return TypeSize::Fixed(getPointerSizeInBits(0));
@@ -680,8 +694,7 @@
   }
   case Type::StructTyID:
     // Get the layout annotation... which is lazily created on demand.
-    return TypeSize::Fixed(
-        getStructLayout(cast<StructType>(Ty))->getSizeInBits());
+    return getStructLayout(cast<StructType>(Ty))->StructSize * 8;
   case Type::IntegerTyID:
     return TypeSize::Fixed(Ty->getIntegerBitWidth());
   case Type::HalfTyID:
diff --git a/llvm/include/llvm/IR/DerivedTypes.h b/llvm/include/llvm/IR/DerivedTypes.h
--- a/llvm/include/llvm/IR/DerivedTypes.h
+++ b/llvm/include/llvm/IR/DerivedTypes.h
@@ -286,6 +286,9 @@
   /// Returns true if this struct contains a scalable vector.
   bool containsScalableVectorType() const;
 
+  /// Returns true if this struct contains homogeneous scalable vector types.
+  bool isContainHomogeneousScalableVectorType() const;
+
   /// Return true if this is a named struct that has a non-empty name.
   bool hasName() const { return SymbolTableEntry != nullptr; }
diff --git a/llvm/include/llvm/IR/Instructions.h b/llvm/include/llvm/IR/Instructions.h
--- a/llvm/include/llvm/IR/Instructions.h
+++ b/llvm/include/llvm/IR/Instructions.h
@@ -1167,6 +1167,9 @@
       ResultElementType(getIndexedType(PointeeType, IdxList)) {
     assert(cast<PointerType>(getType()->getScalarType())
                ->isOpaqueOrPointeeTypeMatches(ResultElementType));
+    assert(!(SourceElementType->isStructTy() &&
+             cast<StructType>(SourceElementType)->containsScalableVectorType()) &&
+           "GEP does not support struct types with scalable vectors");
     init(Ptr, IdxList, NameStr);
   }
 
@@ -1181,6 +1184,9 @@
       ResultElementType(getIndexedType(PointeeType, IdxList)) {
     assert(cast<PointerType>(getType()->getScalarType())
                ->isOpaqueOrPointeeTypeMatches(ResultElementType));
+    assert(!(SourceElementType->isStructTy() &&
+             cast<StructType>(SourceElementType)->containsScalableVectorType()) &&
+           "GEP does not support struct types with scalable vectors");
     init(Ptr, IdxList, NameStr);
   }
diff --git a/llvm/lib/Analysis/Loads.cpp b/llvm/lib/Analysis/Loads.cpp
--- a/llvm/lib/Analysis/Loads.cpp
+++ b/llvm/lib/Analysis/Loads.cpp
@@ -202,8 +202,8 @@
     const Value *V, Type *Ty, Align Alignment, const DataLayout &DL,
     const Instruction *CtxI, AssumptionCache *AC, const DominatorTree *DT,
     const TargetLibraryInfo *TLI) {
-  // For unsized types or scalable vectors we don't know exactly how many bytes
-  // are dereferenced, so bail out.
+  // For unsized types, scalable vectors, or structures of scalable vectors, we
+  // don't know exactly how many bytes are dereferenced, so bail out.
   if (!Ty->isSized() || Ty->isScalableTy())
     return false;
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -3740,6 +3740,8 @@
   for (const SCEV *IndexExpr : IndexExprs) {
     // Compute the (potentially symbolic) offset in bytes for this index.
     if (StructType *STy = dyn_cast<StructType>(CurTy)) {
+      assert(!STy->containsScalableVectorType() &&
+             "Structure with scalable vector type should not use GEP");
       // For a struct, add the member offset.
ConstantInt *Index = cast(IndexExpr)->getValue(); unsigned FieldNo = Index->getZExtValue(); diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp --- a/llvm/lib/AsmParser/LLParser.cpp +++ b/llvm/lib/AsmParser/LLParser.cpp @@ -7620,7 +7620,11 @@ return error(SizeLoc, "element count must have integer type"); SmallPtrSet Visited; - if (!Alignment && !Ty->isSized(&Visited)) + // Allow structure that contains homogeneous scalable vector types to be a + // target of alloca + if (!Alignment && !Ty->isSized(&Visited) && + !(isa(Ty) && + cast(Ty)->isContainHomogeneousScalableVectorType())) return error(TyLoc, "Cannot allocate unsized type"); if (!Alignment) Alignment = M->getDataLayout().getPrefTypeAlign(Ty); @@ -7679,7 +7683,11 @@ Val->getType()->getNonOpaquePointerElementType())); } SmallPtrSet Visited; - if (!Alignment && !Ty->isSized(&Visited)) + // Allow structure that contains homogeneous scalable vector types to be a + // target of load + if (!Alignment && !Ty->isSized(&Visited) && + !(isa(Ty) && + cast(Ty)->isContainHomogeneousScalableVectorType())) return error(ExplicitTypeLoc, "loading unsized types is not allowed"); if (!Alignment) Alignment = M->getDataLayout().getABITypeAlign(Ty); @@ -7731,7 +7739,12 @@ Ordering == AtomicOrdering::AcquireRelease) return error(Loc, "atomic store cannot use Acquire ordering"); SmallPtrSet Visited; - if (!Alignment && !Val->getType()->isSized(&Visited)) + // Allow structure that contains homogeneous scalable vector types to be a + // target of store + if (!Alignment && !Val->getType()->isSized(&Visited) && + !(isa(Val->getType()) && + cast(Val->getType()) + ->isContainHomogeneousScalableVectorType())) return error(Loc, "storing unsized types is not allowed"); if (!Alignment) Alignment = M->getDataLayout().getABITypeAlign(Val->getType()); diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp --- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp @@ -5938,7 +5938,11 @@ unsigned AS = Record.size() == 5 ? 
Record[4] : DL.getAllocaAddrSpace(); SmallPtrSet Visited; - if (!Align && !Ty->isSized(&Visited)) + // Allow structure that contains homogeneous scalable vector types to be a + // target of alloca + if (!Align && !Ty->isSized(&Visited) && + !(isa(Ty) && + cast(Ty)->isContainHomogeneousScalableVectorType())) return error("alloca of unsized type"); if (!Align) Align = DL.getPrefTypeAlign(Ty); @@ -5980,7 +5984,11 @@ if (Error Err = parseAlignmentValue(Record[OpNum], Align)) return Err; SmallPtrSet Visited; - if (!Align && !Ty->isSized(&Visited)) + // Allow structure that contains homogeneous scalable vector types to be a + // target of load + if (!Align && !Ty->isSized(&Visited) && + !(isa(Ty) && + cast(Ty)->isContainHomogeneousScalableVectorType())) return error("load of unsized type"); if (!Align) Align = TheModule->getDataLayout().getABITypeAlign(Ty); @@ -6059,7 +6067,12 @@ if (Error Err = parseAlignmentValue(Record[OpNum], Align)) return Err; SmallPtrSet Visited; - if (!Align && !Val->getType()->isSized(&Visited)) + // Allow structure that contains homogeneous scalable vector types to be a + // target of store + if (!Align && !Val->getType()->isSized(&Visited) && + !(isa(Val->getType()) && + cast(Val->getType()) + ->isContainHomogeneousScalableVectorType())) return error("store of unsized type"); if (!Align) Align = TheModule->getDataLayout().getABITypeAlign(Val->getType()); diff --git a/llvm/lib/CodeGen/Analysis.cpp b/llvm/lib/CodeGen/Analysis.cpp --- a/llvm/lib/CodeGen/Analysis.cpp +++ b/llvm/lib/CodeGen/Analysis.cpp @@ -79,8 +79,8 @@ void llvm::ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL, Type *Ty, SmallVectorImpl &ValueVTs, SmallVectorImpl *MemVTs, - SmallVectorImpl *Offsets, - uint64_t StartingOffset) { + SmallVectorImpl *Offsets, + TypeSize StartingOffset) { // Given a struct type, recursively traverse the elements. if (StructType *STy = dyn_cast(Ty)) { // If the Offsets aren't needed, don't query the struct layout. This allows @@ -92,7 +92,9 @@ EE = STy->element_end(); EI != EE; ++EI) { // Don't compute the element offset if we didn't get a StructLayout above. - uint64_t EltOffset = SL ? SL->getElementOffset(EI - EB) : 0; + TypeSize EltOffset = SL ? SL->getElementOffset(EI - EB) + : StartingOffset.isScalable() ? TypeSize::Scalable(0) + : TypeSize::Fixed(0); ComputeValueVTs(TLI, DL, *EI, ValueVTs, MemVTs, Offsets, StartingOffset + EltOffset); } @@ -101,7 +103,7 @@ // Given an array type, recursively traverse the elements. if (ArrayType *ATy = dyn_cast(Ty)) { Type *EltTy = ATy->getElementType(); - uint64_t EltSize = DL.getTypeAllocSize(EltTy).getFixedValue(); + TypeSize EltSize = DL.getTypeAllocSize(EltTy); for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i) ComputeValueVTs(TLI, DL, EltTy, ValueVTs, MemVTs, Offsets, StartingOffset + i * EltSize); @@ -120,18 +122,40 @@ void llvm::ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL, Type *Ty, SmallVectorImpl &ValueVTs, - SmallVectorImpl *Offsets, - uint64_t StartingOffset) { + SmallVectorImpl *Offsets, + TypeSize StartingOffset) { return ComputeValueVTs(TLI, DL, Ty, ValueVTs, /*MemVTs=*/nullptr, Offsets, StartingOffset); } +void llvm::ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL, + Type *Ty, SmallVectorImpl &ValueVTs, + SmallVectorImpl *Offsets, + uint64_t Offset) { + TypeSize StartingOffset = + Ty->isScalableTy() ? 
TypeSize::Scalable(Offset) : TypeSize::Fixed(Offset); + return ComputeValueVTs(TLI, DL, Ty, ValueVTs, Offsets, StartingOffset); +} + +void llvm::ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL, + Type *Ty, SmallVectorImpl &ValueVTs, + SmallVectorImpl *MemVTs, + SmallVectorImpl *Offsets, + uint64_t Offset) { + TypeSize StartingOffset = + Ty->isScalableTy() ? TypeSize::Scalable(Offset) : TypeSize::Fixed(Offset); + return ComputeValueVTs(TLI, DL, Ty, ValueVTs, MemVTs, Offsets, + StartingOffset); +} + void llvm::computeValueLLTs(const DataLayout &DL, Type &Ty, SmallVectorImpl &ValueTys, SmallVectorImpl *Offsets, uint64_t StartingOffset) { // Given a struct type, recursively traverse the elements. if (StructType *STy = dyn_cast(&Ty)) { + assert(!STy->containsScalableVectorType() && + "Unexpect scalable struct type"); // If the Offsets aren't needed, don't query the struct layout. This allows // us to support structs with scalable vectors for operations that don't // need offsets. diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp --- a/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -7681,6 +7681,10 @@ if (StoreType->isScalableTy()) return false; + if (isa(StoreType) && + cast(StoreType)->containsScalableVectorType()) + return false; + if (!DL.typeSizeEqualsStoreSize(StoreType) || DL.getTypeSizeInBits(StoreType) == 0) return false; diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp --- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp @@ -255,7 +255,7 @@ LLVMContext &Ctx = OrigArg.Ty->getContext(); SmallVector SplitVTs; - ComputeValueVTs(*TLI, DL, OrigArg.Ty, SplitVTs, Offsets, 0); + ComputeValueVTs(*TLI, DL, OrigArg.Ty, SplitVTs); if (SplitVTs.size() == 0) return; @@ -838,8 +838,8 @@ const DataLayout &DL = MF.getDataLayout(); SmallVector SplitVTs; - SmallVector Offsets; - ComputeValueVTs(*TLI, DL, RetTy, SplitVTs, &Offsets, 0); + SmallVector Offsets; + ComputeValueVTs(*TLI, DL, RetTy, SplitVTs, &Offsets); assert(VRegs.size() == SplitVTs.size()); @@ -868,8 +868,8 @@ const DataLayout &DL = MF.getDataLayout(); SmallVector SplitVTs; - SmallVector Offsets; - ComputeValueVTs(*TLI, DL, RetTy, SplitVTs, &Offsets, 0); + SmallVector Offsets; + ComputeValueVTs(*TLI, DL, RetTy, SplitVTs, &Offsets); assert(VRegs.size() == SplitVTs.size()); diff --git a/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp --- a/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -152,12 +152,15 @@ false, AI); } - // Scalable vectors may need a special StackID to distinguish - // them from other (fixed size) stack objects. - if (isa(Ty)) + // Scalable vectors and structure that contains scalable vector may + // need a special StackID to distinguish them from other (fixed size) + // stack objects. + if (isa(Ty) || + (isa(Ty) && + cast(Ty)->containsScalableVectorType())) { MF->getFrameInfo().setStackID(FrameIndex, TFI->getStackIDForScalableVectors()); - + } StaticAllocaMap[AI] = FrameIndex; // Update the catch handler information. 
if (Iter != CatchObjects.end()) { diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -6963,6 +6963,9 @@ SDValue SelectionDAG::getMemBasePlusOffset(SDValue Base, TypeSize Offset, const SDLoc &DL, const SDNodeFlags Flags) { + if (Offset.getKnownMinValue() == 0) + return Base; + EVT VT = Base.getValueType(); SDValue Index; diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -2006,7 +2006,7 @@ SDValue RetOp = getValue(I.getOperand(0)); SmallVector ValueVTs, MemVTs; - SmallVector Offsets; + SmallVector Offsets; ComputeValueVTs(TLI, DL, I.getOperand(0)->getType(), ValueVTs, &MemVTs, &Offsets); unsigned NumValues = ValueVTs.size(); @@ -4138,7 +4138,7 @@ Type *Ty = I.getType(); SmallVector ValueVTs, MemVTs; - SmallVector Offsets; + SmallVector Offsets; ComputeValueVTs(TLI, DAG.getDataLayout(), Ty, ValueVTs, &MemVTs, &Offsets); unsigned NumValues = ValueVTs.size(); if (NumValues == 0) @@ -4184,7 +4184,6 @@ SmallVector Values(NumValues); SmallVector Chains(std::min(MaxParallelChains, NumValues)); - EVT PtrVT = Ptr.getValueType(); unsigned ChainI = 0; for (unsigned i = 0; i != NumValues; ++i, ++ChainI) { @@ -4201,10 +4200,7 @@ Root = Chain; ChainI = 0; } - SDValue A = DAG.getNode(ISD::ADD, dl, - PtrVT, Ptr, - DAG.getConstant(Offsets[i], dl, PtrVT), - Flags); + SDValue A = DAG.getMemBasePlusOffset(Ptr, Offsets[i], dl, Flags); SDValue L = DAG.getLoad(MemVTs[i], dl, Root, A, MachinePointerInfo(SV, Offsets[i]), Alignment, @@ -4235,7 +4231,7 @@ "call visitStoreToSwiftError when backend supports swifterror"); SmallVector ValueVTs; - SmallVector Offsets; + SmallVector Offsets; const Value *SrcV = I.getOperand(0); ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(), SrcV->getType(), ValueVTs, &Offsets); @@ -4272,7 +4268,7 @@ "load_from_swift_error should not be constant memory"); SmallVector ValueVTs; - SmallVector Offsets; + SmallVector Offsets; ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(), Ty, ValueVTs, &Offsets); assert(ValueVTs.size() == 1 && Offsets[0] == 0 && @@ -4309,7 +4305,7 @@ } SmallVector ValueVTs, MemVTs; - SmallVector Offsets; + SmallVector Offsets; ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(), SrcV->getType(), ValueVTs, &MemVTs, &Offsets); unsigned NumValues = ValueVTs.size(); @@ -4344,8 +4340,7 @@ Root = Chain; ChainI = 0; } - SDValue Add = - DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(Offsets[i]), dl, Flags); + SDValue Add = DAG.getMemBasePlusOffset(Ptr, Offsets[i], dl, Flags); SDValue Val = SDValue(Src.getNode(), Src.getResNo() + i); if (MemVTs[i] != ValueVTs[i]) Val = DAG.getPtrExtOrTrunc(Val, dl, MemVTs[i]); @@ -9891,26 +9886,28 @@ CLI.Ins.clear(); Type *OrigRetTy = CLI.RetTy; SmallVector RetTys; - SmallVector Offsets; + SmallVector Offsets; auto &DL = CLI.DAG.getDataLayout(); ComputeValueVTs(*this, DL, CLI.RetTy, RetTys, &Offsets); if (CLI.IsPostTypeLegalization) { // If we are lowering a libcall after legalization, split the return type. 
SmallVector OldRetTys; - SmallVector OldOffsets; + SmallVector OldOffsets; RetTys.swap(OldRetTys); Offsets.swap(OldOffsets); for (size_t i = 0, e = OldRetTys.size(); i != e; ++i) { EVT RetVT = OldRetTys[i]; - uint64_t Offset = OldOffsets[i]; + TypeSize Offset = OldOffsets[i]; + assert(!Offset.isScalable() && + "Offset should be fixed here to be adjusted"); MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), RetVT); unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), RetVT); unsigned RegisterVTByteSZ = RegisterVT.getSizeInBits() / 8; RetTys.append(NumRegs, RegisterVT); for (unsigned j = 0; j != NumRegs; ++j) - Offsets.push_back(Offset + j * RegisterVTByteSZ); + Offsets.push_back(Offset + TypeSize::Fixed(j * RegisterVTByteSZ)); } } diff --git a/llvm/lib/IR/DataLayout.cpp b/llvm/lib/IR/DataLayout.cpp --- a/llvm/lib/IR/DataLayout.cpp +++ b/llvm/lib/IR/DataLayout.cpp @@ -45,9 +45,12 @@ // Support for StructLayout //===----------------------------------------------------------------------===// -StructLayout::StructLayout(StructType *ST, const DataLayout &DL) { +StructLayout::StructLayout(StructType *ST, const DataLayout &DL) + : StructSize(0, false) { assert(!ST->isOpaque() && "Cannot get layout of opaque structs"); - StructSize = 0; + if (ST->containsScalableVectorType()) + StructSize = ST->containsScalableVectorType() ? TypeSize::Scalable(0) + : TypeSize::Fixed(0); IsPadded = false; NumElements = ST->getNumElements(); @@ -57,9 +60,15 @@ const Align TyAlign = ST->isPacked() ? Align(1) : DL.getABITypeAlign(Ty); // Add padding if necessary to align the data element properly. - if (!isAligned(TyAlign, StructSize)) { + // Currently the only structure with scalable size will be the be the tuple + // types. Tuple types have members of the same data type so no alignment + // issue will happen. The condition here assumes so and needs to be adjusted + // if this assumption changes (e.g. we support structures with arbitrary + // scalable data type, or structure that contains both fixed size and + // scalable size data type members). + if (!StructSize.isScalable() && !isAligned(TyAlign, StructSize)) { IsPadded = true; - StructSize = alignTo(StructSize, TyAlign); + StructSize = TypeSize::Fixed(alignTo(StructSize, TyAlign)); } // Keep track of maximum alignment constraint. @@ -67,28 +76,38 @@ getMemberOffsets()[i] = StructSize; // Consume space for this data item - StructSize += DL.getTypeAllocSize(Ty).getFixedValue(); + StructSize += DL.getTypeAllocSize(Ty); } // Add padding to the end of the struct so that it could be put in an array // and all array elements would be aligned correctly. - if (!isAligned(StructAlignment, StructSize)) { + if (!StructSize.isScalable() && !isAligned(StructAlignment, StructSize)) { IsPadded = true; - StructSize = alignTo(StructSize, StructAlignment); + StructSize = TypeSize::Fixed(alignTo(StructSize, StructAlignment)); } } /// getElementContainingOffset - Given a valid offset into the structure, /// return the structure index that contains it. 
-unsigned StructLayout::getElementContainingOffset(uint64_t Offset) const { - ArrayRef MemberOffsets = getMemberOffsets(); - auto SI = llvm::upper_bound(MemberOffsets, Offset); +unsigned StructLayout::getElementContainingOffset(uint64_t FixedOffset) const { + assert(!StructSize.isScalable() && + "Caller of this should not be a structure with scalable type"); + TypeSize Offset = TypeSize::Fixed(FixedOffset); + ArrayRef MemberOffsets = getMemberOffsets(); + + const auto *SI = + std::upper_bound(MemberOffsets.begin(), MemberOffsets.end(), Offset, + [](TypeSize LHS, TypeSize RHS) -> bool { + return TypeSize::isKnownLT(LHS, RHS); + }); assert(SI != MemberOffsets.begin() && "Offset not in structure type!"); --SI; - assert(*SI <= Offset && "upper_bound didn't work"); - assert((SI == MemberOffsets.begin() || *(SI - 1) <= Offset) && - (SI + 1 == MemberOffsets.end() || *(SI + 1) > Offset) && - "Upper bound didn't work!"); + assert(TypeSize::isKnownLE(*SI, Offset) && "upper_bound didn't work"); + assert( + (SI == MemberOffsets.begin() || TypeSize::isKnownLE(*(SI - 1), Offset)) && + (SI + 1 == MemberOffsets.end() || + TypeSize::isKnownGT(*(SI + 1), Offset)) && + "Upper bound didn't work!"); // Multiple fields can have the same offset if any of them are zero sized. // For example, in { i32, [0 x i32], i32 }, searching for offset 4 will stop @@ -706,7 +725,7 @@ // Otherwise, create the struct layout. Because it is variable length, we // malloc it, then use placement new. StructLayout *L = (StructLayout *)safe_malloc( - StructLayout::totalSizeToAlloc(Ty->getNumElements())); + StructLayout::totalSizeToAlloc(Ty->getNumElements())); // Set SL before calling StructLayout's ctor. The ctor could cause other // entries to be added to TheMap, invalidating our reference. @@ -765,7 +784,10 @@ == false) for the requested type \a Ty. */ Align DataLayout::getAlignment(Type *Ty, bool abi_or_pref) const { - assert(Ty->isSized() && "Cannot getTypeInfo() on a type that is unsized!"); + assert((Ty->isSized() || + (isa(Ty) && + cast(Ty)->isContainHomogeneousScalableVectorType())) && + "Cannot getTypeInfo() on a type that is unsized!"); switch (Ty->getTypeID()) { // Early escape for the non-numeric types. case Type::LabelTyID: diff --git a/llvm/lib/IR/Type.cpp b/llvm/lib/IR/Type.cpp --- a/llvm/lib/IR/Type.cpp +++ b/llvm/lib/IR/Type.cpp @@ -462,6 +462,16 @@ return false; } +bool StructType::isContainHomogeneousScalableVectorType() const { + Type *FirstTy = getNumElements() > 0 ? elements()[0] : nullptr; + if (!isa(FirstTy)) + return false; + for (Type *Ty : elements()) + if (Ty != FirstTy) + return false; + return true; +} + void StructType::setBody(ArrayRef Elements, bool isPacked) { assert(isOpaque() && "Struct body already set!"); @@ -583,7 +593,9 @@ // the future, so just bail out without caching. for (Type *Ty : elements()) { // If the struct contains a scalable vector type, don't consider it sized. - // This prevents it from being used in loads/stores/allocas/GEPs. + // This prevents it from being used in loads/stores/allocas/GEPs. The ONLY + // special case right now is a structure of homogenous scalable vector + // types. 
if (isa(Ty)) return false; if (!Ty->isSized(Visited)) diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -3943,7 +3943,12 @@ Check(A->value() <= Value::MaximumAlignment, "huge alignment values are unsupported", &LI); } - Check(ElTy->isSized(), "loading unsized types is not allowed", &LI); + // Only allow an unsized type when it is a structure that contains homogeneous + // vector types + Check(ElTy->isSized() || + (isa(ElTy) && + cast(ElTy)->isContainHomogeneousScalableVectorType()), + "loading unsized types is not allowed", &LI); if (LI.isAtomic()) { Check(LI.getOrdering() != AtomicOrdering::Release && LI.getOrdering() != AtomicOrdering::AcquireRelease, @@ -3971,7 +3976,12 @@ Check(A->value() <= Value::MaximumAlignment, "huge alignment values are unsupported", &SI); } - Check(ElTy->isSized(), "storing unsized types is not allowed", &SI); + // Only allow an unsized type when it is a structure that contains homogeneous + // vector types + Check(ElTy->isSized() || + (isa(ElTy) && + cast(ElTy)->containsScalableVectorType()), + "storing unsized types is not allowed", &SI); if (SI.isAtomic()) { Check(SI.getOrdering() != AtomicOrdering::Acquire && SI.getOrdering() != AtomicOrdering::AcquireRelease, @@ -4023,7 +4033,13 @@ void Verifier::visitAllocaInst(AllocaInst &AI) { SmallPtrSet Visited; - Check(AI.getAllocatedType()->isSized(&Visited), + Type *AllocatedType = AI.getAllocatedType(); + // Only allow an unsized type when it is a structure that contains homogeneous + // vector types + Check(AllocatedType->isSized(&Visited) || + (isa(AllocatedType) && + cast(AllocatedType) + ->isContainHomogeneousScalableVectorType()), "Cannot allocate unsized type", &AI); Check(AI.getArraySize()->getType()->isIntegerTy(), "Alloca array size must have integer type", &AI); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -1070,7 +1070,7 @@ // the correct memory offsets. SmallVector ValueVTs; - SmallVector Offsets; + SmallVector Offsets; ComputeValueVTs(*this, DL, BaseArgTy, ValueVTs, &Offsets, ArgOffset); for (unsigned Value = 0, NumValues = ValueVTs.size(); diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp --- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -181,10 +181,14 @@ return; } - ComputeValueVTs(TLI, DL, Ty, TempVTs, &TempOffsets, StartingOffset); + SmallVector TempOffsetsFixed; + for (uint64_t Offset : TempOffsets) + TempOffsetsFixed.push_back(TypeSize::Fixed(Offset)); + + ComputeValueVTs(TLI, DL, Ty, TempVTs, &TempOffsetsFixed, StartingOffset); for (unsigned i = 0, e = TempVTs.size(); i != e; ++i) { EVT VT = TempVTs[i]; - uint64_t Off = TempOffsets[i]; + uint64_t Off = TempOffsetsFixed[i]; // Split vectors into individual elements, except for v2f16, which // we will pass as a single scalar. if (VT.isVector()) { diff --git a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp --- a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -735,6 +735,10 @@ PoisonValue::get(T), NewLoad, 0, Name)); } + // Don't unpack for structure with scalable vector. 
+ if (ST->containsScalableVectorType()) + return nullptr; + // We don't want to break loads with padding here as we'd loose // the knowledge that padding exists for the rest of the pipeline. const DataLayout &DL = IC.getDataLayout(); @@ -1259,6 +1263,10 @@ return true; } + // Don't unpack for structure with scalable vector. + if (ST->containsScalableVectorType()) + return false; + // We don't want to break loads with padding here as we'd loose // the knowledge that padding exists for the rest of the pipeline. const DataLayout &DL = IC.getDataLayout(); diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp --- a/llvm/lib/Transforms/Scalar/SROA.cpp +++ b/llvm/lib/Transforms/Scalar/SROA.cpp @@ -3936,6 +3936,9 @@ if (!STy) return nullptr; + if (STy->containsScalableVectorType()) + return nullptr; + const StructLayout *SL = DL.getStructLayout(STy); if (Offset >= SL->getSizeInBytes()) return nullptr; diff --git a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp --- a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp +++ b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp @@ -522,6 +522,9 @@ // the struct fields. if (Ops.empty()) break; + // Don't do this for structure with scalable vector. + if (STy->containsScalableVectorType()) + break; if (const SCEVConstant *C = dyn_cast(Ops[0])) if (SE.getTypeSizeInBits(C->getType()) <= 64) { const StructLayout &SL = *DL.getStructLayout(STy); diff --git a/llvm/test/CodeGen/RISCV/rvv/alloca-load-store-scalable-struct.ll b/llvm/test/CodeGen/RISCV/rvv/alloca-load-store-scalable-struct.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/alloca-load-store-scalable-struct.ll @@ -0,0 +1,52 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 +; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+v -verify-machineinstrs \ +; RUN: --riscv-no-aliases < %s | FileCheck %s + +target triple = "riscv64-unknown-unknown-elf" + +%struct.test = type { , } + +define @test(%struct.test* %addr, i64 %vl) { +; CHECK-LABEL: test: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrrs a2, vlenb, zero +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, a0, 8 +; CHECK-NEXT: vl1re64.v v8, (a2) +; CHECK-NEXT: vl1re64.v v9, (a0) +; CHECK-NEXT: addi a0, sp, 24 +; CHECK-NEXT: vs1r.v v8, (a0) +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v9, (a2) +; CHECK-NEXT: vl1re64.v v8, (a0) +; CHECK-NEXT: vl1re64.v v9, (a2) +; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma +; CHECK-NEXT: vfadd.vv v8, v9, v8 +; CHECK-NEXT: csrrs a0, vlenb, zero +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %ret = alloca %struct.test, align 8 + %val = load %struct.test, %struct.test* %addr + store %struct.test %val, %struct.test* %ret, align 8 + %0 = load %struct.test, %struct.test* %ret, align 8 + %1 = extractvalue %struct.test %0, 0 + %2 = extractvalue %struct.test %0, 1 + %3 = call @llvm.riscv.vfadd.nxv1f64.nxv1f64.i64( + poison, + %1, + %2, i64 %vl) + ret %3 +} + +declare @llvm.riscv.vfadd.nxv1f64.nxv1f64.i64( + , + , + , + i64) diff --git a/llvm/test/Other/load-scalable-vector-struct.ll b/llvm/test/Other/load-scalable-vector-struct.ll 
new file mode 100644
--- /dev/null
+++ b/llvm/test/Other/load-scalable-vector-struct.ll
@@ -0,0 +1,16 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
+; RUN: opt -passes=verify -S < %s 2>&1 | FileCheck %s
+
+%struct.test = type { <vscale x 4 x i32>, <vscale x 4 x i32> }
+
+define <vscale x 4 x i32> @load(%struct.test* %x) {
+; CHECK-LABEL: define <vscale x 4 x i32> @load
+; CHECK-SAME: (ptr [[X:%.*]]) {
+; CHECK-NEXT:    [[A:%.*]] = load [[STRUCT_TEST:%.*]], ptr [[X]], align 4
+; CHECK-NEXT:    [[B:%.*]] = extractvalue [[STRUCT_TEST]] [[A]], 1
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[B]]
+;
+  %a = load %struct.test, %struct.test* %x
+  %b = extractvalue %struct.test %a, 1
+  ret <vscale x 4 x i32> %b
+}
diff --git a/llvm/test/Other/store-scalable-vector-struct.ll b/llvm/test/Other/store-scalable-vector-struct.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Other/store-scalable-vector-struct.ll
@@ -0,0 +1,18 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
+; RUN: opt -passes=verify -S < %s 2>&1 | FileCheck %s
+
+%struct.test = type { <vscale x 4 x i32>, <vscale x 4 x i32> }
+
+define void @store(%struct.test* %x, <vscale x 4 x i32> %y, <vscale x 4 x i32> %z) {
+; CHECK-LABEL: define void @store
+; CHECK-SAME: (ptr [[X:%.*]], <vscale x 4 x i32> [[Y:%.*]], <vscale x 4 x i32> [[Z:%.*]]) {
+; CHECK-NEXT:    [[A:%.*]] = insertvalue [[STRUCT_TEST:%.*]] undef, <vscale x 4 x i32> [[Y]], 0
+; CHECK-NEXT:    [[B:%.*]] = insertvalue [[STRUCT_TEST]] [[A]], <vscale x 4 x i32> [[Z]], 1
+; CHECK-NEXT:    store [[STRUCT_TEST]] [[B]], ptr [[X]], align 4
+; CHECK-NEXT:    ret void
+;
+  %a = insertvalue %struct.test undef, <vscale x 4 x i32> %y, 0
+  %b = insertvalue %struct.test %a, <vscale x 4 x i32> %z, 1
+  store %struct.test %b, %struct.test* %x
+  ret void
+}
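
For reference, a minimal IR sketch (not part of this patch's tests; the type and
function names are illustrative only) of a case that stays rejected after this
change: a struct that mixes a scalable member with a fixed-size member is not
homogeneous, so it remains unsized and the new LLParser/Verifier checks above
still refuse to load, store, or alloca it.

  ; A non-homogeneous struct: one scalable member, one fixed-size member.
  %mixed = type { <vscale x 2 x i64>, i64 }

  define void @still_rejected(ptr %p) {
    ; Expected to be rejected by `opt -passes=verify`:
    ;   "loading unsized types is not allowed"
    %v = load %mixed, ptr %p
    ret void
  }

GEP into any struct containing scalable vectors likewise remains unsupported,
per the asserts added in Instructions.h and ScalarEvolution.cpp above.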