diff --git a/llvm/include/llvm/CodeGen/Analysis.h b/llvm/include/llvm/CodeGen/Analysis.h
--- a/llvm/include/llvm/CodeGen/Analysis.h
+++ b/llvm/include/llvm/CodeGen/Analysis.h
@@ -67,15 +67,31 @@
 ///
 void ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL, Type *Ty,
                      SmallVectorImpl<EVT> &ValueVTs,
-                     SmallVectorImpl<uint64_t> *Offsets = nullptr,
-                     uint64_t StartingOffset = 0);
+                     SmallVectorImpl<TypeSize> *Offsets = nullptr,
+                     TypeSize StartingOffset = TypeSize::Fixed(0));
+void ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL, Type *Ty,
+                     SmallVectorImpl<EVT> &ValueVTs,
+                     SmallVectorImpl<TypeSize> *Offsets = nullptr) {
+  TypeSize StartingOffset =
+      Ty->isScalableType() ? TypeSize::Scalable(0) : TypeSize::Fixed(0);
+  return ComputeValueVTs(TLI, DL, Ty, ValueVTs, Offsets, StartingOffset);
+}
 
 /// Variant of ComputeValueVTs that also produces the memory VTs.
 void ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL, Type *Ty,
                      SmallVectorImpl<EVT> &ValueVTs,
                      SmallVectorImpl<EVT> *MemVTs,
-                     SmallVectorImpl<uint64_t> *Offsets = nullptr,
-                     uint64_t StartingOffset = 0);
+                     SmallVectorImpl<TypeSize> *Offsets = nullptr,
+                     TypeSize StartingOffset = TypeSize::Fixed(0));
+void ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL, Type *Ty,
+                     SmallVectorImpl<EVT> &ValueVTs,
+                     SmallVectorImpl<EVT> *MemVTs,
+                     SmallVectorImpl<TypeSize> *Offsets = nullptr) {
+  TypeSize StartingOffset =
+      Ty->isScalableType() ? TypeSize::Scalable(0) : TypeSize::Fixed(0);
+  return ComputeValueVTs(TLI, DL, Ty, ValueVTs, MemVTs, Offsets,
+                         StartingOffset);
+}
 
 /// computeValueLLTs - Given an LLVM IR type, compute a sequence of
 /// LLTs that represent all the individual underlying
diff --git a/llvm/include/llvm/IR/DataLayout.h b/llvm/include/llvm/IR/DataLayout.h
--- a/llvm/include/llvm/IR/DataLayout.h
+++ b/llvm/include/llvm/IR/DataLayout.h
@@ -620,16 +620,16 @@
 /// Used to lazily calculate structure layout information for a target machine,
 /// based on the DataLayout structure.
 class StructLayout {
-  uint64_t StructSize;
+  TypeSize StructSize;
   Align StructAlignment;
   unsigned IsPadded : 1;
   unsigned NumElements : 31;
-  uint64_t MemberOffsets[1]; // variable sized array!
+  TypeSize MemberOffsets[1]; // variable sized array!
 
 public:
-  uint64_t getSizeInBytes() const { return StructSize; }
+  TypeSize getSizeInBytes() const { return StructSize; }
 
-  uint64_t getSizeInBits() const { return 8 * StructSize; }
+  TypeSize getSizeInBits() const { return 8 * StructSize; }
 
   Align getAlignment() const { return StructAlignment; }
 
@@ -641,12 +641,12 @@
   /// index that contains it.
   unsigned getElementContainingOffset(uint64_t Offset) const;
 
-  uint64_t getElementOffset(unsigned Idx) const {
+  TypeSize getElementOffset(unsigned Idx) const {
     assert(Idx < NumElements && "Invalid element idx!");
     return MemberOffsets[Idx];
   }
 
-  uint64_t getElementOffsetInBits(unsigned Idx) const {
+  TypeSize getElementOffsetInBits(unsigned Idx) const {
     return getElementOffset(Idx) * 8;
   }
 
@@ -672,8 +672,7 @@
   }
   case Type::StructTyID:
     // Get the layout annotation... which is lazily created on demand.
-    return TypeSize::Fixed(
-        getStructLayout(cast<StructType>(Ty))->getSizeInBits());
+    return getStructLayout(cast<StructType>(Ty))->getSizeInBits();
   case Type::IntegerTyID:
     return TypeSize::Fixed(Ty->getIntegerBitWidth());
   case Type::HalfTyID:
diff --git a/llvm/include/llvm/IR/Instructions.h b/llvm/include/llvm/IR/Instructions.h
--- a/llvm/include/llvm/IR/Instructions.h
+++ b/llvm/include/llvm/IR/Instructions.h
@@ -1148,6 +1148,9 @@
       ResultElementType(getIndexedType(PointeeType, IdxList)) {
   assert(ResultElementType ==
          cast<PointerType>(getType()->getScalarType())->getElementType());
+  assert(!(SourceElementType->isStructTy() &&
+           SourceElementType->isScalableType()) &&
+         "GEP is not supported for scalable struct types.");
   init(Ptr, IdxList, NameStr);
 }
 
@@ -1162,6 +1165,9 @@
      ResultElementType(getIndexedType(PointeeType, IdxList)) {
   assert(ResultElementType ==
          cast<PointerType>(getType()->getScalarType())->getElementType());
+  assert(!(SourceElementType->isStructTy() &&
+           SourceElementType->isScalableType()) &&
+         "GEP is not supported for scalable struct types.");
   init(Ptr, IdxList, NameStr);
 }
diff --git a/llvm/include/llvm/IR/Type.h b/llvm/include/llvm/IR/Type.h
--- a/llvm/include/llvm/IR/Type.h
+++ b/llvm/include/llvm/IR/Type.h
@@ -236,6 +236,9 @@
     return getTypeID() == ScalableVectorTyID || getTypeID() == FixedVectorTyID;
   }
 
+  /// Return true if this is a scalable vector type or a struct containing one.
+  bool isScalableType() const;
+
   /// Return true if this type could be converted with a lossless BitCast to
   /// type 'Ty'. For example, i8* to i32*. BitCasts are valid for types of the
   /// same size only where no re-interpretation of the bits is done.
diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp
--- a/llvm/lib/Analysis/ConstantFolding.cpp
+++ b/llvm/lib/Analysis/ConstantFolding.cpp
@@ -876,7 +876,7 @@
   Type *SrcElemTy = GEP->getSourceElementType();
   Type *ResElemTy = GEP->getResultElementType();
   Type *ResTy = GEP->getType();
-  if (!SrcElemTy->isSized() || isa<ScalableVectorType>(SrcElemTy))
+  if (!SrcElemTy->isSized() || SrcElemTy->isScalableType())
     return nullptr;
 
   if (Constant *C = CastGEPIndices(SrcElemTy, Ops, ResTy,
diff --git a/llvm/lib/Analysis/Loads.cpp b/llvm/lib/Analysis/Loads.cpp
--- a/llvm/lib/Analysis/Loads.cpp
+++ b/llvm/lib/Analysis/Loads.cpp
@@ -213,9 +213,9 @@
                                               const Instruction *CtxI,
                                               const DominatorTree *DT,
                                               const TargetLibraryInfo *TLI) {
-  // For unsized types or scalable vectors we don't know exactly how many bytes
+  // For unsized types or scalable types we don't know exactly how many bytes
   // are dereferenced, so bail out.
-  if (!Ty->isSized() || isa<ScalableVectorType>(Ty))
+  if (!Ty->isSized() || Ty->isScalableType())
     return false;
 
   // When dereferenceability information is provided by a dereferenceable
diff --git a/llvm/lib/Analysis/MemoryBuiltins.cpp b/llvm/lib/Analysis/MemoryBuiltins.cpp
--- a/llvm/lib/Analysis/MemoryBuiltins.cpp
+++ b/llvm/lib/Analysis/MemoryBuiltins.cpp
@@ -351,7 +351,7 @@
 
   // The size of the malloc's result type must be known to determine array size.
   Type *T = getMallocAllocatedType(CI, TLI);
-  if (!T || !T->isSized())
+  if (!T || !T->isSized() || T->isScalableType())
    return nullptr;
 
   unsigned ElementSize = DL.getTypeAllocSize(T);
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -3443,6 +3443,8 @@
   for (const SCEV *IndexExpr : IndexExprs) {
     // Compute the (potentially symbolic) offset in bytes for this index.
     if (StructType *STy = dyn_cast<StructType>(CurTy)) {
+      assert(!STy->isScalableType() &&
+             "Scalable struct types cannot be used in GEP.");
       // For a struct, add the member offset.
       ConstantInt *Index = cast<SCEVConstant>(IndexExpr)->getValue();
       unsigned FieldNo = Index->getZExtValue();
diff --git a/llvm/lib/CodeGen/Analysis.cpp b/llvm/lib/CodeGen/Analysis.cpp
--- a/llvm/lib/CodeGen/Analysis.cpp
+++ b/llvm/lib/CodeGen/Analysis.cpp
@@ -82,29 +82,22 @@
 void llvm::ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL,
                            Type *Ty, SmallVectorImpl<EVT> &ValueVTs,
                            SmallVectorImpl<EVT> *MemVTs,
-                           SmallVectorImpl<uint64_t> *Offsets,
-                           uint64_t StartingOffset) {
+                           SmallVectorImpl<TypeSize> *Offsets,
+                           TypeSize StartingOffset) {
   // Given a struct type, recursively traverse the elements.
   if (StructType *STy = dyn_cast<StructType>(Ty)) {
-    // If the Offsets aren't needed, don't query the struct layout. This allows
-    // us to support structs with scalable vectors for operations that don't
-    // need offsets.
-    const StructLayout *SL = Offsets ? DL.getStructLayout(STy) : nullptr;
-    for (StructType::element_iterator EB = STy->element_begin(),
-                                      EI = EB,
+    const StructLayout *SL = DL.getStructLayout(STy);
+    for (StructType::element_iterator EB = STy->element_begin(), EI = EB,
                                       EE = STy->element_end();
-         EI != EE; ++EI) {
-      // Don't compute the element offset if we didn't get a StructLayout above.
-      uint64_t EltOffset = SL ? SL->getElementOffset(EI - EB) : 0;
+         EI != EE; ++EI)
       ComputeValueVTs(TLI, DL, *EI, ValueVTs, MemVTs, Offsets,
-                      StartingOffset + EltOffset);
-    }
+                      StartingOffset + SL->getElementOffset(EI - EB));
     return;
   }
   // Given an array type, recursively traverse the elements.
   if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
     Type *EltTy = ATy->getElementType();
-    uint64_t EltSize = DL.getTypeAllocSize(EltTy).getFixedValue();
+    TypeSize EltSize = DL.getTypeAllocSize(EltTy);
     for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i)
       ComputeValueVTs(TLI, DL, EltTy, ValueVTs, MemVTs, Offsets,
                       StartingOffset + i * EltSize);
@@ -123,8 +116,8 @@
 
 void llvm::ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL,
                            Type *Ty, SmallVectorImpl<EVT> &ValueVTs,
-                           SmallVectorImpl<uint64_t> *Offsets,
-                           uint64_t StartingOffset) {
+                           SmallVectorImpl<TypeSize> *Offsets,
+                           TypeSize StartingOffset) {
   return ComputeValueVTs(TLI, DL, Ty, ValueVTs, /*MemVTs=*/nullptr, Offsets,
                          StartingOffset);
 }
@@ -135,6 +128,7 @@
                                  uint64_t StartingOffset) {
   // Given a struct type, recursively traverse the elements.
   if (StructType *STy = dyn_cast<StructType>(&Ty)) {
+    assert(!STy->isScalableType() && "Unexpected scalable struct type.");
     // If the Offsets aren't needed, don't query the struct layout. This allows
     // us to support structs with scalable vectors for operations that don't
     // need offsets.
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -7431,7 +7431,7 @@
   // whereas scalable vectors would have to be shifted by
   // <2log(vscale) + number of bits> in order to store the
   // low/high parts. Bailing out for now.
-  if (isa<ScalableVectorType>(StoreType))
+  if (StoreType->isScalableType())
     return false;
 
   if (!DL.typeSizeEqualsStoreSize(StoreType) ||
diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
--- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -194,8 +194,7 @@
   LLVMContext &Ctx = OrigArg.Ty->getContext();
 
   SmallVector<EVT, 4> SplitVTs;
-  SmallVector<uint64_t, 4> Offsets;
-  ComputeValueVTs(*TLI, DL, OrigArg.Ty, SplitVTs, &Offsets, 0);
+  ComputeValueVTs(*TLI, DL, OrigArg.Ty, SplitVTs);
 
   if (SplitVTs.size() == 0)
     return;
@@ -724,8 +723,8 @@
   const DataLayout &DL = MF.getDataLayout();
 
   SmallVector<EVT, 4> SplitVTs;
-  SmallVector<uint64_t, 4> Offsets;
-  ComputeValueVTs(*TLI, DL, RetTy, SplitVTs, &Offsets, 0);
+  SmallVector<TypeSize, 4> Offsets;
+  ComputeValueVTs(*TLI, DL, RetTy, SplitVTs, &Offsets);
 
   assert(VRegs.size() == SplitVTs.size());
 
@@ -738,7 +737,9 @@
 
   for (unsigned I = 0; I < NumValues; ++I) {
     Register Addr;
-    MIRBuilder.materializePtrAdd(Addr, DemoteReg, OffsetLLTy, Offsets[I]);
+    // FIXME: Consider scalable struct type offsets in GlobalISel.
+    MIRBuilder.materializePtrAdd(Addr, DemoteReg, OffsetLLTy,
+                                 Offsets[I].getKnownMinSize());
     auto *MMO = MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad,
                                         MRI.getType(VRegs[I]).getSizeInBytes(),
                                         commonAlignment(BaseAlign, Offsets[I]));
@@ -754,8 +755,8 @@
   const DataLayout &DL = MF.getDataLayout();
 
   SmallVector<EVT, 4> SplitVTs;
-  SmallVector<uint64_t, 4> Offsets;
-  ComputeValueVTs(*TLI, DL, RetTy, SplitVTs, &Offsets, 0);
+  SmallVector<TypeSize, 4> Offsets;
+  ComputeValueVTs(*TLI, DL, RetTy, SplitVTs, &Offsets);
 
   assert(VRegs.size() == SplitVTs.size());
 
@@ -769,7 +770,9 @@
 
   for (unsigned I = 0; I < NumValues; ++I) {
     Register Addr;
-    MIRBuilder.materializePtrAdd(Addr, DemoteReg, OffsetLLTy, Offsets[I]);
+    // FIXME: Consider scalable struct type offsets in GlobalISel.
+    MIRBuilder.materializePtrAdd(Addr, DemoteReg, OffsetLLTy,
+                                 Offsets[I].getKnownMinSize());
     auto *MMO = MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
                                         MRI.getType(VRegs[I]).getSizeInBytes(),
                                         commonAlignment(BaseAlign, Offsets[I]));
diff --git a/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
@@ -172,11 +172,14 @@
                                                           false, AI);
       }
 
-      // Scalable vectors may need a special StackID to distinguish
+      // Scalable types may need a special StackID to distinguish
      // them from other (fixed size) stack objects.
-      if (isa<ScalableVectorType>(Ty))
+      if (Ty->isScalableType()) {
+        assert(Ty->isSized() && "Alloca of a scalable struct requires all "
+                                "elements to be the same scalable vector.");
         MF->getFrameInfo().setStackID(FrameIndex,
                                       TFI->getStackIDForScalableVectors());
+      }
 
       StaticAllocaMap[AI] = FrameIndex;
 
       // Update the catch handler information.
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -6120,6 +6120,9 @@
   EVT VT = Base.getValueType();
   SDValue Index;
 
+  if (Offset.getKnownMinSize() == 0)
+    return Base;
+
   if (Offset.isScalable())
     Index = getVScale(DL, Base.getValueType(),
                       APInt(Base.getValueSizeInBits().getFixedSize(),
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -1887,7 +1887,7 @@
       SDValue RetOp = getValue(I.getOperand(0));
 
       SmallVector<EVT, 4> ValueVTs, MemVTs;
-      SmallVector<uint64_t, 4> Offsets;
+      SmallVector<TypeSize, 4> Offsets;
       ComputeValueVTs(TLI, DL, I.getOperand(0)->getType(), ValueVTs, &MemVTs,
                       &Offsets);
       unsigned NumValues = ValueVTs.size();
@@ -1897,8 +1897,7 @@
      for (unsigned i = 0; i != NumValues; ++i) {
         // An aggregate return value cannot wrap around the address space, so
         // offsets to its parts don't wrap either.
-        SDValue Ptr = DAG.getObjectPtrOffset(getCurSDLoc(), RetPtr,
-                                             TypeSize::Fixed(Offsets[i]));
+        SDValue Ptr = DAG.getObjectPtrOffset(getCurSDLoc(), RetPtr, Offsets[i]);
 
         SDValue Val = RetOp.getValue(RetOp.getResNo() + i);
         if (MemVTs[i] != ValueVTs[i])
@@ -1907,7 +1906,7 @@
             Chain, getCurSDLoc(), Val,
             // FIXME: better loc info would be nice.
             Ptr, MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()),
-            commonAlignment(BaseAlign, Offsets[i]));
+            commonAlignment(BaseAlign, Offsets[i].getKnownMinSize()));
       }
 
       Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(),
@@ -4039,7 +4038,7 @@
   const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range);
 
   SmallVector<EVT, 4> ValueVTs, MemVTs;
-  SmallVector<uint64_t, 4> Offsets;
+  SmallVector<TypeSize, 4> Offsets;
   ComputeValueVTs(TLI, DAG.getDataLayout(), Ty, ValueVTs, &MemVTs, &Offsets);
   unsigned NumValues = ValueVTs.size();
   if (NumValues == 0)
@@ -4079,7 +4078,6 @@
 
   SmallVector<SDValue, 4> Values(NumValues);
   SmallVector<SDValue, 4> Chains(std::min(MaxParallelChains, NumValues));
-  EVT PtrVT = Ptr.getValueType();
 
   MachineMemOperand::Flags MMOFlags
     = TLI.getLoadMemOperandFlags(I, DAG.getDataLayout());
@@ -4099,14 +4097,12 @@
       Root = Chain;
       ChainI = 0;
     }
-    SDValue A = DAG.getNode(ISD::ADD, dl,
-                            PtrVT, Ptr,
-                            DAG.getConstant(Offsets[i], dl, PtrVT),
-                            Flags);
+    SDValue A = DAG.getMemBasePlusOffset(Ptr, Offsets[i], dl, Flags);
 
-    SDValue L = DAG.getLoad(MemVTs[i], dl, Root, A,
-                            MachinePointerInfo(SV, Offsets[i]), Alignment,
-                            MMOFlags, AAInfo, Ranges);
+    SDValue L =
+        DAG.getLoad(MemVTs[i], dl, Root, A,
+                    MachinePointerInfo(SV, Offsets[i].getKnownMinSize()),
+                    Alignment, MMOFlags, AAInfo, Ranges);
     Chains[ChainI] = L.getValue(1);
 
     if (MemVTs[i] != ValueVTs[i])
@@ -4133,11 +4129,11 @@
          "call visitStoreToSwiftError when backend supports swifterror");
 
   SmallVector<EVT, 4> ValueVTs;
-  SmallVector<uint64_t, 4> Offsets;
+  SmallVector<TypeSize, 4> Offsets;
   const Value *SrcV = I.getOperand(0);
   ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(),
                   SrcV->getType(), ValueVTs, &Offsets);
-  assert(ValueVTs.size() == 1 && Offsets[0] == 0 &&
+  assert(ValueVTs.size() == 1 && Offsets[0].getKnownMinSize() == 0 &&
          "expect a single EVT for swifterror");
 
   SDValue Src = getValue(SrcV);
@@ -4172,10 +4168,10 @@
          "load_from_swift_error should not be constant memory");
 
   SmallVector<EVT, 4> ValueVTs;
-  SmallVector<uint64_t, 4> Offsets;
+  SmallVector<TypeSize, 4> Offsets;
   ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(), Ty,
                   ValueVTs, &Offsets);
-  assert(ValueVTs.size() == 1 && Offsets[0] == 0 &&
+  assert(ValueVTs.size() == 1 && Offsets[0].getKnownMinSize() == 0 &&
          "expect a single EVT for swifterror");
 
   // Chain, DL, Reg, VT, Glue or Chain, DL, Reg, VT
@@ -4209,7 +4205,7 @@
   }
 
   SmallVector<EVT, 4> ValueVTs, MemVTs;
-  SmallVector<uint64_t, 4> Offsets;
+  SmallVector<TypeSize, 4> Offsets;
   ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(),
                   SrcV->getType(), ValueVTs, &MemVTs, &Offsets);
   unsigned NumValues = ValueVTs.size();
@@ -4245,13 +4241,13 @@
       Root = Chain;
       ChainI = 0;
     }
-    SDValue Add =
-        DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(Offsets[i]), dl, Flags);
+    SDValue Add = DAG.getMemBasePlusOffset(Ptr, Offsets[i], dl, Flags);
     SDValue Val = SDValue(Src.getNode(), Src.getResNo() + i);
     if (MemVTs[i] != ValueVTs[i])
       Val = DAG.getPtrExtOrTrunc(Val, dl, MemVTs[i]);
     SDValue St =
-        DAG.getStore(Root, dl, Val, Add, MachinePointerInfo(PtrV, Offsets[i]),
+        DAG.getStore(Root, dl, Val, Add,
+                     MachinePointerInfo(PtrV, Offsets[i].getKnownMinSize()),
                      Alignment, MMOFlags, AAInfo);
     Chains[ChainI] = St;
   }
@@ -9283,26 +9279,26 @@
   CLI.Ins.clear();
   Type *OrigRetTy = CLI.RetTy;
   SmallVector<EVT, 4> RetTys;
-  SmallVector<uint64_t, 4> Offsets;
+  SmallVector<TypeSize, 4> Offsets;
   auto &DL = CLI.DAG.getDataLayout();
   ComputeValueVTs(*this, DL, CLI.RetTy, RetTys, &Offsets);
 
   if (CLI.IsPostTypeLegalization) {
     // If we are lowering a libcall after legalization, split the return type.
     SmallVector<EVT, 4> OldRetTys;
-    SmallVector<uint64_t, 4> OldOffsets;
+    SmallVector<TypeSize, 4> OldOffsets;
     RetTys.swap(OldRetTys);
     Offsets.swap(OldOffsets);
 
     for (size_t i = 0, e = OldRetTys.size(); i != e; ++i) {
       EVT RetVT = OldRetTys[i];
-      uint64_t Offset = OldOffsets[i];
+      TypeSize Offset = OldOffsets[i];
       MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), RetVT);
       unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), RetVT);
       unsigned RegisterVTByteSZ = RegisterVT.getSizeInBits() / 8;
       RetTys.append(NumRegs, RegisterVT);
       for (unsigned j = 0; j != NumRegs; ++j)
-        Offsets.push_back(Offset + j * RegisterVTByteSZ);
+        Offsets.push_back(Offset + TypeSize::Fixed(j * RegisterVTByteSZ));
     }
   }
 
@@ -9604,7 +9600,6 @@
     ComputeValueVTs(*this, DL, PtrRetTy, PVTs);
     assert(PVTs.size() == 1 && "Pointers should fit in one register");
 
-    EVT PtrVT = PVTs[0];
 
     unsigned NumValues = RetTys.size();
     ReturnValues.resize(NumValues);
@@ -9618,14 +9613,15 @@
     MachineFunction &MF = CLI.DAG.getMachineFunction();
     Align HiddenSRetAlign = MF.getFrameInfo().getObjectAlign(DemoteStackIdx);
     for (unsigned i = 0; i < NumValues; ++i) {
-      SDValue Add = CLI.DAG.getNode(ISD::ADD, CLI.DL, PtrVT, DemoteStackSlot,
-                                    CLI.DAG.getConstant(Offsets[i], CLI.DL,
-                                                        PtrVT), Flags);
-      SDValue L = CLI.DAG.getLoad(
-          RetTys[i], CLI.DL, CLI.Chain, Add,
-          MachinePointerInfo::getFixedStack(CLI.DAG.getMachineFunction(),
-                                            DemoteStackIdx, Offsets[i]),
-          HiddenSRetAlign);
+      SDValue Add = CLI.DAG.getMemBasePlusOffset(DemoteStackSlot, Offsets[i],
+                                                 CLI.DL, Flags);
+
+      SDValue L =
+          CLI.DAG.getLoad(RetTys[i], CLI.DL, CLI.Chain, Add,
+                          MachinePointerInfo::getFixedStack(
+                              CLI.DAG.getMachineFunction(), DemoteStackIdx,
+                              Offsets[i].getKnownMinSize()),
+                          HiddenSRetAlign);
       ReturnValues[i] = L;
       Chains[i] = L.getValue(1);
     }
diff --git a/llvm/lib/IR/DataLayout.cpp b/llvm/lib/IR/DataLayout.cpp
--- a/llvm/lib/IR/DataLayout.cpp
+++ b/llvm/lib/IR/DataLayout.cpp
@@ -44,9 +44,13 @@
 // Support for StructLayout
 //===----------------------------------------------------------------------===//
 
-StructLayout::StructLayout(StructType *ST, const DataLayout &DL) {
+StructLayout::StructLayout(StructType *ST, const DataLayout &DL)
+    : StructSize(0, false) {
   assert(!ST->isOpaque() && "Cannot get layout of opaque structs");
&& "Cannot get layout of opaque structs"); - StructSize = 0; + // We permit scalable vector struct as "sized" only when all the elements + // are the same scalable vector types. + if (ST->isScalableType()) + StructSize = TypeSize::Scalable(0); IsPadded = false; NumElements = ST->getNumElements(); @@ -56,45 +60,56 @@ const Align TyAlign = ST->isPacked() ? Align(1) : DL.getABITypeAlign(Ty); // Add padding if necessary to align the data element properly. - if (!isAligned(TyAlign, StructSize)) { + // Scalable vector struct must be the same scalable vector types. They have + // no alignment issues. + if (!StructSize.isScalable() && + !isAligned(TyAlign, StructSize.getFixedSize())) { IsPadded = true; - StructSize = alignTo(StructSize, TyAlign); + StructSize = TypeSize::Fixed(alignTo(StructSize.getFixedSize(), TyAlign)); } // Keep track of maximum alignment constraint. StructAlignment = std::max(TyAlign, StructAlignment); - MemberOffsets[i] = StructSize; + MemberOffsets.push_back(StructSize); // Consume space for this data item - StructSize += DL.getTypeAllocSize(Ty).getFixedValue(); + StructSize += DL.getTypeAllocSize(Ty); } // Add padding to the end of the struct so that it could be put in an array // and all array elements would be aligned correctly. - if (!isAligned(StructAlignment, StructSize)) { + if (!StructSize.isScalable() && + !isAligned(StructAlignment, StructSize.getFixedSize())) { IsPadded = true; - StructSize = alignTo(StructSize, StructAlignment); + StructSize = + TypeSize::Fixed(alignTo(StructSize.getFixedSize(), StructAlignment)); } } /// getElementContainingOffset - Given a valid offset into the structure, /// return the structure index that contains it. -unsigned StructLayout::getElementContainingOffset(uint64_t Offset) const { - const uint64_t *SI = - std::upper_bound(&MemberOffsets[0], &MemberOffsets[NumElements], Offset); - assert(SI != &MemberOffsets[0] && "Offset not in structure type!"); +unsigned StructLayout::getElementContainingOffset(uint64_t O) const { + TypeSize Offset(O, StructSize.isScalable()); + const TypeSize *SI = + std::upper_bound(MemberOffsets.begin(), MemberOffsets.end(), Offset, + [](TypeSize lhs, TypeSize rhs) -> bool { + return TypeSize::isKnownLT(lhs, rhs); + }); + assert(SI != MemberOffsets.begin() && "Offset not in structure type!"); --SI; - assert(*SI <= Offset && "upper_bound didn't work"); - assert((SI == &MemberOffsets[0] || *(SI-1) <= Offset) && - (SI+1 == &MemberOffsets[NumElements] || *(SI+1) > Offset) && - "Upper bound didn't work!"); + assert(TypeSize::isKnownLE(*SI, Offset) && "upper_bound didn't work"); + assert( + (SI == MemberOffsets.begin() || TypeSize::isKnownLE(*(SI - 1), Offset)) && + (SI + 1 == MemberOffsets.end() || + TypeSize::isKnownLT(Offset, *(SI + 1))) && + "Upper bound didn't work!"); // Multiple fields can have the same offset if any of them are zero sized. // For example, in { i32, [0 x i32], i32 }, searching for offset 4 will stop // at the i32 element, because it is the last element at that offset. This is // the right one to return, because anything after it will have a higher // offset, implying that this element is non-empty. - return SI-&MemberOffsets[0]; + return SI - MemberOffsets.begin(); } //===----------------------------------------------------------------------===// @@ -679,8 +694,8 @@ // Otherwise, create the struct layout. Because it is variable length, we // malloc it, then use placement new. 
   int NumElts = Ty->getNumElements();
-  StructLayout *L = (StructLayout *)
-      safe_malloc(sizeof(StructLayout)+(NumElts-1) * sizeof(uint64_t));
+  StructLayout *L = (StructLayout *)safe_malloc(
+      sizeof(StructLayout) + (NumElts - 1) * sizeof(TypeSize));
 
   // Set SL before calling StructLayout's ctor.  The ctor could cause other
   // entries to be added to TheMap, invalidating our reference.
diff --git a/llvm/lib/IR/Type.cpp b/llvm/lib/IR/Type.cpp
--- a/llvm/lib/IR/Type.cpp
+++ b/llvm/lib/IR/Type.cpp
@@ -180,6 +180,17 @@
   return cast<StructType>(this)->isSized(Visited);
 }
 
+bool Type::isScalableType() const {
+  if (getTypeID() == ScalableVectorTyID)
+    return true;
+
+  if (const auto *STy = dyn_cast<StructType>(this)) {
+    if (STy->containsScalableVectorType())
+      return true;
+  }
+  return false;
+}
+
 //===----------------------------------------------------------------------===//
 //                          Primitive 'Type' data
 //===----------------------------------------------------------------------===//
@@ -528,11 +539,22 @@
   // Okay, our struct is sized if all of the elements are, but if one of the
   // elements is opaque, the struct isn't sized *yet*, but may become sized in
   // the future, so just bail out without caching.
+  Type *FirstTy = getNumElements() > 0 ? elements()[0] : nullptr;
+  bool IsFirstElementScalable = false;
+  if (FirstTy)
+    IsFirstElementScalable = isa<ScalableVectorType>(FirstTy);
   for (Type *Ty : elements()) {
-    // If the struct contains a scalable vector type, don't consider it sized.
-    // This prevents it from being used in loads/stores/allocas/GEPs.
-    if (isa<ScalableVectorType>(Ty))
-      return false;
+    if (IsFirstElementScalable) {
+      // Mixing scalable vector types with any other type is not permitted.
+      if (!isa<ScalableVectorType>(Ty))
+        return false;
+      // All scalable vector elements of the struct must be the same type.
+      if (FirstTy != Ty)
+        return false;
+    } else {
+      if (isa<ScalableVectorType>(Ty))
+        return false;
+    }
     if (!Ty->isSized(Visited))
       return false;
   }
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -1042,8 +1042,9 @@
     // the correct memory offsets.
     SmallVector<EVT, 16> ValueVTs;
-    SmallVector<uint64_t, 16> Offsets;
-    ComputeValueVTs(*this, DL, BaseArgTy, ValueVTs, &Offsets, ArgOffset);
+    SmallVector<TypeSize, 16> Offsets;
+    ComputeValueVTs(*this, DL, BaseArgTy, ValueVTs, &Offsets,
+                    TypeSize::Fixed(ArgOffset));
 
     for (unsigned Value = 0, NumValues = ValueVTs.size();
          Value != NumValues; ++Value) {
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -150,7 +150,7 @@
                                SmallVectorImpl<uint64_t> *Offsets = nullptr,
                                uint64_t StartingOffset = 0) {
   SmallVector<EVT, 16> TempVTs;
-  SmallVector<uint64_t, 16> TempOffsets;
+  SmallVector<TypeSize, 16> TempOffsets;
 
   // Special case for i128 - decompose to (i64, i64)
   if (Ty->isIntegerTy(128)) {
@@ -177,7 +177,8 @@
     return;
   }
 
-  ComputeValueVTs(TLI, DL, Ty, TempVTs, &TempOffsets, StartingOffset);
+  ComputeValueVTs(TLI, DL, Ty, TempVTs, &TempOffsets,
+                  TypeSize::Fixed(StartingOffset));
   for (unsigned i = 0, e = TempVTs.size(); i != e; ++i) {
     EVT VT = TempVTs[i];
     uint64_t Off = TempOffsets[i];
diff --git a/llvm/lib/Target/X86/X86CallLowering.cpp b/llvm/lib/Target/X86/X86CallLowering.cpp
--- a/llvm/lib/Target/X86/X86CallLowering.cpp
+++ b/llvm/lib/Target/X86/X86CallLowering.cpp
@@ -60,8 +60,8 @@
   LLVMContext &Context = OrigArg.Ty->getContext();
 
   SmallVector<EVT, 4> SplitVTs;
-  SmallVector<uint64_t, 4> Offsets;
-  ComputeValueVTs(TLI, DL, OrigArg.Ty, SplitVTs, &Offsets, 0);
+  SmallVector<TypeSize, 4> Offsets;
+  ComputeValueVTs(TLI, DL, OrigArg.Ty, SplitVTs, &Offsets, TypeSize::Fixed(0));
 
   assert(OrigArg.Regs.size() == 1 && "Can't handle multple regs yet");
 
   if (OrigArg.Ty->isVoidTy())
diff --git a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
--- a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -790,6 +790,10 @@
   if (!isa<StructType>(type))
     return true;
 
+  // Be conservative for scalable struct types.
+  if (type->isScalableType())
+    return false;
+
   // Check for padding within and between elements of a struct.
   StructType *StructTy = cast<StructType>(type);
   const StructLayout *Layout = DL.getStructLayout(StructTy);
diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -209,7 +209,7 @@
   Type *Ty = PtrElemTy;
   do {
     auto *STy = dyn_cast<StructType>(Ty);
-    if (!STy)
+    if (!STy || STy->isScalableType())
       // Non-aggregate type, we cast and make byte-wise progress now.
       break;
 
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
--- a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -641,6 +641,9 @@
           UndefValue::get(T), NewLoad, 0, Name));
     }
 
+    if (ST->isScalableType())
+      return nullptr;
+
     // We don't want to break loads with padding here as we'd loose
     // the knowledge that padding exists for the rest of the pipeline.
     const DataLayout &DL = IC.getDataLayout();
@@ -1166,6 +1169,9 @@
       return true;
     }
 
+    if (ST->isScalableType())
+      return false;
+
     // We don't want to break loads with padding here as we'd loose
     // the knowledge that padding exists for the rest of the pipeline.
     const DataLayout &DL = IC.getDataLayout();
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -1230,7 +1230,7 @@
 InstCombinerImpl::FindElementAtOffset(PointerType *PtrTy, int64_t Offset,
                                       SmallVectorImpl<Value *> &NewIndices) {
   Type *Ty = PtrTy->getElementType();
-  if (!Ty->isSized())
+  if (!Ty->isSized() || Ty->isScalableType())
     return nullptr;
 
   // Start with the index over the outer type.  Note that the type size
diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp
--- a/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -1507,7 +1507,7 @@
   }
 
   StructType *STy = dyn_cast<StructType>(Ty);
-  if (!STy)
+  if (!STy || STy->isScalableType())
     return nullptr;
 
   const StructLayout *SL = DL.getStructLayout(STy);
@@ -3613,8 +3613,8 @@
   if (Ty->isSingleValueType())
     return Ty;
 
-  uint64_t AllocSize = DL.getTypeAllocSize(Ty).getFixedSize();
-  uint64_t TypeSize = DL.getTypeSizeInBits(Ty).getFixedSize();
+  TypeSize AllocSize = DL.getTypeAllocSize(Ty);
+  TypeSize TypeSize = DL.getTypeSizeInBits(Ty);
 
   Type *InnerTy;
   if (ArrayType *ArrTy = dyn_cast<ArrayType>(Ty)) {
@@ -3627,8 +3627,8 @@
     return Ty;
   }
 
-  if (AllocSize > DL.getTypeAllocSize(InnerTy).getFixedSize() ||
-      TypeSize > DL.getTypeSizeInBits(InnerTy).getFixedSize())
+  if (TypeSize::isKnownGT(AllocSize, DL.getTypeAllocSize(InnerTy)) ||
+      TypeSize::isKnownGT(TypeSize, DL.getTypeSizeInBits(InnerTy)))
     return Ty;
 
   return stripAggregateTypeWrapping(DL, InnerTy);
@@ -3694,7 +3694,7 @@
   }
 
   StructType *STy = dyn_cast<StructType>(Ty);
-  if (!STy)
+  if (!STy || STy->isScalableType())
     return nullptr;
 
   const StructLayout *SL = DL.getStructLayout(STy);
diff --git a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
--- a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
+++ b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
@@ -496,6 +496,9 @@
         // the struct fields.
         if (Ops.empty())
          break;
+        // Be conservative for scalable struct types.
+        if (STy->isScalableType())
+          break;
         if (const SCEVConstant *C = dyn_cast<SCEVConstant>(Ops[0]))
           if (SE.getTypeSizeInBits(C->getType()) <= 64) {
             const StructLayout &SL = *DL.getStructLayout(STy);
diff --git a/llvm/test/CodeGen/RISCV/rvv/load-store-scalable-struct.ll b/llvm/test/CodeGen/RISCV/rvv/load-store-scalable-struct.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/load-store-scalable-struct.ll
@@ -0,0 +1,51 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+experimental-v -verify-machineinstrs \
+; RUN:   --riscv-no-aliases < %s | FileCheck %s
+
+target triple = "riscv64-unknown-unknown-elf"
+
+%struct.test = type { <vscale x 1 x double>, <vscale x 1 x double> }
+
+define <vscale x 1 x double> @test(%struct.test* %addr, i64 %vl) {
+; CHECK-LABEL: test:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrrs a2, vlenb, zero
+; CHECK-NEXT:    slli a2, a2, 1
+; CHECK-NEXT:    sub sp, sp, a2
+; CHECK-NEXT:    csrrs a2, vlenb, zero
+; CHECK-NEXT:    add a3, a0, a2
+; CHECK-NEXT:    vl1re64.v v25, (a3)
+; CHECK-NEXT:    vl1re64.v v26, (a0)
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    add a0, a0, a2
+; CHECK-NEXT:    vs1r.v v25, (a0)
+; CHECK-NEXT:    addi a2, sp, 16
+; CHECK-NEXT:    vs1r.v v26, (a2)
+; CHECK-NEXT:    vl1re64.v v25, (a0)
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1re64.v v26, (a0)
+; CHECK-NEXT:    vsetvli a0, a1, e64,m1,ta,mu
+; CHECK-NEXT:    vfadd.vv v8, v26, v25
+; CHECK-NEXT:    csrrs a0, vlenb, zero
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %ret = alloca %struct.test, align 8
+  %val = load %struct.test, %struct.test* %addr
+  store %struct.test %val, %struct.test* %ret, align 8
+  %0 = load %struct.test, %struct.test* %ret, align 8
+  %1 = extractvalue %struct.test %0, 0
+  %2 = extractvalue %struct.test %0, 1
+  %3 = call <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.nxv1f64(
+    <vscale x 1 x double> %1, <vscale x 1 x double> %2, i64 %vl)
+  ret <vscale x 1 x double> %3
+}
+
+declare <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.nxv1f64(
+  <vscale x 1 x double>,
+  <vscale x 1 x double>,
+  i64);
diff --git a/llvm/test/Other/load-scalable-vector-struct.ll b/llvm/test/Other/load-scalable-vector-struct.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Other/load-scalable-vector-struct.ll
@@ -0,0 +1,12 @@
+; RUN: opt -S -verify < %s 2>&1 | FileCheck %s
+
+%struct.test = type { <vscale x 1 x i32>, <vscale x 1 x i32> }
+
+define <vscale x 1 x i32> @load(%struct.test* %x) {
+; CHECK: %a = load %struct.test, %struct.test* %x, align 4
+; CHECK: %b = extractvalue %struct.test %a, 1
+; CHECK: ret <vscale x 1 x i32> %b
+  %a = load %struct.test, %struct.test* %x
+  %b = extractvalue %struct.test %a, 1
+  ret <vscale x 1 x i32> %b
+}
diff --git a/llvm/test/Other/store-scalable-vector-struct.ll b/llvm/test/Other/store-scalable-vector-struct.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Other/store-scalable-vector-struct.ll
@@ -0,0 +1,14 @@
+; RUN: opt -S -verify < %s 2>&1 | FileCheck %s
+
+%struct.test = type { <vscale x 1 x i32>, <vscale x 1 x i32> }
+
+define void @store(%struct.test* %x, <vscale x 1 x i32> %y, <vscale x 1 x i32> %z) {
+; CHECK: %a = insertvalue %struct.test undef, <vscale x 1 x i32> %y, 0
+; CHECK: %b = insertvalue %struct.test %a, <vscale x 1 x i32> %z, 1
+; CHECK: store %struct.test %b, %struct.test* %x
+; CHECK: ret void
+  %a = insertvalue %struct.test undef, <vscale x 1 x i32> %y, 0
+  %b = insertvalue %struct.test %a, <vscale x 1 x i32> %z, 1
+  store %struct.test %b, %struct.test* %x
+  ret void
+}