Index: llvm/include/llvm/Support/TypeSize.h
===================================================================
--- llvm/include/llvm/Support/TypeSize.h
+++ llvm/include/llvm/Support/TypeSize.h
@@ -145,6 +145,24 @@
     return *this;
   }
 
+  friend TypeSize operator-(const TypeSize &LHS, const TypeSize &RHS) {
+    assert(LHS.IsScalable == RHS.IsScalable &&
+           "Arithmetic using mixed scalable and fixed types");
+    return {LHS.MinSize - RHS.MinSize, LHS.IsScalable};
+  }
+
+  friend TypeSize operator/(const TypeSize &LHS, const TypeSize &RHS) {
+    assert(LHS.IsScalable == RHS.IsScalable &&
+           "Arithmetic using mixed scalable and fixed types");
+    return {LHS.MinSize / RHS.MinSize, LHS.IsScalable};
+  }
+
+  friend TypeSize operator%(const TypeSize &LHS, const TypeSize &RHS) {
+    assert(LHS.IsScalable == RHS.IsScalable &&
+           "Arithmetic using mixed scalable and fixed types");
+    return {LHS.MinSize % RHS.MinSize, LHS.IsScalable};
+  }
+
   // Return the minimum size with the assumption that the size is exact.
   // Use in places where a scalable size doesn't make sense (e.g. non-vector
   // types, or vectors in backends which don't support scalable vectors).
Index: llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -4895,11 +4895,14 @@
         isPowerOf2_32(WidenWidth / MemVTWidth) &&
         (MemVTWidth <= Width ||
          (Align!=0 && MemVTWidth<=AlignInBits && MemVTWidth<=Width+WidenEx))) {
-      if (RetVT.getSizeInBits() < MemVTWidth || MemVT == WidenVT)
+      if (RetVT.getSizeInBits().getFixedSize() < MemVTWidth || MemVT == WidenVT)
         return MemVT;
     }
   }
 
+  if (Scalable)
+    report_fatal_error("Using element-wise loads and stores for widening "
+                       "operations is not supported for scalable vectors");
   return RetVT;
 }
 
@@ -4942,10 +4945,10 @@
   // element type or scalar loads and then recombines it to the widen vector
   // type.
   EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(),LD->getValueType(0));
-  unsigned WidenWidth = WidenVT.getSizeInBits();
   EVT LdVT = LD->getMemoryVT();
   SDLoc dl(LD);
   assert(LdVT.isVector() && WidenVT.isVector());
+  assert(LdVT.isScalableVector() == WidenVT.isScalableVector());
   assert(LdVT.getVectorElementType() == WidenVT.getVectorElementType());
 
   // Load information
@@ -4954,15 +4957,17 @@
   MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags();
   AAMDNodes AAInfo = LD->getAAInfo();
 
-  int LdWidth = LdVT.getSizeInBits();
-  int WidthDiff = WidenWidth - LdWidth;
+  TypeSize LdWidth = LdVT.getSizeInBits();
+  TypeSize WidenWidth = WidenVT.getSizeInBits();
+  TypeSize WidthDiff = WidenWidth - LdWidth;
   // Allow wider loads if they are sufficiently aligned to avoid memory faults
   // and if the original load is simple.
   unsigned LdAlign = (!LD->isSimple()) ? 0 : LD->getAlignment();
 
   // Find the vector type that can load from.
-  EVT NewVT = FindMemType(DAG, TLI, LdWidth, WidenVT, LdAlign, WidthDiff);
-  int NewVTWidth = NewVT.getSizeInBits();
+  EVT NewVT = FindMemType(DAG, TLI, LdWidth.getKnownMinSize(), WidenVT, LdAlign,
+                          WidthDiff.getKnownMinSize());
+  TypeSize NewVTWidth = NewVT.getSizeInBits();
   SDValue LdOp = DAG.getLoad(NewVT, dl, Chain, BasePtr, LD->getPointerInfo(),
                              LD->getOriginalAlign(), MMOFlags, AAInfo);
   LdChain.push_back(LdOp.getValue(1));
@@ -4970,7 +4975,7 @@
   // Check if we can load the element with one instruction.
   if (LdWidth <= NewVTWidth) {
     if (!NewVT.isVector()) {
-      unsigned NumElts = WidenWidth / NewVTWidth;
+      unsigned NumElts = WidenWidth.getFixedSize() / NewVTWidth.getFixedSize();
       EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewVT, NumElts);
       SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT, LdOp);
       return DAG.getNode(ISD::BITCAST, dl, WidenVT, VecOp);
@@ -4978,8 +4983,9 @@
     if (NewVT == WidenVT)
       return LdOp;
 
-    assert(WidenWidth % NewVTWidth == 0);
-    unsigned NumConcat = WidenWidth / NewVTWidth;
+    // TODO: We don't currently have any tests that exercise this code path.
+    assert(WidenWidth.getFixedSize() % NewVTWidth.getFixedSize() == 0);
+    unsigned NumConcat = WidenWidth.getFixedSize() / NewVTWidth.getFixedSize();
     SmallVector<SDValue, 16> ConcatOps(NumConcat);
     SDValue UndefVal = DAG.getUNDEF(NewVT);
     ConcatOps[0] = LdOp;
@@ -4992,35 +4998,30 @@
   SmallVector<SDValue, 16> LdOps;
   LdOps.push_back(LdOp);
 
-  LdWidth -= NewVTWidth;
-  unsigned Offset = 0;
-
-  while (LdWidth > 0) {
-    unsigned Increment = NewVTWidth / 8;
-    Offset += Increment;
-    BasePtr = DAG.getObjectPtrOffset(dl, BasePtr, TypeSize::Fixed(Increment));
+  uint64_t ScaledOffset = 0;
+  MachinePointerInfo MPI = LD->getPointerInfo();
+  do {
+    LdWidth -= NewVTWidth;
+    IncrementPointer(cast<LoadSDNode>(LdOp), NewVT, MPI, BasePtr,
+                     &ScaledOffset);
 
-    SDValue L;
     if (LdWidth < NewVTWidth) {
       // The current type we are using is too large. Find a better size.
-      NewVT = FindMemType(DAG, TLI, LdWidth, WidenVT, LdAlign, WidthDiff);
+      NewVT = FindMemType(DAG, TLI, LdWidth.getKnownMinSize(), WidenVT, LdAlign,
+                          WidthDiff.getKnownMinSize());
       NewVTWidth = NewVT.getSizeInBits();
-      L = DAG.getLoad(NewVT, dl, Chain, BasePtr,
-                      LD->getPointerInfo().getWithOffset(Offset),
-                      LD->getOriginalAlign(), MMOFlags, AAInfo);
-      LdChain.push_back(L.getValue(1));
-    } else {
-      L = DAG.getLoad(NewVT, dl, Chain, BasePtr,
-                      LD->getPointerInfo().getWithOffset(Offset),
-                      LD->getOriginalAlign(), MMOFlags, AAInfo);
-      LdChain.push_back(L.getValue(1));
     }
 
+    Align NewAlign = ScaledOffset == 0
+                         ? LD->getOriginalAlign()
+                         : commonAlignment(LD->getAlign(), ScaledOffset);
+    SDValue L =
+        DAG.getLoad(NewVT, dl, Chain, BasePtr, MPI, NewAlign, MMOFlags, AAInfo);
+    LdChain.push_back(L.getValue(1));
+
     LdOps.push_back(L);
     LdOp = L;
-
-    LdWidth -= NewVTWidth;
-  }
+  } while (LdWidth > NewVTWidth);
 
   // Build the vector from the load operations.
   unsigned End = LdOps.size();
@@ -5044,13 +5045,17 @@
     }
     ConcatOps[--Idx] = BuildVectorFromScalar(DAG, LdTy, LdOps, i + 1, End);
   }
+
   ConcatOps[--Idx] = LdOps[i];
   for (--i; i >= 0; --i) {
     EVT NewLdTy = LdOps[i].getValueType();
     if (NewLdTy != LdTy) {
       // Create a larger vector.
-      unsigned NumOps = NewLdTy.getSizeInBits() / LdTy.getSizeInBits();
-      assert(NewLdTy.getSizeInBits() % LdTy.getSizeInBits() == 0);
+      unsigned NumOps =
+          (NewLdTy.getSizeInBits() / LdTy.getSizeInBits()).getKnownMinSize();
+      assert(
+          (NewLdTy.getSizeInBits() % LdTy.getSizeInBits()).getKnownMinSize() ==
+          0);
       SmallVector<SDValue, 16> WidenOps(NumOps);
       unsigned j = 0;
       for (; j != End-Idx; ++j)
@@ -5071,7 +5076,7 @@
                        makeArrayRef(&ConcatOps[Idx], End - Idx));
 
   // We need to fill the rest with undefs to build the vector.
-  unsigned NumOps = WidenWidth / LdTy.getSizeInBits();
+  unsigned NumOps = (WidenWidth / LdTy.getSizeInBits()).getKnownMinSize();
   SmallVector<SDValue, 16> WidenOps(NumOps);
   SDValue UndefVal = DAG.getUNDEF(LdTy);
   {
@@ -5094,6 +5099,7 @@
   EVT LdVT = LD->getMemoryVT();
   SDLoc dl(LD);
   assert(LdVT.isVector() && WidenVT.isVector());
+  assert(LdVT.isScalableVector() == WidenVT.isScalableVector());
 
   // Load information
   SDValue Chain = LD->getChain();
@@ -5101,6 +5107,10 @@
   MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags();
   AAMDNodes AAInfo = LD->getAAInfo();
 
+  if (LdVT.isScalableVector())
+    report_fatal_error("Generating widen scalable extending vector loads is "
+                       "not yet supported");
+
   EVT EltVT = WidenVT.getVectorElementType();
   EVT LdEltVT = LdVT.getVectorElementType();
   unsigned NumElts = LdVT.getVectorNumElements();
Index: llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -6724,6 +6724,9 @@
   EVT DstVT = LD->getValueType(0);
   ISD::LoadExtType ExtType = LD->getExtensionType();
 
+  if (SrcVT.isScalableVector())
+    report_fatal_error("Cannot scalarize scalable vector loads");
+
   unsigned NumElem = SrcVT.getVectorNumElements();
 
   EVT SrcEltVT = SrcVT.getScalarType();
@@ -6811,6 +6814,9 @@
   SDValue Value = ST->getValue();
   EVT StVT = ST->getMemoryVT();
 
+  if (StVT.isScalableVector())
+    report_fatal_error("Cannot scalarize scalable vector stores");
+
   // The type of the data we want to save
   EVT RegVT = Value.getValueType();
   EVT RegSclVT = RegVT.getScalarType();
Index: llvm/test/CodeGen/AArch64/sve-ld1-addressing-mode-reg-imm.ll
===================================================================
--- llvm/test/CodeGen/AArch64/sve-ld1-addressing-mode-reg-imm.ll
+++ llvm/test/CodeGen/AArch64/sve-ld1-addressing-mode-reg-imm.ll
@@ -104,3 +104,40 @@
   %load = load , * %base
   ret %load
 }
+
+define void @load_nxv6f16(<vscale x 6 x half>* %a) {
+; CHECK-LABEL: load_nxv6f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    ptrue p1.s
+; CHECK-NEXT:    ld1h { z0.d }, p0/z, [x0, #2, mul vl]
+; CHECK-NEXT:    ld1h { z0.s }, p1/z, [x0]
+; CHECK-NEXT:    ret
+  %val = load volatile <vscale x 6 x half>, <vscale x 6 x half>* %a
+  ret void
+}
+
+define void @load_nxv6f32(<vscale x 6 x float>* %a) {
+; CHECK-LABEL: load_nxv6f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    ptrue p1.s
+; CHECK-NEXT:    ld1w { z0.d }, p0/z, [x0, #2, mul vl]
+; CHECK-NEXT:    ld1w { z0.s }, p1/z, [x0]
+; CHECK-NEXT:    ret
+  %val = load volatile <vscale x 6 x float>, <vscale x 6 x float>* %a
+  ret void
+}
+
+define void @load_nxv12f16(<vscale x 12 x half>* %a) {
+; CHECK-LABEL: load_nxv12f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    ptrue p1.h
+; CHECK-NEXT:    ld1h { z0.s }, p0/z, [x0, #2, mul vl]
+; CHECK-NEXT:    ld1h { z0.h }, p1/z, [x0]
+; CHECK-NEXT:    ret
+  %val = load volatile <vscale x 12 x half>, <vscale x 12 x half>* %a
+  ret void
+}
+
Index: llvm/test/CodeGen/AArch64/sve-split-load.ll
===================================================================
--- llvm/test/CodeGen/AArch64/sve-split-load.ll
+++ llvm/test/CodeGen/AArch64/sve-split-load.ll
@@ -24,6 +24,18 @@
   ret <vscale x 16 x i16> %load
 }
 
+define <vscale x 24 x i16> @load_split_24i16(<vscale x 24 x i16>* %a) {
+; CHECK-LABEL: load_split_24i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
+; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x0, #1, mul vl]
+; CHECK-NEXT:    ld1h { z2.h }, p0/z, [x0, #2, mul vl]
+; CHECK-NEXT:    ret
+  %load = load <vscale x 24 x i16>, <vscale x 24 x i16>* %a
+  ret <vscale x 24 x i16> %load
+}
+
 define <vscale x 32 x i16> @load_split_32i16(<vscale x 32 x i16>* %a) {
 ; CHECK-LABEL: load_split_32i16:
 ; CHECK:       // %bb.0: