diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -982,10 +982,10 @@ LoadSDNode *LD, ISD::LoadExtType ExtType); /// Helper function to generate a set of stores to store a widen vector into - /// non-widen memory. + /// non-widen memory. Returns true if successful, false otherwise. /// StChain: list of chains for the stores we have generated /// ST: store of a widen value - void GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain, StoreSDNode *ST); + bool GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain, StoreSDNode *ST); /// Modifies a vector input (widen or narrows) to a vector of NVT. The /// input vector must have the same element type as NVT. diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -4165,6 +4165,9 @@ else Result = GenWidenVectorLoads(LdChain, LD); + if (!Result) + llvm_unreachable("Unable to widen vector load"); + // If we generate a single load, we can use that for the chain. Otherwise, // build a factor node to remember the multiple loads are independent and // chain to that. @@ -5019,12 +5022,13 @@ return TLI.scalarizeVectorStore(ST, DAG); SmallVector<SDValue, 16> StChain; - GenWidenVectorStores(StChain, ST); + if (!GenWidenVectorStores(StChain, ST)) + llvm_unreachable("Unable to widen vector store"); if (StChain.size() == 1) return StChain[0]; - else - return DAG.getNode(ISD::TokenFactor, SDLoc(ST), MVT::Other, StChain); + + return DAG.getNode(ISD::TokenFactor, SDLoc(ST), MVT::Other, StChain); } SDValue DAGTypeLegalizer::WidenVecOp_MSTORE(SDNode *N, unsigned OpNo) { @@ -5275,9 +5279,9 @@ // Align: If 0, don't allow use of a wider type // WidenEx: If Align is not 0, the amount additional we can load/store from. 
-static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI, - unsigned Width, EVT WidenVT, - unsigned Align = 0, unsigned WidenEx = 0) { +static Optional<EVT> findMemType(SelectionDAG &DAG, const TargetLowering &TLI, + unsigned Width, EVT WidenVT, + unsigned Align = 0, unsigned WidenEx = 0) { EVT WidenEltVT = WidenVT.getVectorElementType(); const bool Scalable = WidenVT.isScalableVector(); unsigned WidenWidth = WidenVT.getSizeInBits().getKnownMinSize(); @@ -5332,9 +5336,11 @@ } } + // Using element-wise loads and stores for widening operations is not + // supported for scalable vectors if (Scalable) - report_fatal_error("Using element-wise loads and stores for widening " - "operations is not supported for scalable vectors"); + return None; + return RetVT; } @@ -5398,29 +5404,59 @@ (!LD->isSimple() || LdVT.isScalableVector()) ? 0 : LD->getAlignment(); // Find the vector type that can load from. - EVT NewVT = FindMemType(DAG, TLI, LdWidth.getKnownMinSize(), WidenVT, LdAlign, - WidthDiff.getKnownMinSize()); - TypeSize NewVTWidth = NewVT.getSizeInBits(); - SDValue LdOp = DAG.getLoad(NewVT, dl, Chain, BasePtr, LD->getPointerInfo(), + Optional<EVT> FirstVT = + findMemType(DAG, TLI, LdWidth.getKnownMinSize(), WidenVT, LdAlign, + WidthDiff.getKnownMinSize()); + + if (!FirstVT) + return SDValue(); + + SmallVector<EVT, 8> MemVTs; + TypeSize FirstVTWidth = FirstVT->getSizeInBits(); + + // Unless we're able to load in one instruction we must work out how to load + // the remainder. + if (!TypeSize::isKnownLE(LdWidth, FirstVTWidth)) { + Optional<EVT> NewVT = FirstVT; + TypeSize RemainingWidth = LdWidth; + TypeSize NewVTWidth = FirstVTWidth; + do { + RemainingWidth -= NewVTWidth; + if (TypeSize::isKnownLT(RemainingWidth, NewVTWidth)) { + // The current type we are using is too large. Find a better size. 
+ NewVT = findMemType(DAG, TLI, RemainingWidth.getKnownMinSize(), WidenVT, + LdAlign, WidthDiff.getKnownMinSize()); + if (!NewVT) + return SDValue(); + NewVTWidth = NewVT->getSizeInBits(); + } + MemVTs.push_back(*NewVT); + } while (TypeSize::isKnownGT(RemainingWidth, NewVTWidth)); + } + + SDValue LdOp = DAG.getLoad(*FirstVT, dl, Chain, BasePtr, LD->getPointerInfo(), LD->getOriginalAlign(), MMOFlags, AAInfo); LdChain.push_back(LdOp.getValue(1)); // Check if we can load the element with one instruction. - if (TypeSize::isKnownLE(LdWidth, NewVTWidth)) { - if (!NewVT.isVector()) { - unsigned NumElts = WidenWidth.getFixedSize() / NewVTWidth.getFixedSize(); - EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewVT, NumElts); + if (MemVTs.empty()) { + assert(TypeSize::isKnownLE(LdWidth, FirstVTWidth)); + if (!FirstVT->isVector()) { + unsigned NumElts = + WidenWidth.getFixedSize() / FirstVTWidth.getFixedSize(); + EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), *FirstVT, NumElts); SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT, LdOp); return DAG.getNode(ISD::BITCAST, dl, WidenVT, VecOp); } - if (NewVT == WidenVT) + if (FirstVT == WidenVT) return LdOp; // TODO: We don't currently have any tests that exercise this code path. 
- assert(WidenWidth.getFixedSize() % NewVTWidth.getFixedSize() == 0); + assert(WidenWidth.getFixedSize() % FirstVTWidth.getFixedSize() == 0); + unsigned NumConcat = + WidenWidth.getFixedSize() / FirstVTWidth.getFixedSize(); SmallVector<SDValue, 16> ConcatOps(NumConcat); - SDValue UndefVal = DAG.getUNDEF(NewVT); + SDValue UndefVal = DAG.getUNDEF(*FirstVT); ConcatOps[0] = LdOp; for (unsigned i = 1; i != NumConcat; ++i) ConcatOps[i] = UndefVal; @@ -5433,28 +5469,22 @@ uint64_t ScaledOffset = 0; MachinePointerInfo MPI = LD->getPointerInfo(); - do { - LdWidth -= NewVTWidth; - IncrementPointer(cast<LoadSDNode>(LdOp), NewVT, MPI, BasePtr, - &ScaledOffset); - - if (TypeSize::isKnownLT(LdWidth, NewVTWidth)) { - // The current type we are using is too large. Find a better size. - NewVT = FindMemType(DAG, TLI, LdWidth.getKnownMinSize(), WidenVT, LdAlign, - WidthDiff.getKnownMinSize()); - NewVTWidth = NewVT.getSizeInBits(); - } + // First increment past the first load. + IncrementPointer(cast<LoadSDNode>(LdOp), *FirstVT, MPI, BasePtr, + &ScaledOffset); + + for (EVT MemVT : MemVTs) { Align NewAlign = ScaledOffset == 0 ? LD->getOriginalAlign() : commonAlignment(LD->getAlign(), ScaledOffset); SDValue L = - DAG.getLoad(NewVT, dl, Chain, BasePtr, MPI, NewAlign, MMOFlags, AAInfo); - LdChain.push_back(L.getValue(1)); + DAG.getLoad(MemVT, dl, Chain, BasePtr, MPI, NewAlign, MMOFlags, AAInfo); LdOps.push_back(L); - LdOp = L; - } while (TypeSize::isKnownGT(LdWidth, NewVTWidth)); + LdChain.push_back(L.getValue(1)); + IncrementPointer(cast<LoadSDNode>(L), MemVT, MPI, BasePtr, &ScaledOffset); + } // Build the vector from the load operations. 
unsigned End = LdOps.size(); @@ -5576,7 +5606,7 @@ return DAG.getBuildVector(WidenVT, dl, Ops); } -void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain, +bool DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain, StoreSDNode *ST) { // The strategy assumes that we can efficiently store power-of-two widths. // The routine chops the vector into the largest vector stores with the same @@ -5602,9 +5632,30 @@ MachinePointerInfo MPI = ST->getPointerInfo(); uint64_t ScaledOffset = 0; + + // A breakdown of how to widen this vector store. Each element of the vector + // is a memory VT combined with the number of times it is to be stored to, + // e.g., v5i32 -> {{v2i32,2},{i32,1}} + SmallVector<std::pair<EVT, unsigned>, 4> MemVTs; + while (StWidth.isNonZero()) { // Find the largest vector type we can store with. - EVT NewVT = FindMemType(DAG, TLI, StWidth.getKnownMinSize(), ValVT); + Optional<EVT> NewVT = + findMemType(DAG, TLI, StWidth.getKnownMinSize(), ValVT); + if (!NewVT) + return false; + MemVTs.push_back({*NewVT, 0}); + TypeSize NewVTWidth = NewVT->getSizeInBits(); + + do { + StWidth -= NewVTWidth; + MemVTs.back().second++; + } while (StWidth.isNonZero() && TypeSize::isKnownGE(StWidth, NewVTWidth)); + } + + for (const auto &Pair : MemVTs) { + EVT NewVT = Pair.first; + unsigned Count = Pair.second; TypeSize NewVTWidth = NewVT.getSizeInBits(); if (NewVT.isVector()) { @@ -5619,12 +5670,10 @@ MMOFlags, AAInfo); StChain.push_back(PartStore); - StWidth -= NewVTWidth; Idx += NumVTElts; - IncrementPointer(cast<StoreSDNode>(PartStore), NewVT, MPI, BasePtr, &ScaledOffset); - } while (StWidth.isNonZero() && TypeSize::isKnownGE(StWidth, NewVTWidth)); + } while (--Count); } else { // Cast the vector to the scalar type we can store. 
unsigned NumElts = ValWidth.getFixedSize() / NewVTWidth.getFixedSize(); @@ -5640,13 +5689,14 @@ MMOFlags, AAInfo); StChain.push_back(PartStore); - StWidth -= NewVTWidth; IncrementPointer(cast<StoreSDNode>(PartStore), NewVT, MPI, BasePtr); - } while (StWidth.isNonZero() && TypeSize::isKnownGE(StWidth, NewVTWidth)); + } while (--Count); // Restore index back to be relative to the original widen element type. Idx = Idx * NewVTWidth.getFixedSize() / ValEltWidth; } } + + return true; } /// Modifies a vector input (widen or narrows) to a vector of NVT. The