diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h
--- a/llvm/include/llvm/CodeGen/SelectionDAG.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAG.h
@@ -1350,13 +1350,9 @@
   SDValue getIndexedLoadVP(SDValue OrigLoad, const SDLoc &dl, SDValue Base,
                            SDValue Offset, ISD::MemIndexedMode AM);
   SDValue getStoreVP(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr,
-                     SDValue Mask, SDValue EVL, MachinePointerInfo PtrInfo,
-                     Align Alignment, MachineMemOperand::Flags MMOFlags,
-                     const AAMDNodes &AAInfo = AAMDNodes(),
-                     bool IsCompressing = false);
-  SDValue getStoreVP(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr,
-                     SDValue Mask, SDValue EVL, MachineMemOperand *MMO,
-                     bool IsCompressing = false);
+                     SDValue Offset, SDValue Mask, SDValue EVL, EVT MemVT,
+                     MachineMemOperand *MMO, ISD::MemIndexedMode AM,
+                     bool IsTruncating = false, bool IsCompressing = false);
   SDValue getTruncStoreVP(SDValue Chain, const SDLoc &dl, SDValue Val,
                           SDValue Ptr, SDValue Mask, SDValue EVL,
                           MachinePointerInfo PtrInfo, EVT SVT, Align Alignment,
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -846,6 +846,7 @@
   void SplitVecRes_FCOPYSIGN(SDNode *N, SDValue &Lo, SDValue &Hi);
   void SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi);
   void SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, SDValue &Hi);
+  void SplitVecRes_VP_LOAD(VPLoadSDNode *LD, SDValue &Lo, SDValue &Hi);
   void SplitVecRes_MLOAD(MaskedLoadSDNode *MLD, SDValue &Lo, SDValue &Hi);
   void SplitVecRes_MGATHER(MaskedGatherSDNode *MGT, SDValue &Lo, SDValue &Hi);
   void SplitVecRes_ScalarOp(SDNode *N, SDValue &Lo, SDValue &Hi);
@@ -872,6 +873,7 @@
   SDValue SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
   SDValue SplitVecOp_ExtVecInRegOp(SDNode *N);
   SDValue SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo);
+  SDValue SplitVecOp_VP_STORE(VPStoreSDNode *N, unsigned OpNo);
   SDValue SplitVecOp_MSTORE(MaskedStoreSDNode *N, unsigned OpNo);
   SDValue SplitVecOp_MSCATTER(MaskedScatterSDNode *N, unsigned OpNo);
   SDValue SplitVecOp_MGATHER(MaskedGatherSDNode *MGT, unsigned OpNo);
@@ -910,6 +912,7 @@
   SDValue WidenVecRes_INSERT_SUBVECTOR(SDNode *N);
   SDValue WidenVecRes_INSERT_VECTOR_ELT(SDNode* N);
   SDValue WidenVecRes_LOAD(SDNode* N);
+  SDValue WidenVecRes_VP_LOAD(VPLoadSDNode *N);
   SDValue WidenVecRes_MLOAD(MaskedLoadSDNode* N);
   SDValue WidenVecRes_MGATHER(MaskedGatherSDNode* N);
   SDValue WidenVecRes_ScalarOp(SDNode* N);
@@ -944,6 +947,7 @@
   SDValue WidenVecOp_INSERT_SUBVECTOR(SDNode *N);
   SDValue WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N);
   SDValue WidenVecOp_STORE(SDNode* N);
+  SDValue WidenVecOp_VP_STORE(SDNode *N, unsigned OpNo);
   SDValue WidenVecOp_MSTORE(SDNode* N, unsigned OpNo);
   SDValue WidenVecOp_MGATHER(SDNode* N, unsigned OpNo);
   SDValue WidenVecOp_MSCATTER(SDNode* N, unsigned OpNo);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -937,6 +937,9 @@
   case ISD::LOAD:
     SplitVecRes_LOAD(cast<LoadSDNode>(N), Lo, Hi);
     break;
+  case ISD::VP_LOAD:
+    SplitVecRes_VP_LOAD(cast<VPLoadSDNode>(N), Lo, Hi);
+    break;
   case ISD::MLOAD:
     SplitVecRes_MLOAD(cast<MaskedLoadSDNode>(N), Lo, Hi);
     break;
@@ -1752,6 +1755,86 @@
   ReplaceValueWith(SDValue(LD, 1), Ch);
 }
 
+void DAGTypeLegalizer::SplitVecRes_VP_LOAD(VPLoadSDNode *LD, SDValue &Lo,
+                                           SDValue &Hi) {
+  assert(LD->isUnindexed() && "Indexed VP load during type legalization!");
+  EVT LoVT, HiVT;
+  SDLoc dl(LD);
+  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(LD->getValueType(0));
+
+  ISD::LoadExtType ExtType = LD->getExtensionType();
+  SDValue Ch = LD->getChain();
+  SDValue Ptr = LD->getBasePtr();
+  SDValue Offset = LD->getOffset();
+  assert(Offset.isUndef() && "Unexpected indexed variable-length load offset");
+  Align Alignment = LD->getOriginalAlign();
+  SDValue Mask = LD->getMask();
+  SDValue EVL = LD->getVectorLength();
+  EVT MemoryVT = LD->getMemoryVT();
+
+  EVT LoMemVT, HiMemVT;
+  bool HiIsEmpty = false;
+  std::tie(LoMemVT, HiMemVT) =
+      DAG.GetDependentSplitDestVTs(MemoryVT, LoVT, &HiIsEmpty);
+
+  // Split Mask operand
+  SDValue MaskLo, MaskHi;
+  if (Mask.getOpcode() == ISD::SETCC) {
+    SplitVecRes_SETCC(Mask.getNode(), MaskLo, MaskHi);
+  } else {
+    if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector)
+      GetSplitVector(Mask, MaskLo, MaskHi);
+    else
+      std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl);
+  }
+
+  // Split EVL operand
+  SDValue EVLLo, EVLHi;
+  std::tie(EVLLo, EVLHi) = DAG.SplitEVL(EVL, LD->getValueType(0), dl);
+
+  MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
+      LD->getPointerInfo(), MachineMemOperand::MOLoad,
+      MemoryLocation::UnknownSize, Alignment, LD->getAAInfo(), LD->getRanges());
+
+  Lo =
+      DAG.getLoadVP(LD->getAddressingMode(), ExtType, LoVT, dl, Ch, Ptr, Offset,
+                    MaskLo, EVLLo, LoMemVT, MMO, LD->isExpandingLoad());
+
+  if (HiIsEmpty) {
+    // The hi vp_load has zero storage size. We therefore simply set it to
+    // the low vp_load and rely on subsequent removal from the chain.
+    Hi = Lo;
+  } else {
+    // Generate hi vp_load.
+    Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, dl, LoMemVT, DAG,
+                                     LD->isExpandingLoad());
+
+    MachinePointerInfo MPI;
+    if (LoMemVT.isScalableVector())
+      MPI = MachinePointerInfo(LD->getPointerInfo().getAddrSpace());
+    else
+      MPI = LD->getPointerInfo().getWithOffset(
+          LoMemVT.getStoreSize().getFixedSize());
+
+    MMO = DAG.getMachineFunction().getMachineMemOperand(
+        MPI, MachineMemOperand::MOLoad, MemoryLocation::UnknownSize, Alignment,
+        LD->getAAInfo(), LD->getRanges());
+
+    Hi = DAG.getLoadVP(LD->getAddressingMode(), ExtType, HiVT, dl, Ch, Ptr,
+                       Offset, MaskHi, EVLHi, HiMemVT, MMO,
+                       LD->isExpandingLoad());
+  }
+
+  // Build a factor node to remember that this load is independent of the
+  // other one.
+  Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
+                   Hi.getValue(1));
+
+  // Legalize the chain result - switch anything that used the old chain to
+  // use the new one.
+  ReplaceValueWith(SDValue(LD, 1), Ch);
+}
+
 void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD, SDValue &Lo,
                                          SDValue &Hi) {
   assert(MLD->isUnindexed() && "Indexed masked load during type legalization!");
@@ -2192,6 +2275,9 @@
   case ISD::STORE:
     Res = SplitVecOp_STORE(cast<StoreSDNode>(N), OpNo);
     break;
+  case ISD::VP_STORE:
+    Res = SplitVecOp_VP_STORE(cast<VPStoreSDNode>(N), OpNo);
+    break;
   case ISD::MSTORE:
     Res = SplitVecOp_MSTORE(cast<MaskedStoreSDNode>(N), OpNo);
     break;
@@ -2595,6 +2681,84 @@
   return SDValue();
 }
 
+SDValue DAGTypeLegalizer::SplitVecOp_VP_STORE(VPStoreSDNode *N, unsigned OpNo) {
+  assert(N->isUnindexed() && "Indexed vp_store of vector?");
+  SDValue Ch = N->getChain();
+  SDValue Ptr = N->getBasePtr();
+  SDValue Offset = N->getOffset();
+  assert(Offset.isUndef() && "Unexpected VP store offset");
+  SDValue Mask = N->getMask();
+  SDValue EVL = N->getVectorLength();
+  SDValue Data = N->getValue();
+  Align Alignment = N->getOriginalAlign();
+  SDLoc DL(N);
+
+  SDValue DataLo, DataHi;
+  if (getTypeAction(Data.getValueType()) == TargetLowering::TypeSplitVector)
+    // Split Data operand
+    GetSplitVector(Data, DataLo, DataHi);
+  else
+    std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);
+
+  // Split Mask operand
+  SDValue MaskLo, MaskHi;
+  if (OpNo == 1 && Mask.getOpcode() == ISD::SETCC) {
+    SplitVecRes_SETCC(Mask.getNode(), MaskLo, MaskHi);
+  } else {
+    if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector)
+      GetSplitVector(Mask, MaskLo, MaskHi);
+    else
+      std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, DL);
+  }
+
+  EVT MemoryVT = N->getMemoryVT();
+  EVT LoMemVT, HiMemVT;
+  bool HiIsEmpty = false;
+  std::tie(LoMemVT, HiMemVT) =
+      DAG.GetDependentSplitDestVTs(MemoryVT, DataLo.getValueType(), &HiIsEmpty);
+
+  // Split EVL
+  SDValue EVLLo, EVLHi;
+  std::tie(EVLLo, EVLHi) = DAG.SplitEVL(EVL, Data.getValueType(), DL);
+
+  SDValue Lo, Hi;
+  MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
+      N->getPointerInfo(), MachineMemOperand::MOStore,
+      MemoryLocation::UnknownSize, Alignment, N->getAAInfo(), N->getRanges());
+
+  Lo = DAG.getStoreVP(Ch, DL, DataLo, Ptr, Offset, MaskLo, EVLLo, LoMemVT, MMO,
+                      N->getAddressingMode(), N->isTruncatingStore(),
+                      N->isCompressingStore());
+
+  // If the hi vp_store has zero storage size, only the lo vp_store is needed.
+  if (HiIsEmpty)
+    return Lo;
+
+  Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG,
+                                   N->isCompressingStore());
+
+  MachinePointerInfo MPI;
+  if (LoMemVT.isScalableVector()) {
+    Alignment = commonAlignment(Alignment,
+                                LoMemVT.getSizeInBits().getKnownMinSize() / 8);
+    MPI = MachinePointerInfo(N->getPointerInfo().getAddrSpace());
+  } else
+    MPI = N->getPointerInfo().getWithOffset(
+        LoMemVT.getStoreSize().getFixedSize());
+
+  MMO = DAG.getMachineFunction().getMachineMemOperand(
+      MPI, MachineMemOperand::MOStore, MemoryLocation::UnknownSize, Alignment,
+      N->getAAInfo(), N->getRanges());
+
+  Hi = DAG.getStoreVP(Ch, DL, DataHi, Ptr, Offset, MaskHi, EVLHi, HiMemVT, MMO,
+                      N->getAddressingMode(), N->isTruncatingStore(),
+                      N->isCompressingStore());
+
+  // Build a factor node to remember that this store is independent of the
+  // other one.
+  return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
+}
+
 SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N,
                                             unsigned OpNo) {
   assert(N->isUnindexed() && "Indexed masked store of vector?");
@@ -3028,6 +3192,9 @@
   case ISD::VECTOR_SHUFFLE:
     Res = WidenVecRes_VECTOR_SHUFFLE(cast<ShuffleVectorSDNode>(N));
     break;
+  case ISD::VP_LOAD:
+    Res = WidenVecRes_VP_LOAD(cast<VPLoadSDNode>(N));
+    break;
   case ISD::MLOAD:
     Res = WidenVecRes_MLOAD(cast<MaskedLoadSDNode>(N));
     break;
@@ -4193,6 +4360,33 @@
   report_fatal_error("Unable to widen vector load");
 }
 
+SDValue DAGTypeLegalizer::WidenVecRes_VP_LOAD(VPLoadSDNode *N) {
+  EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+  SDValue Mask = N->getMask();
+  SDValue EVL = N->getVectorLength();
+  ISD::LoadExtType ExtType = N->getExtensionType();
+  SDLoc dl(N);
+
+  // The mask should be widened as well
+  assert(getTypeAction(Mask.getValueType()) == TargetLowering::TypeWidenVector &&
+         "Unable to widen binary VP op");
+  EVT WideMaskVT =
+      TLI.getTypeToTransformTo(*DAG.getContext(), Mask.getValueType());
+  Mask = GetWidenedVector(Mask);
+  assert(Mask.getValueType().getVectorElementCount() ==
+             WideMaskVT.getVectorElementCount() &&
+         "Unable to widen vector load");
+
+  SDValue Res =
+      DAG.getLoadVP(N->getAddressingMode(), ExtType, WidenVT, dl, N->getChain(),
+                    N->getBasePtr(), N->getOffset(), Mask, EVL,
+                    N->getMemoryVT(), N->getMemOperand(), N->isExpandingLoad());
+  // Legalize the chain result - switch anything that used the old chain to
+  // use the new one.
+  ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+  return Res;
+}
+
 SDValue DAGTypeLegalizer::WidenVecRes_MLOAD(MaskedLoadSDNode *N) {
 
   EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(),N->getValueType(0));
@@ -4680,6 +4874,7 @@
   case ISD::EXTRACT_SUBVECTOR:  Res = WidenVecOp_EXTRACT_SUBVECTOR(N); break;
   case ISD::EXTRACT_VECTOR_ELT: Res = WidenVecOp_EXTRACT_VECTOR_ELT(N); break;
   case ISD::STORE:              Res = WidenVecOp_STORE(N); break;
+  case ISD::VP_STORE:           Res = WidenVecOp_VP_STORE(N, OpNo); break;
   case ISD::MSTORE:             Res = WidenVecOp_MSTORE(N, OpNo); break;
   case ISD::MGATHER:            Res = WidenVecOp_MGATHER(N, OpNo); break;
   case ISD::MSCATTER:           Res = WidenVecOp_MSCATTER(N, OpNo); break;
@@ -5061,15 +5256,54 @@
     unsigned NumVTElts = StVT.getVectorMinNumElements();
     SDValue EVL =
         DAG.getVScale(DL, EVLVT, APInt(EVLVT.getScalarSizeInBits(), NumVTElts));
-    const auto *MMO = ST->getMemOperand();
-    return DAG.getStoreVP(ST->getChain(), DL, StVal, ST->getBasePtr(), Mask,
-                          EVL, MMO->getPointerInfo(), MMO->getAlign(),
-                          MMO->getFlags(), MMO->getAAInfo());
+    return DAG.getStoreVP(ST->getChain(), DL, StVal, ST->getBasePtr(),
+                          DAG.getUNDEF(ST->getBasePtr().getValueType()), Mask,
+                          EVL, StVal.getValueType(), ST->getMemOperand(),
+                          ST->getAddressingMode());
   }
   report_fatal_error("Unable to widen vector store");
 }
 
+SDValue DAGTypeLegalizer::WidenVecOp_VP_STORE(SDNode *N, unsigned OpNo) {
+  assert((OpNo == 1 || OpNo == 3) &&
+         "Can widen only data or mask operand of vp_store");
+  VPStoreSDNode *ST = cast<VPStoreSDNode>(N);
+  SDValue Mask = ST->getMask();
+  SDValue StVal = ST->getValue();
+  SDLoc dl(N);
+
+  if (OpNo == 1) {
+    // Widen the value.
+    StVal = GetWidenedVector(StVal);
+
+    // We only handle the case where the mask needs widening to an
+    // identically-sized type as the vector inputs.
+    assert(getTypeAction(Mask.getValueType()) ==
+               TargetLowering::TypeWidenVector &&
+           "Unable to widen VP store");
+    Mask = GetWidenedVector(Mask);
+  } else {
+    Mask = GetWidenedVector(Mask);
+
+    // We only handle the case where the stored value needs widening to an
+    // identically-sized type as the mask.
+    EVT ValueVT = StVal.getValueType();
+    assert(getTypeAction(ValueVT) == TargetLowering::TypeWidenVector &&
+           "Unable to widen VP store");
+    StVal = GetWidenedVector(StVal);
+  }
+
+  assert(Mask.getValueType().getVectorElementCount() ==
+             StVal.getValueType().getVectorElementCount() &&
+         "Mask and data vectors should have the same number of elements");
+  return DAG.getStoreVP(ST->getChain(), dl, StVal, ST->getBasePtr(),
+                        ST->getOffset(), Mask, ST->getVectorLength(),
+                        ST->getMemoryVT(), ST->getMemOperand(),
+                        ST->getAddressingMode(), /*IsTruncating*/ false,
+                        ST->isCompressingStore());
+}
+
 SDValue DAGTypeLegalizer::WidenVecOp_MSTORE(SDNode *N, unsigned OpNo) {
   assert((OpNo == 1 || OpNo == 3) &&
          "Can widen only data or mask operand of mstore");
@@ -5748,6 +5982,8 @@
   EVT InVT = InOp.getValueType();
   assert(InVT.getVectorElementType() == NVT.getVectorElementType() &&
          "input and widen element type must match");
+  assert(!InVT.isScalableVector() && !NVT.isScalableVector() &&
+         "cannot modify scalable vectors in this way");
   SDLoc dl(InOp);
 
   // Check if InOp already has the right width.
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -7694,23 +7694,6 @@
                                 SDValue Offset, SDValue Mask, SDValue EVL,
                                 EVT MemVT, MachineMemOperand *MMO,
                                 bool IsExpanding) {
-  if (VT == MemVT) {
-    ExtType = ISD::NON_EXTLOAD;
-  } else if (ExtType == ISD::NON_EXTLOAD) {
-    assert(VT == MemVT && "Non-extending load from different memory type!");
-  } else {
-    // Extending load.
-    assert(MemVT.getScalarType().bitsLT(VT.getScalarType()) &&
-           "Should only be an extending load, not truncating!");
-    assert(VT.isInteger() == MemVT.isInteger() &&
-           "Cannot convert from FP to Int or Int -> FP!");
-    assert(VT.isVector() == MemVT.isVector() &&
-           "Cannot use an ext load to convert to or from a vector!");
-    assert((!VT.isVector() ||
-            VT.getVectorElementCount() == MemVT.getVectorElementCount()) &&
-           "Cannot use an ext load to change the number of vector elements!");
-  }
-
   bool Indexed = AM != ISD::UNINDEXED;
   assert((Indexed || Offset.isUndef()) && "Unindexed load with an offset!");
 
@@ -7799,48 +7782,29 @@
 }
 
 SDValue SelectionDAG::getStoreVP(SDValue Chain, const SDLoc &dl, SDValue Val,
-                                 SDValue Ptr, SDValue Mask, SDValue EVL,
-                                 MachinePointerInfo PtrInfo, Align Alignment,
-                                 MachineMemOperand::Flags MMOFlags,
-                                 const AAMDNodes &AAInfo, bool IsCompressing) {
+                                 SDValue Ptr, SDValue Offset, SDValue Mask,
+                                 SDValue EVL, EVT MemVT, MachineMemOperand *MMO,
+                                 ISD::MemIndexedMode AM, bool IsTruncating,
+                                 bool IsCompressing) {
   assert(Chain.getValueType() == MVT::Other && "Invalid chain type");
-
-  MMOFlags |= MachineMemOperand::MOStore;
-  assert((MMOFlags & MachineMemOperand::MOLoad) == 0);
-
-  if (PtrInfo.V.isNull())
-    PtrInfo = InferPointerInfo(PtrInfo, *this, Ptr);
-
-  MachineFunction &MF = getMachineFunction();
-  uint64_t Size =
-      MemoryLocation::getSizeOrUnknown(Val.getValueType().getStoreSize());
-  MachineMemOperand *MMO =
-      MF.getMachineMemOperand(PtrInfo, MMOFlags, Size, Alignment, AAInfo);
-  return getStoreVP(Chain, dl, Val, Ptr, Mask, EVL, MMO, IsCompressing);
-}
-
-SDValue SelectionDAG::getStoreVP(SDValue Chain, const SDLoc &dl, SDValue Val,
-                                 SDValue Ptr, SDValue Mask, SDValue EVL,
-                                 MachineMemOperand *MMO, bool IsCompressing) {
-  assert(Chain.getValueType() == MVT::Other && "Invalid chain type");
-  EVT VT = Val.getValueType();
-  SDVTList VTs = getVTList(MVT::Other);
-  SDValue Undef = getUNDEF(Ptr.getValueType());
-  SDValue Ops[] = {Chain, Val, Ptr, Undef, Mask, EVL};
+  bool Indexed = AM != ISD::UNINDEXED;
+  assert((Indexed || Offset.isUndef()) && "Unindexed vp_store with an offset!");
+  SDVTList VTs = Indexed ? getVTList(Ptr.getValueType(), MVT::Other)
+                         : getVTList(MVT::Other);
+  SDValue Ops[] = {Chain, Val, Ptr, Offset, Mask, EVL};
   FoldingSetNodeID ID;
   AddNodeIDNode(ID, ISD::VP_STORE, VTs, Ops);
-  ID.AddInteger(VT.getRawBits());
+  ID.AddInteger(MemVT.getRawBits());
   ID.AddInteger(getSyntheticNodeSubclassData<VPStoreSDNode>(
-      dl.getIROrder(), VTs, ISD::UNINDEXED, false, IsCompressing, VT, MMO));
+      dl.getIROrder(), VTs, AM, IsTruncating, IsCompressing, MemVT, MMO));
   ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
   void *IP = nullptr;
   if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
     cast<VPStoreSDNode>(E)->refineAlignment(MMO);
     return SDValue(E, 0);
   }
-  auto *N =
-      newSDNode<VPStoreSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs,
-                               ISD::UNINDEXED, false, IsCompressing, VT, MMO);
+  auto *N = newSDNode<VPStoreSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs, AM,
+                                     IsTruncating, IsCompressing, MemVT, MMO);
   createOperands(N, Ops);
 
   CSEMap.InsertNode(N, IP);
@@ -7882,7 +7846,9 @@
   assert(Chain.getValueType() == MVT::Other && "Invalid chain type");
 
   if (VT == SVT)
-    return getStoreVP(Chain, dl, Val, Ptr, Mask, EVL, MMO, IsCompressing);
+    return getStoreVP(Chain, dl, Val, Ptr, getUNDEF(Ptr.getValueType()), Mask,
+                      EVL, VT, MMO, ISD::UNINDEXED,
+                      /*IsTruncating*/ false, IsCompressing);
 
   assert(SVT.getScalarType().bitsLT(VT.getScalarType()) &&
          "Should only be a truncating store, not extending!");
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -7390,12 +7390,14 @@
   AAMDNodes AAInfo = VPIntrin.getAAMetadata();
   SDValue ST;
   if (!IsScatter) {
+    SDValue Ptr = OpValues[1];
+    SDValue Offset = DAG.getUNDEF(Ptr.getValueType());
     MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
         MachinePointerInfo(PtrOperand), MachineMemOperand::MOStore,
         MemoryLocation::UnknownSize, *Alignment, AAInfo);
-    ST =
-        DAG.getStoreVP(getMemoryRoot(), DL, OpValues[0], OpValues[1],
-                       OpValues[2], OpValues[3], MMO, false /* IsTruncating */);
+    ST = DAG.getStoreVP(getMemoryRoot(), DL, OpValues[0], Ptr, Offset,
+                        OpValues[2], OpValues[3], VT, MMO, ISD::UNINDEXED,
+                        /* IsTruncating */ false, /*IsCompressing*/ false);
   } else {
     unsigned AS =
         PtrOperand->getType()->getScalarType()->getPointerAddressSpace();
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -7536,12 +7536,12 @@
     if (VecVT.getVectorElementType() == MemVT) {
       SDLoc DL(N);
       MVT MaskVT = MVT::getVectorVT(MVT::i1, VecVT.getVectorElementCount());
-      return DAG.getStoreVP(Store->getChain(), DL, Src, Store->getBasePtr(),
-                            DAG.getConstant(1, DL, MaskVT),
-                            DAG.getConstant(1, DL, Subtarget.getXLenVT()),
-                            Store->getPointerInfo(),
-                            Store->getOriginalAlign(),
-                            Store->getMemOperand()->getFlags());
+      return DAG.getStoreVP(
+          Store->getChain(), DL, Src, Store->getBasePtr(), Store->getOffset(),
+          DAG.getConstant(1, DL, MaskVT),
+          DAG.getConstant(1, DL, Subtarget.getXLenVT()), MemVT,
+          Store->getMemOperand(), Store->getAddressingMode(),
+          Store->isTruncatingStore(), /*IsCompress*/ false);
     }
   }
 
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpload.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpload.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpload.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpload.ll
@@ -16,6 +16,18 @@
   ret <2 x i8> %load
 }
 
+declare <3 x i8> @llvm.vp.load.v3i8.p0v3i8(<3 x i8>*, <3 x i1>, i32)
+
+define <3 x i8> @vpload_v3i8(<3 x i8>* %ptr, <3 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpload_v3i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf4, ta, mu
+; CHECK-NEXT:    vle8.v v8, (a0), v0.t
+; CHECK-NEXT:    ret
+  %load = call <3 x i8> @llvm.vp.load.v3i8.p0v3i8(<3 x i8>* %ptr, <3 x i1> %m, i32 %evl)
+  ret <3 x i8> %load
+}
+
 declare <4 x i8> @llvm.vp.load.v4i8.p0v4i8(<4 x i8>*, <4 x i1>, i32)
 
 define <4 x i8> @vpload_v4i8(<4 x i8>* %ptr, <4 x i1> %m, i32 zeroext %evl) {
@@ -124,6 +136,30 @@
   ret <4 x i32> %load
 }
 
+declare <6 x i32> @llvm.vp.load.v6i32.p0v6i32(<6 x i32>*, <6 x i1>, i32)
+
+define <6 x i32> @vpload_v6i32(<6 x i32>* %ptr, <6 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpload_v6i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu
+; CHECK-NEXT:    vle32.v v8, (a0), v0.t
+; CHECK-NEXT:    ret
+  %load = call <6 x i32> @llvm.vp.load.v6i32.p0v6i32(<6 x i32>* %ptr, <6 x i1> %m, i32 %evl)
+  ret <6 x i32> %load
+}
+
+define <6 x i32> @vpload_v6i32_allones_mask(<6 x i32>* %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: vpload_v6i32_allones_mask:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu
+; CHECK-NEXT:    vle32.v v8, (a0)
+; CHECK-NEXT:    ret
+  %a = insertelement <6 x i1> undef, i1 true, i32 0
+  %b = shufflevector <6 x i1> %a, <6 x i1> poison, <6 x i32> zeroinitializer
+  %load = call <6 x i32> @llvm.vp.load.v6i32.p0v6i32(<6 x i32>* %ptr, <6 x i1> %b, i32 %evl)
+  ret <6 x i32> %load
+}
+
 declare <8 x i32> @llvm.vp.load.v8i32.p0v8i32(<8 x i32>*, <8 x i1>, i32)
 
 define <8 x i32> @vpload_v8i32(<8 x i32>* %ptr, <8 x i1> %m, i32 zeroext %evl) {
@@ -339,3 +375,33 @@
   %load = call <8 x double> @llvm.vp.load.v8f64.p0v8f64(<8 x double>* %ptr, <8 x i1> %m, i32 %evl)
   ret <8 x double> %load
 }
+
+declare <32 x double> @llvm.vp.load.v32f64.p0v32f64(<32 x double>*, <32 x i1>, i32)
+
+define <32 x double> @vpload_v32f64(<32 x double>* %ptr, <32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpload_v32f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a3, a1, -16
+; CHECK-NEXT:    vmv1r.v v8, v0
+; CHECK-NEXT:    li a2, 0
+; CHECK-NEXT:    bltu a1, a3, .LBB31_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    mv a2, a3
+; CHECK-NEXT:  .LBB31_2:
+; CHECK-NEXT:    vsetivli zero, 2, e8, mf4, ta, mu
+; CHECK-NEXT:    vslidedown.vi v0, v8, 2
+; CHECK-NEXT:    addi a3, a0, 128
+; CHECK-NEXT:    vsetvli zero, a2, e64, m8, ta, mu
+; CHECK-NEXT:    vle64.v v16, (a3), v0.t
+; CHECK-NEXT:    li a2, 16
+; CHECK-NEXT:    bltu a1, a2, .LBB31_4
+; CHECK-NEXT:  # %bb.3:
+; CHECK-NEXT:    li a1, 16
+; CHECK-NEXT:  .LBB31_4:
+; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
+; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vle64.v v8, (a0), v0.t
+; CHECK-NEXT:    ret
+  %load = call <32 x double> @llvm.vp.load.v32f64.p0v32f64(<32 x double>* %ptr, <32 x i1> %m, i32 %evl)
+  ret <32 x double> %load
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpstore.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpstore.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpstore.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpstore.ll
@@ -208,6 +208,18 @@
   ret void
 }
 
+declare void @llvm.vp.store.v6f32.p0v6f32(<6 x float>, <6 x float>*, <6 x i1>, i32)
+
+define void @vpstore_v6f32(<6 x float> %val, <6 x float>* %ptr, <6 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpstore_v6f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu
+; CHECK-NEXT:    vse32.v v8, (a0), v0.t
+; CHECK-NEXT:    ret
+  call void @llvm.vp.store.v6f32.p0v6f32(<6 x float> %val, <6 x float>* %ptr, <6 x i1> %m, i32 %evl)
+  ret void
+}
+
 declare void @llvm.vp.store.v8f32.p0v8f32(<8 x float>, <8 x float>*, <8 x i1>, i32)
 
 define void @vpstore_v8f32(<8 x float> %val, <8 x float>* %ptr, <8 x i1> %m, i32 zeroext %evl) {
@@ -267,3 +279,32 @@
   call void @llvm.vp.store.v2i8.p0v2i8(<2 x i8> %val, <2 x i8>* %ptr, <2 x i1> %b, i32 %evl)
   ret void
 }
+
+declare void @llvm.vp.store.v32f64.p0v32f64(<32 x double>, <32 x double>*, <32 x i1>, i32)
+
+define void @vpstore_v32f64(<32 x double> %val, <32 x double>* %ptr, <32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpstore_v32f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a2, 16
+; CHECK-NEXT:    mv a3, a1
+; CHECK-NEXT:    bltu a1, a2, .LBB23_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    li a3, 16
+; CHECK-NEXT:  .LBB23_2:
+; CHECK-NEXT:    li a2, 0
+; CHECK-NEXT:    vsetvli zero, a3, e64, m8, ta, mu
+; CHECK-NEXT:    addi a3, a1, -16
+; CHECK-NEXT:    vse64.v v8, (a0), v0.t
+; CHECK-NEXT:    bltu a1, a3, .LBB23_4
+; CHECK-NEXT:  # %bb.3:
+; CHECK-NEXT:    mv a2, a3
+; CHECK-NEXT:  .LBB23_4:
+; CHECK-NEXT:    vsetivli zero, 2, e8, mf4, ta, mu
+; CHECK-NEXT:    vslidedown.vi v0, v0, 2
+; CHECK-NEXT:    addi a0, a0, 128
+; CHECK-NEXT:    vsetvli zero, a2, e64, m8, ta, mu
+; CHECK-NEXT:    vse64.v v16, (a0), v0.t
+; CHECK-NEXT:    ret
+  call void @llvm.vp.store.v32f64.p0v32f64(<32 x double> %val, <32 x double>* %ptr, <32 x i1> %m, i32 %evl)
+  ret void
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vpload.ll b/llvm/test/CodeGen/RISCV/rvv/vpload.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vpload.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vpload.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+experimental-zfh,+experimental-v \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+experimental-zfh,+experimental-v,+m \
 ; RUN:   -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+experimental-zfh,+experimental-v \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+experimental-zfh,+experimental-v,+m \
 ; RUN:   -verify-machineinstrs < %s | FileCheck %s
 
 declare <vscale x 1 x i8> @llvm.vp.load.nxv1i8.p0nxv1i8(<vscale x 1 x i8>*, <vscale x 1 x i1>, i32)
@@ -40,6 +40,18 @@
   ret <vscale x 2 x i8> %load
 }
 
+declare <vscale x 3 x i8> @llvm.vp.load.nxv3i8.p0nxv3i8(<vscale x 3 x i8>*, <vscale x 3 x i1>, i32)
+
+define <vscale x 3 x i8> @vpload_nxv3i8(<vscale x 3 x i8>* %ptr, <vscale x 3 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpload_nxv3i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, mu
+; CHECK-NEXT:    vle8.v v8, (a0), v0.t
+; CHECK-NEXT:    ret
+  %load = call <vscale x 3 x i8> @llvm.vp.load.nxv3i8.p0nxv3i8(<vscale x 3 x i8>* %ptr, <vscale x 3 x i1> %m, i32 %evl)
+  ret <vscale x 3 x i8> %load
+}
+
 declare <vscale x 4 x i8> @llvm.vp.load.nxv4i8.p0nxv4i8(<vscale x 4 x i8>*, <vscale x 4 x i1>, i32)
 
 define <vscale x 4 x i8> @vpload_nxv4i8(<vscale x 4 x i8>* %ptr, <vscale x 4 x i1> %m, i32 zeroext %evl) {
@@ -435,3 +447,35 @@
   %load = call <vscale x 8 x double> @llvm.vp.load.nxv8f64.p0nxv8f64(<vscale x 8 x double>* %ptr, <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x double> %load
 }
+
+declare <vscale x 16 x double> @llvm.vp.load.nxv16f64.p0nxv16f64(<vscale x 16 x double>*, <vscale x 16 x i1>, i32)
+
+define <vscale x 16 x double> @vpload_nxv16f64(<vscale x 16 x double>* %ptr, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpload_nxv16f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vmv1r.v v8, v0
+; CHECK-NEXT:    li a3, 0
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    srli a5, a2, 3
+; CHECK-NEXT:    vsetvli a4, zero, e8, mf4, ta, mu
+; CHECK-NEXT:    sub a4, a1, a2
+; CHECK-NEXT:    vslidedown.vx v0, v0, a5
+; CHECK-NEXT:    bltu a1, a4, .LBB37_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    mv a3, a4
+; CHECK-NEXT:  .LBB37_2:
+; CHECK-NEXT:    slli a4, a2, 3
+; CHECK-NEXT:    add a4, a0, a4
+; CHECK-NEXT:    vsetvli zero, a3, e64, m8, ta, mu
+; CHECK-NEXT:    vle64.v v16, (a4), v0.t
+; CHECK-NEXT:    bltu a1, a2, .LBB37_4
+; CHECK-NEXT:  # %bb.3:
+; CHECK-NEXT:    mv a1, a2
+; CHECK-NEXT:  .LBB37_4:
+; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
+; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vle64.v v8, (a0), v0.t
+; CHECK-NEXT:    ret
+  %load = call <vscale x 16 x double> @llvm.vp.load.nxv16f64.p0nxv16f64(<vscale x 16 x double>* %ptr, <vscale x 16 x i1> %m, i32 %evl)
+  ret <vscale x 16 x double> %load
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vpstore.ll b/llvm/test/CodeGen/RISCV/rvv/vpstore.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vpstore.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vpstore.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+experimental-zfh,+experimental-v \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+experimental-zfh,+experimental-v,+m \
 ; RUN:   -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+experimental-zfh,+experimental-v \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+experimental-zfh,+experimental-v,+m \
 ; RUN:   -verify-machineinstrs < %s | FileCheck %s
 
 declare void @llvm.vp.store.nxv1i8.p0nxv1i8(<vscale x 1 x i8>, <vscale x 1 x i8>*, <vscale x 1 x i1>, i32)
@@ -28,6 +28,18 @@
   ret void
 }
 
+declare void @llvm.vp.store.nxv3i8.p0nxv3i8(<vscale x 3 x i8>, <vscale x 3 x i8>*, <vscale x 3 x i1>, i32)
+
+define void @vpstore_nxv3i8(<vscale x 3 x i8> %val, <vscale x 3 x i8>* %ptr, <vscale x 3 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpstore_nxv3i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, mu
+; CHECK-NEXT:    vse8.v v8, (a0), v0.t
+; CHECK-NEXT:    ret
+  call void @llvm.vp.store.nxv3i8.p0nxv3i8(<vscale x 3 x i8> %val, <vscale x 3 x i8>* %ptr, <vscale x 3 x i1> %m, i32 %evl)
+  ret void
+}
+
 declare void @llvm.vp.store.nxv4i8.p0nxv4i8(<vscale x 4 x i8>, <vscale x 4 x i8>*, <vscale x 4 x i1>, i32)
 
 define void @vpstore_nxv4i8(<vscale x 4 x i8> %val, <vscale x 4 x i8>* %ptr, <vscale x 4 x i1> %m, i32 zeroext %evl) {
@@ -351,3 +363,34 @@
   call void @llvm.vp.store.nxv1i8.p0nxv1i8(<vscale x 1 x i8> %val, <vscale x 1 x i8>* %ptr, <vscale x 1 x i1> %b, i32 %evl)
   ret void
 }
+
+declare void @llvm.vp.store.nxv16f64.p0nxv16f64(<vscale x 16 x double>, <vscale x 16 x double>*, <vscale x 16 x i1>, i32)
+
+define void @vpstore_nxv16f64(<vscale x 16 x double> %val, <vscale x 16 x double>* %ptr, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpstore_nxv16f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    mv a3, a1
+; CHECK-NEXT:    bltu a1, a2, .LBB30_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    mv a3, a2
+; CHECK-NEXT:  .LBB30_2:
+; CHECK-NEXT:    li a4, 0
+; CHECK-NEXT:    vsetvli zero, a3, e64, m8, ta, mu
+; CHECK-NEXT:    vse64.v v8, (a0), v0.t
+; CHECK-NEXT:    srli a5, a2, 3
+; CHECK-NEXT:    vsetvli a3, zero, e8, mf4, ta, mu
+; CHECK-NEXT:    sub a3, a1, a2
+; CHECK-NEXT:    vslidedown.vx v0, v0, a5
+; CHECK-NEXT:    bltu a1, a3, .LBB30_4
+; CHECK-NEXT:  # %bb.3:
+; CHECK-NEXT:    mv a4, a3
+; CHECK-NEXT:  .LBB30_4:
+; CHECK-NEXT:    slli a1, a2, 3
+; CHECK-NEXT:    add a0, a0, a1
+; CHECK-NEXT:    vsetvli zero, a4, e64, m8, ta, mu
+; CHECK-NEXT:    vse64.v v16, (a0), v0.t
+; CHECK-NEXT:    ret
+  call void @llvm.vp.store.nxv16f64.p0nxv16f64(<vscale x 16 x double> %val, <vscale x 16 x double>* %ptr, <vscale x 16 x i1> %m, i32 %evl)
+  ret void
+}
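
Note for out-of-tree callers of the removed SelectionDAG::getStoreVP convenience overloads: the unified signature introduced above expects the caller to build the MachineMemOperand itself and to pass the Offset operand, memory VT, and addressing mode explicitly. Below is a minimal, hypothetical sketch of the equivalent unindexed, non-truncating call; Chain, DL, Val, Ptr, Mask, EVL, PtrInfo, Alignment, and AAInfo stand in for values the caller already has and are not part of this patch.

  // Build the memory operand that the removed overload used to create
  // internally from PtrInfo/Alignment/MMOFlags.
  MachineFunction &MF = DAG.getMachineFunction();
  MachineMemOperand *MMO = MF.getMachineMemOperand(
      PtrInfo, MachineMemOperand::MOStore,
      MemoryLocation::getSizeOrUnknown(Val.getValueType().getStoreSize()),
      Alignment, AAInfo);

  // Unindexed VP store: the Offset operand must be undef for ISD::UNINDEXED,
  // and MemVT equals the value type when the store is not truncating.
  SDValue Store = DAG.getStoreVP(
      Chain, DL, Val, Ptr, DAG.getUNDEF(Ptr.getValueType()), Mask, EVL,
      /*MemVT=*/Val.getValueType(), MMO, ISD::UNINDEXED,
      /*IsTruncating=*/false, /*IsCompressing=*/false);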