Index: llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -541,28 +541,29 @@ LLVM_DEBUG(dbgs() << "Legalizing truncating store operations\n"); SDValue Value = ST->getValue(); EVT StVT = ST->getMemoryVT(); - unsigned StWidth = StVT.getSizeInBits(); + TypeSize StWidth = StVT.getSizeInBits(); + TypeSize StSize = StVT.getStoreSizeInBits(); auto &DL = DAG.getDataLayout(); - if (StWidth != StVT.getStoreSizeInBits()) { + if (StWidth != StSize) { // Promote to a byte-sized store with upper bits zero if not // storing an integral number of bytes. For example, promote // TRUNCSTORE:i1 X -> TRUNCSTORE:i8 (and X, 1) - EVT NVT = EVT::getIntegerVT(*DAG.getContext(), - StVT.getStoreSizeInBits()); + EVT NVT = EVT::getIntegerVT(*DAG.getContext(), StSize.getFixedSize()); Value = DAG.getZeroExtendInReg(Value, dl, StVT); SDValue Result = DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), NVT, ST->getOriginalAlign(), MMOFlags, AAInfo); ReplaceNode(SDValue(Node, 0), Result); - } else if (StWidth & (StWidth - 1)) { + } else if (!isPowerOf2_64(StWidth.getKnownMinSize())) { // If not storing a power-of-2 number of bits, expand as two stores. assert(!StVT.isVector() && "Unsupported truncstore!"); - unsigned LogStWidth = Log2_32(StWidth); + unsigned StWidthBits = StWidth.getFixedSize(); + unsigned LogStWidth = Log2_32(StWidthBits); assert(LogStWidth < 32); unsigned RoundWidth = 1 << LogStWidth; - assert(RoundWidth < StWidth); - unsigned ExtraWidth = StWidth - RoundWidth; + assert(RoundWidth < StWidthBits); + unsigned ExtraWidth = StWidthBits - RoundWidth; assert(ExtraWidth < RoundWidth); assert(!(RoundWidth % 8) && !(ExtraWidth % 8) && "Store size not an integral number of bytes!"); Index: llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -2425,9 +2425,10 @@ DAG.GetDependentSplitDestVTs(MemoryVT, DataLo.getValueType(), &HiIsEmpty); SDValue Lo, Hi, Res; + unsigned LoSize = MemoryLocation::getSizeOrUnknown(LoMemVT.getStoreSize()); MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( - N->getPointerInfo(), MachineMemOperand::MOStore, LoMemVT.getStoreSize(), - Alignment, N->getAAInfo(), N->getRanges()); + N->getPointerInfo(), MachineMemOperand::MOStore, LoSize, Alignment, + N->getAAInfo(), N->getRanges()); Lo = DAG.getMaskedStore(Ch, DL, DataLo, Ptr, Offset, MaskLo, LoMemVT, MMO, N->getAddressingMode(), N->isTruncatingStore(), @@ -2441,11 +2442,18 @@ Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG, N->isCompressingStore()); - unsigned HiOffset = LoMemVT.getStoreSize(); + MachinePointerInfo MPI; + if (LoMemVT.isScalableVector()) + MPI = MachinePointerInfo(N->getPointerInfo().getAddrSpace()); + else + MPI = N->getPointerInfo().getWithOffset( + LoMemVT.getStoreSize().getFixedSize()); + + unsigned HiSize = MemoryLocation::getSizeOrUnknown(HiMemVT.getStoreSize()); MMO = DAG.getMachineFunction().getMachineMemOperand( - N->getPointerInfo().getWithOffset(HiOffset), MachineMemOperand::MOStore, - HiMemVT.getStoreSize(), Alignment, N->getAAInfo(), N->getRanges()); + MPI, MachineMemOperand::MOStore, HiSize, Alignment, N->getAAInfo(), + N->getRanges()); Hi = DAG.getMaskedStore(Ch, DL, DataHi, Ptr, Offset, MaskHi, HiMemVT, MMO, N->getAddressingMode(), N->isTruncatingStore(), Index: llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -2214,6 +2214,10 @@ /// SimplifyMultipleUseDemandedBits and not generate any new nodes. SDValue SelectionDAG::GetDemandedBits(SDValue V, const APInt &DemandedBits) { EVT VT = V.getValueType(); + + if (VT.isScalableVector()) + return SDValue(); + APInt DemandedElts = VT.isVector() ? APInt::getAllOnesValue(VT.getVectorNumElements()) : APInt(1, 1); @@ -2227,6 +2231,9 @@ /// SimplifyMultipleUseDemandedBits and not generate any new nodes. SDValue SelectionDAG::GetDemandedBits(SDValue V, const APInt &DemandedBits, const APInt &DemandedElts) { + if (!DemandedElts) + return SDValue(); + switch (V.getOpcode()) { default: return TLI->SimplifyMultipleUseDemandedBits(V, DemandedBits, DemandedElts, @@ -7107,7 +7114,8 @@ MachineFunction &MF = getMachineFunction(); MachineMemOperand *MMO = MF.getMachineMemOperand( - PtrInfo, MMOFlags, SVT.getStoreSize(), Alignment, AAInfo); + PtrInfo, MMOFlags, MemoryLocation::getSizeOrUnknown(SVT.getStoreSize()), + Alignment, AAInfo); return getTruncStore(Chain, dl, Val, Ptr, SVT, MMO); } @@ -7128,7 +7136,7 @@ assert(VT.isVector() == SVT.isVector() && "Cannot use trunc store to convert to or from a vector!"); assert((!VT.isVector() || - VT.getVectorNumElements() == SVT.getVectorNumElements()) && + VT.getVectorElementCount() == SVT.getVectorElementCount()) && "Cannot use trunc store to change the number of vector elements!"); SDVTList VTs = getVTList(MVT::Other); Index: llvm/test/CodeGen/AArch64/sve-split-store.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve-split-store.ll +++ llvm/test/CodeGen/AArch64/sve-split-store.ll @@ -1,5 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s +; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t + +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. +; WARN-NOT: warning ; UNPREDICATED @@ -113,10 +117,10 @@ ; CHECK-NEXT: zip1 p0.h, p0.h, p1.h ; CHECK-NEXT: zip2 p3.s, p2.s, p1.s ; CHECK-NEXT: zip1 p2.s, p2.s, p1.s +; CHECK-NEXT: st1d { z3.d }, p3, [x0, #3, mul vl] ; CHECK-NEXT: st1d { z2.d }, p2, [x0, #2, mul vl] ; CHECK-NEXT: zip2 p2.s, p0.s, p1.s ; CHECK-NEXT: zip1 p0.s, p0.s, p1.s -; CHECK-NEXT: st1d { z3.d }, p3, [x0, #3, mul vl] ; CHECK-NEXT: st1d { z1.d }, p2, [x0, #1, mul vl] ; CHECK-NEXT: st1d { z0.d }, p0, [x0] ; CHECK-NEXT: ret