Index: llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -19804,8 +19804,8 @@
   // TODO: Can relax for unordered atomics (see D66309)
   if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
-    if (ST->isUnindexed() && ST->isSimple() &&
-        ST1->isUnindexed() && ST1->isSimple()) {
+    if (ST->isUnindexed() && ST->isSimple() && ST1->isUnindexed() &&
+        ST1->isSimple()) {
       if (OptLevel != CodeGenOpt::None && ST1->getBasePtr() == Ptr &&
           ST1->getValue() == Value &&
           ST->getMemoryVT() == ST1->getMemoryVT() &&
           ST->getAddressSpace() == ST1->getAddressSpace()) {
@@ -19816,20 +19816,25 @@
 
       if (OptLevel != CodeGenOpt::None && ST1->hasOneUse() &&
           !ST1->getBasePtr().isUndef() &&
-          // BaseIndexOffset and the code below requires knowing the size
-          // of a vector, so bail out if MemoryVT is scalable.
-          !ST->getMemoryVT().isScalableVector() &&
-          !ST1->getMemoryVT().isScalableVector() &&
           ST->getAddressSpace() == ST1->getAddressSpace()) {
+        // If the earlier, smaller store has a scalable vector type and the
+        // later store has a fixed-size type, the scalable store cannot be
+        // removed: its final size is unknown at compile time, so the
+        // fixed-size store is not known to cover it.
         const BaseIndexOffset STBase = BaseIndexOffset::match(ST, DAG);
         const BaseIndexOffset ChainBase = BaseIndexOffset::match(ST1, DAG);
-        unsigned STBitSize = ST->getMemoryVT().getFixedSizeInBits();
-        unsigned ChainBitSize = ST1->getMemoryVT().getFixedSizeInBits();
         // If this is a store who's preceding store to a subset of the current
         // location and no one other node is chained to that store we can
         // effectively drop the store. Do not remove stores to undef as they may
         // be used as data sinks.
-        if (STBase.contains(DAG, STBitSize, ChainBase, ChainBitSize)) {
+        if ((ST1->getBasePtr() == Ptr &&
+             TypeSize::isKnownLE(ST1->getMemoryVT().getStoreSize(),
+                                 ST->getMemoryVT().getStoreSize())) ||
+            (!ST->getMemoryVT().isScalableVector() &&
+             !ST1->getMemoryVT().isScalableVector() &&
+             STBase.contains(DAG, ST->getMemoryVT().getFixedSizeInBits(),
+                             ChainBase,
+                             ST1->getMemoryVT().getFixedSizeInBits()))) {
           CombineTo(ST1, ST1->getChain());
           return SDValue();
         }
Index: llvm/test/CodeGen/AArch64/sve-redundant-store.ll
===================================================================
--- llvm/test/CodeGen/AArch64/sve-redundant-store.ll
+++ llvm/test/CodeGen/AArch64/sve-redundant-store.ll
@@ -8,18 +8,29 @@
 ;   *p = 1;
 ;   *(svint32_t *)p = v;
 ; }
-
-; Update me: Until dead store elimination is improved in DAGCombine, this will contain a redundant store.
-;
 define void @redundant_store(ptr nocapture %p, <vscale x 4 x i32> %v) {
 ; CHECK-LABEL: redundant_store:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #1
 ; CHECK-NEXT:    ptrue p0.s
-; CHECK-NEXT:    str w8, [x0]
 ; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
 ; CHECK-NEXT:    ret
   store i32 1, ptr %p, align 4
   store <vscale x 4 x i32> %v, <vscale x 4 x i32>* %p, align 16
   ret void
 }
+
+; Make sure the scalable store is kept, because we don't know its final size.
+define void @keep_scalable_store(ptr writeonly %ptr, ptr %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: keep_scalable_store:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldp q2, q1, [x1]
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
+; CHECK-NEXT:    stp q2, q1, [x0]
+; CHECK-NEXT:    ret
+entry:
+  %0 = load <8 x i32>, ptr %a
+  store <vscale x 4 x i32> %b, ptr %ptr
+  store <8 x i32> %0, ptr %ptr
+  ret void
+}
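
Note on the TypeSize comparison used above: the rewritten condition only drops the earlier store (ST1) when both stores use the same base pointer and TypeSize::isKnownLE can prove that ST1's store size fits inside ST's for every possible vscale. The sketch below is a simplified, self-contained model of that "known for all vscale" idea; it is not LLVM's TypeSize implementation, and the struct, fields, and sizes are illustrative only. It shows why a scalable store is never provably covered by a fixed-size store, which is exactly what the keep_scalable_store test checks.

// Simplified model of a fixed-or-scalable size, for illustration only.
// This sketches the idea behind TypeSize::isKnownLE; it is not LLVM's
// implementation, and the names below are made up for the example.
#include <cassert>

struct Size {
  unsigned MinBytes; // known minimum size in bytes
  bool Scalable;     // true => actual size is MinBytes * vscale, vscale >= 1
};

// True only if LHS <= RHS holds for every possible value of vscale.
static bool isKnownLE(Size LHS, Size RHS) {
  // fixed <= fixed and scalable <= scalable: compare the known minimums.
  // fixed <= scalable: vscale >= 1, so RHS's minimum already suffices.
  // scalable <= fixed: never provable, because vscale has no upper bound.
  if (!LHS.Scalable || RHS.Scalable)
    return LHS.MinBytes <= RHS.MinBytes;
  return false;
}

int main() {
  Size Fixed32{32, false};   // e.g. an <8 x i32> store: always 32 bytes
  Size Scalable16{16, true}; // e.g. a <vscale x 4 x i32> store: 16 * vscale

  assert(!isKnownLE(Scalable16, Fixed32));          // scalable store must stay
  assert(isKnownLE(Scalable16, Scalable16));        // same scalable size covers it
  assert(!isKnownLE(Fixed32, Scalable16));          // 32 > 16 * 1 is possible
  assert(isKnownLE(Size{16, false}, Scalable16));   // 16 <= 16 * vscale always
  return 0;
}

This is also why the patch keeps the BaseIndexOffset::contains path restricted to fixed-size stores: that check works in exact bit counts, while the new first disjunct handles the scalable case by requiring the same base pointer plus an isKnownLE-style size comparison.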