Index: llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -19737,20 +19737,28 @@ if (OptLevel != CodeGenOpt::None && ST1->hasOneUse() && !ST1->getBasePtr().isUndef() && - // BaseIndexOffset and the code below requires knowing the size - // of a vector, so bail out if MemoryVT is scalable. - !ST->getMemoryVT().isScalableVector() && - !ST1->getMemoryVT().isScalableVector() && ST->getAddressSpace() == ST1->getAddressSpace()) { + // If we consider two stores and one smaller in size is a scalable + // vector type and another one a bigger size store with a fixed type, + // then we could not allow the scalable store removal because we don't + // know its final size in the end. const BaseIndexOffset STBase = BaseIndexOffset::match(ST, DAG); const BaseIndexOffset ChainBase = BaseIndexOffset::match(ST1, DAG); - unsigned STBitSize = ST->getMemoryVT().getFixedSizeInBits(); - unsigned ChainBitSize = ST1->getMemoryVT().getFixedSizeInBits(); + unsigned STBitSize = + ST->getMemoryVT().isScalableVector() + ? ST->getMemoryVT().getStoreSize().getKnownMinValue() * 8 + : ST->getMemoryVT().getFixedSizeInBits(); + unsigned ChainBitSize = + ST1->getMemoryVT().isScalableVector() + ? ST1->getMemoryVT().getStoreSize().getKnownMinValue() * 8 + : ST1->getMemoryVT().getFixedSizeInBits(); // If this is a store who's preceding store to a subset of the current // location and no one other node is chained to that store we can // effectively drop the store. Do not remove stores to undef as they may // be used as data sinks. - if (STBase.contains(DAG, STBitSize, ChainBase, ChainBitSize)) { + if ((!ST1->getMemoryVT().isScalableVector() || + STBitSize <= ChainBitSize) && + STBase.contains(DAG, STBitSize, ChainBase, ChainBitSize)) { CombineTo(ST1, ST1->getChain()); return SDValue(); } Index: llvm/test/CodeGen/AArch64/sve-insert-vector.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve-insert-vector.ll +++ llvm/test/CodeGen/AArch64/sve-insert-vector.ll @@ -529,9 +529,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: st1h { z0.s }, p0, [sp, #1, mul vl] ; CHECK-NEXT: addpl x8, sp, #4 +; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: str d1, [x8] ; CHECK-NEXT: ld1h { z0.s }, p0/z, [sp, #1, mul vl] ; CHECK-NEXT: addvl sp, sp, #1 Index: llvm/test/CodeGen/AArch64/sve-redundant-store.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve-redundant-store.ll +++ llvm/test/CodeGen/AArch64/sve-redundant-store.ll @@ -8,18 +8,22 @@ ; *p = 1; ; *(svint32_t *)p = v; ; } - -; Update me: Until dead store elimination is improved in DAGCombine, this will contain a redundant store. -; define void @redundant_store(ptr nocapture %p, %v) { ; CHECK-LABEL: redundant_store: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #1 ; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: str w8, [x0] ; CHECK-NEXT: st1w { z0.s }, p0, [x0] ; CHECK-NEXT: ret store i32 1, ptr %p, align 4 store %v, * %p, align 16 ret void } + +; make sure that scalable store is present, becuase we don't know its final size. +define void @keep_scalable_store(ptr writeonly %ptr, ptr %a, %b) { +entry: + %0 = load <8 x i32>, ptr %a + store %b, ptr %ptr + store <8 x i32> %0, ptr %ptr + ret void +}