Index: llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -19816,23 +19816,34 @@ if (OptLevel != CodeGenOpt::None && ST1->hasOneUse() && !ST1->getBasePtr().isUndef() && - // BaseIndexOffset and the code below requires knowing the size - // of a vector, so bail out if MemoryVT is scalable. - !ST->getMemoryVT().isScalableVector() && - !ST1->getMemoryVT().isScalableVector() && ST->getAddressSpace() == ST1->getAddressSpace()) { - const BaseIndexOffset STBase = BaseIndexOffset::match(ST, DAG); - const BaseIndexOffset ChainBase = BaseIndexOffset::match(ST1, DAG); - unsigned STBitSize = ST->getMemoryVT().getFixedSizeInBits(); - unsigned ChainBitSize = ST1->getMemoryVT().getFixedSizeInBits(); - // If this is a store who's preceding store to a subset of the current - // location and no one other node is chained to that store we can - // effectively drop the store. Do not remove stores to undef as they may - // be used as data sinks. - if (STBase.contains(DAG, STBitSize, ChainBase, ChainBitSize)) { + // If we consider two stores where the smaller one has a scalable + // vector type and the bigger one has a fixed-size type, then we + // cannot allow removing the scalable store, because we do not + // know its final size until runtime.
+ if ((ST->getMemoryVT().isScalableVector() || + ST1->getMemoryVT().isScalableVector()) && + (ST1->getBasePtr() == Ptr && + TypeSize::isKnownLE(ST1->getMemoryVT().getStoreSize(), + ST->getMemoryVT().getStoreSize()))) { CombineTo(ST1, ST1->getChain()); return SDValue(); } + if (!ST->getMemoryVT().isScalableVector() && + !ST1->getMemoryVT().isScalableVector()) { + const BaseIndexOffset STBase = BaseIndexOffset::match(ST, DAG); + const BaseIndexOffset ChainBase = BaseIndexOffset::match(ST1, DAG); + // If the preceding store writes to a subset of the current store's + // location and no other node is chained to that store, we can + // effectively drop the preceding store. Do not remove stores to + // undef as they may be used as data sinks. + if (STBase.contains(DAG, ST->getMemoryVT().getFixedSizeInBits(), + ChainBase, + ST1->getMemoryVT().getFixedSizeInBits())) { + CombineTo(ST1, ST1->getChain()); + return SDValue(); + } + } } } } Index: llvm/test/CodeGen/AArch64/sve-redundant-store.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve-redundant-store.ll +++ llvm/test/CodeGen/AArch64/sve-redundant-store.ll @@ -8,18 +8,69 @@ ; *p = 1; ; *(svint32_t *)p = v; ; } - -; Update me: Until dead store elimination is improved in DAGCombine, this will contain a redundant store. 
-; define void @redundant_store(ptr nocapture %p, <vscale x 4 x i32> %v) { ; CHECK-LABEL: redundant_store: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #1 ; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: str w8, [x0] ; CHECK-NEXT: st1w { z0.s }, p0, [x0] ; CHECK-NEXT: ret store i32 1, ptr %p, align 4 store <vscale x 4 x i32> %v, <vscale x 4 x i32>* %p, align 16 ret void } + +define void @two_scalable_same_size(ptr writeonly %ptr, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) { +; CHECK-LABEL: two_scalable_same_size: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: st1w { z1.s }, p0, [x0] +; CHECK-NEXT: ret +entry: + store <vscale x 4 x i32> %a, ptr %ptr + store <vscale x 4 x i32> %b, ptr %ptr + ret void +} + +; Make sure that the scalable store is present, because we don't know its final size. +define void @keep_scalable_store(ptr writeonly %ptr, ptr %a, <vscale x 4 x i32> %b) { +; CHECK-LABEL: keep_scalable_store: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ldp q2, q1, [x1] +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: st1w { z0.s }, p0, [x0] +; CHECK-NEXT: stp q2, q1, [x0] +; CHECK-NEXT: ret +entry: + %0 = load <8 x i32>, ptr %a + store <vscale x 4 x i32> %b, ptr %ptr + store <8 x i32> %0, ptr %ptr + ret void +} + +define void @two_scalable_keep_stores(ptr writeonly %ptr, <vscale x 4 x i32> %a, <vscale x 4 x i64> %b) { +; CHECK-LABEL: two_scalable_keep_stores: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: ptrue p1.s +; CHECK-NEXT: st1d { z2.d }, p0, [x0, #1, mul vl] +; CHECK-NEXT: st1d { z1.d }, p0, [x0] +; CHECK-NEXT: st1w { z0.s }, p1, [x0] +; CHECK-NEXT: ret +entry: + store <vscale x 4 x i64> %b, ptr %ptr + store <vscale x 4 x i32> %a, ptr %ptr + ret void +} + +define void @two_scalable_remove_store(ptr writeonly %ptr, <vscale x 4 x i32> %a, <vscale x 4 x i64> %b) { +; CHECK-LABEL: two_scalable_remove_store: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: st1d { z2.d }, p0, [x0, #1, mul vl] +; CHECK-NEXT: st1d { z1.d }, p0, [x0] +; CHECK-NEXT: ret +entry: + store <vscale x 4 x i32> %a, ptr %ptr + store <vscale x 4 x i64> %b, ptr %ptr + ret void +}