diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp --- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -1705,6 +1705,8 @@ switch (CB->getIntrinsicID()) { case Intrinsic::init_trampoline: return {MemoryLocation(CB->getArgOperand(0))}; + case Intrinsic::masked_store: + return {MemoryLocation::getForArgument(CB, 1, TLI)}; default: break; } @@ -1715,7 +1717,8 @@ } /// Returns true if \p Use completely overwrites \p DefLoc. - bool isCompleteOverwrite(MemoryLocation DefLoc, Instruction *UseInst) { + bool isCompleteOverwrite(MemoryLocation DefLoc, Instruction *DefInst, + Instruction *UseInst) { // UseInst has a MemoryDef associated in MemorySSA. It's possible for a // MemoryDef to not write to memory, e.g. a volatile load is modeled as a // MemoryDef. @@ -1727,9 +1730,13 @@ return false; int64_t InstWriteOffset, DepWriteOffset; - auto CC = getLocForWriteEx(UseInst); - return CC && isOverwrite(*CC, DefLoc, DL, TLI, DepWriteOffset, - InstWriteOffset, BatchAA, &F) == OW_Complete; + if (auto CC = getLocForWriteEx(UseInst)) { + if (isOverwrite(*CC, DefLoc, DL, TLI, DepWriteOffset, + InstWriteOffset, BatchAA, &F) == OW_Complete) + return true; + return isMaskedStoreOverwrite(UseInst, DefInst) == OW_Complete; + } + return false; } /// Returns true if \p Def is not read before returning from the function. @@ -1822,14 +1829,30 @@ return BatchAA.isMustAlias(MaybeTermLoc->first, DefLoc); } + /// Returns true if the intrinsic can read from memory. + /// Intrinsics are calls, and so they are conservatively assumed to + /// read memory. This function allows for a more detailed check. + bool mayReadFromMemory(IntrinsicInst *II) { + switch (II->getIntrinsicID()) { + case Intrinsic::masked_store: + return false; + } + return true; + } + // Returns true if \p Use may read from \p DefLoc. bool isReadClobber(MemoryLocation DefLoc, Instruction *UseInst) { if (!UseInst->mayReadFromMemory()) return false; - if (auto *CB = dyn_cast(UseInst)) + if (auto *CB = dyn_cast(UseInst)) { if (CB->onlyAccessesInaccessibleMemory()) return false; + if (auto *II = dyn_cast(UseInst)) { + if (!mayReadFromMemory(II)) + return false; + } + } // NOTE: For calls, the number of stores removed could be slightly improved // by using AA.callCapturesBefore(UseInst, DefLoc, &DT), but that showed to @@ -1972,6 +1995,11 @@ int64_t InstWriteOffset, DepWriteOffset; auto OR = isOverwrite(DefLoc, *CurrentLoc, DL, TLI, DepWriteOffset, InstWriteOffset, BatchAA, &F); + if (OR == OW_Unknown) { + // isOverwrite punts on MemoryLocations with an imprecise size, such + // as masked stores. Handle this here, somwewhat inelegantly. + OR = isMaskedStoreOverwrite(KillingI, CurrentI); + } // If Current does not write to the same object as KillingDef, check // the next candidate. if (OR == OW_Unknown) { @@ -2115,7 +2143,7 @@ // 3 = Def(1) ; <---- Current (3, 2) = NoAlias, (3,1) = MayAlias, // stores [0,1] if (MemoryDef *UseDef = dyn_cast(UseAccess)) { - if (isCompleteOverwrite(DefLoc, UseInst)) { + if (isCompleteOverwrite(DefLoc, KillingI, UseInst)) { if (!isInvisibleToCallerAfterRet(DefUO) && UseAccess != EarlierAccess) { BasicBlock *MaybeKillingBlock = UseInst->getParent(); @@ -2474,6 +2502,11 @@ OverwriteResult OR = isOverwrite(SILoc, NILoc, State.DL, TLI, DepWriteOffset, InstWriteOffset, State.BatchAA, &F); + if (OR == OW_Unknown) { + // isOverwrite punts on MemoryLocations with an imprecise size, such + // as masked stores. Handle this here, somwewhat inelegantly. + OR = isMaskedStoreOverwrite(NI, SI); + } if (OR == OW_MaybePartial) { auto Iter = State.IOLs.insert( std::make_pair( diff --git a/llvm/test/Transforms/DeadStoreElimination/masked-dead-store.ll b/llvm/test/Transforms/DeadStoreElimination/masked-dead-store.ll --- a/llvm/test/Transforms/DeadStoreElimination/masked-dead-store.ll +++ b/llvm/test/Transforms/DeadStoreElimination/masked-dead-store.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -tbaa -dse -enable-dse-memoryssa=false -S < %s | FileCheck %s +; RUN: opt -tbaa -dse -enable-dse-memoryssa=true -S < %s | FileCheck %s target datalayout = "e-m:e-p:32:32:32-a:0-n16:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048" define dllexport i32 @f0(i8** %a0, i8** %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7) #0 {