diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
--- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -409,6 +409,26 @@
 } // end anonymous namespace
 
+/// Check if two instructions are masked stores that completely
+/// overwrite one another.
+static OverwriteResult isMaskedStoreOverwrite(const Instruction *Later,
+                                              const Instruction *Earlier) {
+  const auto *IIL = dyn_cast<IntrinsicInst>(Later);
+  const auto *IIE = dyn_cast<IntrinsicInst>(Earlier);
+  if (IIL == nullptr || IIE == nullptr)
+    return OW_Unknown;
+  if (IIL->getIntrinsicID() != Intrinsic::masked_store ||
+      IIE->getIntrinsicID() != Intrinsic::masked_store)
+    return OW_Unknown;
+  // Pointers.
+  if (IIL->getArgOperand(1) != IIE->getArgOperand(1))
+    return OW_Unknown;
+  // Masks.
+  if (IIL->getArgOperand(3) != IIE->getArgOperand(3))
+    return OW_Unknown;
+  return OW_Complete;
+}
+
 /// Return 'OW_Complete' if a store to the 'Later' location completely
 /// overwrites a store to the 'Earlier' location. Return OW_MaybePartial
 /// if \p Later does not completely overwrite \p Earlier, but they both
@@ -417,14 +437,18 @@
 /// nothing can be determined.
 template <typename AATy>
 static OverwriteResult
-isOverwrite(const MemoryLocation &Later, const MemoryLocation &Earlier,
+isOverwrite(const Instruction *LaterI, const Instruction *EarlierI,
+            const MemoryLocation &Later, const MemoryLocation &Earlier,
             const DataLayout &DL, const TargetLibraryInfo &TLI,
             int64_t &EarlierOff, int64_t &LaterOff, AATy &AA,
             const Function *F) {
   // FIXME: Vet that this works for size upper-bounds. Seems unlikely that we'll
   // get imprecise values here, though (except for unknown sizes).
-  if (!Later.Size.isPrecise() || !Earlier.Size.isPrecise())
-    return OW_Unknown;
+  if (!Later.Size.isPrecise() || !Earlier.Size.isPrecise()) {
+    // Masked stores have imprecise locations, but we can reason about them
+    // to some extent.
+    return isMaskedStoreOverwrite(LaterI, EarlierI);
+  }
 
   const uint64_t LaterSize = Later.Size.getValue();
   const uint64_t EarlierSize = Earlier.Size.getValue();
@@ -492,24 +516,6 @@
   return OW_MaybePartial;
 }
 
-static OverwriteResult isMaskedStoreOverwrite(Instruction *Later,
-                                              Instruction *Earlier) {
-  auto *IIL = dyn_cast<IntrinsicInst>(Later);
-  auto *IIE = dyn_cast<IntrinsicInst>(Earlier);
-  if (IIL == nullptr || IIE == nullptr)
-    return OW_Unknown;
-  if (IIL->getIntrinsicID() != Intrinsic::masked_store ||
-      IIE->getIntrinsicID() != Intrinsic::masked_store)
-    return OW_Unknown;
-  // Pointers.
-  if (IIL->getArgOperand(1) != IIE->getArgOperand(1))
-    return OW_Unknown;
-  // Masks.
-  if (IIL->getArgOperand(3) != IIE->getArgOperand(3))
-    return OW_Unknown;
-  return OW_Complete;
-}
-
 /// Return 'OW_Complete' if a store to the 'Later' location completely
 /// overwrites a store to the 'Earlier' location, 'OW_End' if the end of the
 /// 'Earlier' location is completely overwritten by 'Later', 'OW_Begin' if the
@@ -1374,13 +1380,9 @@
       if (isRemovable(DepWrite) &&
          !isPossibleSelfRead(Inst, Loc, DepWrite, *TLI, *AA)) {
        int64_t InstWriteOffset, DepWriteOffset;
-       OverwriteResult OR = isOverwrite(Loc, DepLoc, DL, *TLI, DepWriteOffset,
-                                        InstWriteOffset, *AA, BB.getParent());
-       if (OR == OW_Unknown) {
-         // isOverwrite punts on MemoryLocations with an imprecise size, such
-         // as masked stores. Handle this here, somwewhat inelegantly.
-         OR = isMaskedStoreOverwrite(Inst, DepWrite);
-       }
+       OverwriteResult OR = isOverwrite(Inst, DepWrite, Loc, DepLoc, DL, *TLI,
+                                        DepWriteOffset, InstWriteOffset, *AA,
+                                        BB.getParent());
        if (OR == OW_MaybePartial)
          OR = isPartialOverwrite(Loc, DepLoc, DepWriteOffset, InstWriteOffset,
                                  DepWrite, IOL);
@@ -1705,6 +1707,8 @@
       switch (CB->getIntrinsicID()) {
       case Intrinsic::init_trampoline:
         return {MemoryLocation(CB->getArgOperand(0))};
+      case Intrinsic::masked_store:
+        return {MemoryLocation::getForArgument(CB, 1, TLI)};
       default:
         break;
       }
@@ -1715,7 +1719,8 @@
   }
 
   /// Returns true if \p Use completely overwrites \p DefLoc.
-  bool isCompleteOverwrite(MemoryLocation DefLoc, Instruction *UseInst) {
+  bool isCompleteOverwrite(MemoryLocation DefLoc, Instruction *DefInst,
+                           Instruction *UseInst) {
     // UseInst has a MemoryDef associated in MemorySSA. It's possible for a
     // MemoryDef to not write to memory, e.g. a volatile load is modeled as a
     // MemoryDef.
@@ -1727,9 +1732,10 @@
       return false;
 
     int64_t InstWriteOffset, DepWriteOffset;
-    auto CC = getLocForWriteEx(UseInst);
-    return CC && isOverwrite(*CC, DefLoc, DL, TLI, DepWriteOffset,
-                             InstWriteOffset, BatchAA, &F) == OW_Complete;
+    if (auto CC = getLocForWriteEx(UseInst))
+      return isOverwrite(UseInst, DefInst, *CC, DefLoc, DL, TLI, DepWriteOffset,
+                         InstWriteOffset, BatchAA, &F) == OW_Complete;
+    return false;
   }
 
   /// Returns true if \p Def is not read before returning from the function.
@@ -1822,14 +1828,30 @@
     return BatchAA.isMustAlias(MaybeTermLoc->first, DefLoc);
   }
 
+  /// Returns true if the intrinsic can read from memory.
+  /// Intrinsics are calls, and so they are conservatively assumed to
+  /// read memory. This function allows for a more detailed check.
+  bool mayReadFromMemory(IntrinsicInst *II) {
+    switch (II->getIntrinsicID()) {
+    case Intrinsic::masked_store:
+      return false;
+    }
+    return true;
+  }
+
   // Returns true if \p Use may read from \p DefLoc.
   bool isReadClobber(MemoryLocation DefLoc, Instruction *UseInst) {
     if (!UseInst->mayReadFromMemory())
       return false;
 
-    if (auto *CB = dyn_cast<CallBase>(UseInst))
+    if (auto *CB = dyn_cast<CallBase>(UseInst)) {
       if (CB->onlyAccessesInaccessibleMemory())
         return false;
+      if (auto *II = dyn_cast<IntrinsicInst>(UseInst)) {
+        if (!mayReadFromMemory(II))
+          return false;
+      }
+    }
 
     // NOTE: For calls, the number of stores removed could be slightly improved
     // by using AA.callCapturesBefore(UseInst, DefLoc, &DT), but that showed to
@@ -1970,8 +1992,8 @@
         continue;
       } else {
         int64_t InstWriteOffset, DepWriteOffset;
-        auto OR = isOverwrite(DefLoc, *CurrentLoc, DL, TLI, DepWriteOffset,
-                              InstWriteOffset, BatchAA, &F);
+        auto OR = isOverwrite(KillingI, CurrentI, DefLoc, *CurrentLoc, DL, TLI,
+                              DepWriteOffset, InstWriteOffset, BatchAA, &F);
         // If Current does not write to the same object as KillingDef, check
         // the next candidate.
         if (OR == OW_Unknown) {
@@ -2115,7 +2137,7 @@
       // 3 = Def(1)   ; <---- Current  (3, 2) = NoAlias, (3,1) = MayAlias,
       //                  stores [0,1]
       if (MemoryDef *UseDef = dyn_cast<MemoryDef>(UseAccess)) {
-        if (isCompleteOverwrite(DefLoc, UseInst)) {
+        if (isCompleteOverwrite(DefLoc, KillingI, UseInst)) {
          if (!isInvisibleToCallerAfterRet(DefUO) &&
              UseAccess != EarlierAccess) {
            BasicBlock *MaybeKillingBlock = UseInst->getParent();
@@ -2472,7 +2494,7 @@
         // Check if NI overwrites SI.
         int64_t InstWriteOffset, DepWriteOffset;
         OverwriteResult OR =
-            isOverwrite(SILoc, NILoc, State.DL, TLI, DepWriteOffset,
+            isOverwrite(SI, NI, SILoc, NILoc, State.DL, TLI, DepWriteOffset,
                         InstWriteOffset, State.BatchAA, &F);
         if (OR == OW_MaybePartial) {
           auto Iter = State.IOLs.insert(
diff --git a/llvm/test/Transforms/DeadStoreElimination/masked-dead-store.ll b/llvm/test/Transforms/DeadStoreElimination/masked-dead-store.ll
--- a/llvm/test/Transforms/DeadStoreElimination/masked-dead-store.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/masked-dead-store.ll
@@ -1,5 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt -tbaa -dse -enable-dse-memoryssa=false -S < %s | FileCheck %s
+; RUN: opt -tbaa -dse -enable-dse-memoryssa=true -S < %s | FileCheck %s
 target datalayout = "e-m:e-p:32:32:32-a:0-n16:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048"
 
 define dllexport i32 @f0(i8** %a0, i8** %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7) #0 {
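
Illustrative note (not part of the patch): isMaskedStoreOverwrite only fires when two llvm.masked.store calls use the same SSA values for the pointer (operand 1) and the mask (operand 3); any mismatch conservatively yields OW_Unknown, since the masks are compared as values rather than element-wise. A minimal IR sketch of the pattern this lets DSE handle, with hypothetical function and value names:

; Sketch only: the first masked store is completely overwritten by the
; second (same pointer %p, same mask %m, nothing reads %p in between),
; so DSE can now delete it in the MemorySSA-based pass as well.
declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32, <4 x i1>)

define void @two_masked_stores(<4 x i32>* %p, <4 x i32> %v0, <4 x i32> %v1, <4 x i1> %m) {
  call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %v0, <4 x i32>* %p, i32 4, <4 x i1> %m)
  call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %v1, <4 x i32>* %p, i32 4, <4 x i1> %m)
  ret void
}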