diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp --- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -13,10 +13,10 @@ // in between both MemoryDefs. A bit more concretely: // // For all MemoryDefs StartDef: -// 1. Get the next dominating clobbering MemoryDef (EarlierAccess) by walking +// 1. Get the next dominating clobbering MemoryDef (KilledAccess) by walking // upwards. -// 2. Check that there are no reads between EarlierAccess and the StartDef by -// checking all uses starting at EarlierAccess and walking until we see +// 2. Check that there are no reads between KilledAccess and the StartDef by +// checking all uses starting at KilledAccess and walking until we see // StartDef. // 3. For each found CurrentDef, check that: // 1. There are no barrier instructions between CurrentDef and StartDef (like @@ -334,147 +334,150 @@ } // end anonymous namespace /// Check if two instruction are masked stores that completely -/// overwrite one another. More specifically, \p Later has to -/// overwrite \p Earlier. -static OverwriteResult isMaskedStoreOverwrite(const Instruction *Later, - const Instruction *Earlier, +/// overwrite one another. More specifically, \p KillingI has to +/// overwrite \p KilledI. +static OverwriteResult isMaskedStoreOverwrite(const Instruction *KillingI, + const Instruction *KilledI, BatchAAResults &AA) { - const auto *IIL = dyn_cast(Later); - const auto *IIE = dyn_cast(Earlier); - if (IIL == nullptr || IIE == nullptr) + const auto *KillingII = dyn_cast(KillingI); + const auto *KilledII = dyn_cast(KilledI); + if (KillingII == nullptr || KilledII == nullptr) return OW_Unknown; - if (IIL->getIntrinsicID() != Intrinsic::masked_store || - IIE->getIntrinsicID() != Intrinsic::masked_store) + if (KillingII->getIntrinsicID() != Intrinsic::masked_store || + KilledII->getIntrinsicID() != Intrinsic::masked_store) return OW_Unknown; // Pointers. - Value *LP = IIL->getArgOperand(1)->stripPointerCasts(); - Value *EP = IIE->getArgOperand(1)->stripPointerCasts(); - if (LP != EP && !AA.isMustAlias(LP, EP)) + Value *KillingPtr = KillingII->getArgOperand(1)->stripPointerCasts(); + Value *KilledPtr = KilledII->getArgOperand(1)->stripPointerCasts(); + if (KillingPtr != KilledPtr && !AA.isMustAlias(KillingPtr, KilledPtr)) return OW_Unknown; // Masks. - // TODO: check that Later's mask is a superset of the Earlier's mask. - if (IIL->getArgOperand(3) != IIE->getArgOperand(3)) + // TODO: check that Killing's mask is a superset of the Killed's mask. + if (KillingII->getArgOperand(3) != KilledII->getArgOperand(3)) return OW_Unknown; return OW_Complete; } -/// Return 'OW_Complete' if a store to the 'Later' location completely -/// overwrites a store to the 'Earlier' location, 'OW_End' if the end of the -/// 'Earlier' location is completely overwritten by 'Later', 'OW_Begin' if the -/// beginning of the 'Earlier' location is overwritten by 'Later'. -/// 'OW_PartialEarlierWithFullLater' means that an earlier (big) store was -/// overwritten by a latter (smaller) store which doesn't write outside the big +/// Return 'OW_Complete' if a store to the 'KillingLoc' location completely +/// overwrites a store to the 'KilledLoc' location, 'OW_End' if the end of the +/// 'KilledLoc' location is completely overwritten by 'KillingLoc', 'OW_Begin' +/// if the beginning of the 'KilledLoc' location is overwritten by 'KillingLoc'. 
+/// 'OW_PartialEarlierWithFullLater' means that an killed (big) store was +/// overwritten by a killing (smaller) store which doesn't write outside the big /// store's memory locations. Returns 'OW_Unknown' if nothing can be determined. -/// NOTE: This function must only be called if both \p Later and \p Earlier -/// write to the same underlying object with valid \p EarlierOff and \p -/// LaterOff. -static OverwriteResult isPartialOverwrite(const MemoryLocation &Later, - const MemoryLocation &Earlier, - int64_t EarlierOff, int64_t LaterOff, - Instruction *DepWrite, +/// NOTE: This function must only be called if both \p KillingLoc and \p +/// KilledLoc belong to the same underlying object with valid \p KillingOff and +/// \p KilledOff. +static OverwriteResult isPartialOverwrite(const MemoryLocation &KillingLoc, + const MemoryLocation &KilledLoc, + int64_t KillingOff, int64_t KilledOff, + Instruction *KilledI, InstOverlapIntervalsTy &IOL) { - const uint64_t LaterSize = Later.Size.getValue(); - const uint64_t EarlierSize = Earlier.Size.getValue(); + const uint64_t KillingSize = KillingLoc.Size.getValue(); + const uint64_t KilledSize = KilledLoc.Size.getValue(); // We may now overlap, although the overlap is not complete. There might also // be other incomplete overlaps, and together, they might cover the complete - // earlier write. + // killed store. // Note: The correctness of this logic depends on the fact that this function // is not even called providing DepWrite when there are any intervening reads. if (EnablePartialOverwriteTracking && - LaterOff < int64_t(EarlierOff + EarlierSize) && - int64_t(LaterOff + LaterSize) >= EarlierOff) { + KillingOff < int64_t(KilledOff + KilledSize) && + int64_t(KillingOff + KillingSize) >= KilledOff) { // Insert our part of the overlap into the map. - auto &IM = IOL[DepWrite]; - LLVM_DEBUG(dbgs() << "DSE: Partial overwrite: Earlier [" << EarlierOff - << ", " << int64_t(EarlierOff + EarlierSize) - << ") Later [" << LaterOff << ", " - << int64_t(LaterOff + LaterSize) << ")\n"); + auto &IM = IOL[KilledI]; + LLVM_DEBUG(dbgs() << "DSE: Partial overwrite: KilledLoc [" << KilledOff + << ", " << int64_t(KilledOff + KilledSize) + << ") KillingLoc [" << KillingOff << ", " + << int64_t(KillingOff + KillingSize) << ")\n"); // Make sure that we only insert non-overlapping intervals and combine // adjacent intervals. The intervals are stored in the map with the ending // offset as the key (in the half-open sense) and the starting offset as // the value. - int64_t LaterIntStart = LaterOff, LaterIntEnd = LaterOff + LaterSize; + int64_t KillingIntStart = KillingOff; + int64_t KillingIntEnd = KillingOff + KillingSize; - // Find any intervals ending at, or after, LaterIntStart which start - // before LaterIntEnd. - auto ILI = IM.lower_bound(LaterIntStart); - if (ILI != IM.end() && ILI->second <= LaterIntEnd) { + // Find any intervals ending at, or after, KillingIntStart which start + // before KillingIntEnd. + auto ILI = IM.lower_bound(KillingIntStart); + if (ILI != IM.end() && ILI->second <= KillingIntEnd) { // This existing interval is overlapped with the current store somewhere - // in [LaterIntStart, LaterIntEnd]. Merge them by erasing the existing + // in [KillingIntStart, KillingIntEnd]. Merge them by erasing the existing // intervals and adjusting our start and end. 
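The comment above describes how overlapping pieces of the killed store are accumulated and merged. A minimal, self-contained sketch of that bookkeeping, with a plain std::map standing in for OverlapIntervalsTy (end offset as key, start offset as value, intervals half-open); the helper names are illustrative, not the pass's API.

#include <algorithm>
#include <cstdint>
#include <map>

using OverlapIntervals = std::map<int64_t, int64_t>; // end offset -> start offset

// Record that bytes [Start, End) of the killed store were overwritten by some
// killing store, merging with any already-recorded intervals that touch it.
static void addOverlap(OverlapIntervals &IM, int64_t Start, int64_t End) {
  auto It = IM.lower_bound(Start); // first interval ending at or after Start
  while (It != IM.end() && It->second <= End) {
    Start = std::min(Start, It->second);
    End = std::max(End, It->first);
    It = IM.erase(It);
  }
  IM[End] = Start;
}

// True once the accumulated overlaps cover all of
// [KilledOff, KilledOff + KilledSize), i.e. the "full overwrite from partials"
// case that returns OW_Complete.
static bool coversKilledStore(const OverlapIntervals &IM, int64_t KilledOff,
                              uint64_t KilledSize) {
  if (IM.empty())
    return false;
  auto First = IM.begin();
  return First->second <= KilledOff &&
         First->first >= KilledOff + int64_t(KilledSize);
}

// Example: addOverlap(IM, 0, 4) followed by addOverlap(IM, 4, 8) leaves the
// single interval [0, 8), so coversKilledStore(IM, 0, 8) is true.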
- LaterIntStart = std::min(LaterIntStart, ILI->second); - LaterIntEnd = std::max(LaterIntEnd, ILI->first); + KillingIntStart = std::min(KillingIntStart, ILI->second); + KillingIntEnd = std::max(KillingIntEnd, ILI->first); ILI = IM.erase(ILI); // Continue erasing and adjusting our end in case other previous // intervals are also overlapped with the current store. // - // |--- ealier 1 ---| |--- ealier 2 ---| - // |------- later---------| + // |--- killed 1 ---| |--- killed 2 ---| + // |------- killing---------| // - while (ILI != IM.end() && ILI->second <= LaterIntEnd) { - assert(ILI->second > LaterIntStart && "Unexpected interval"); - LaterIntEnd = std::max(LaterIntEnd, ILI->first); + while (ILI != IM.end() && ILI->second <= KillingIntEnd) { + assert(ILI->second > KillingIntStart && "Unexpected interval"); + KillingIntEnd = std::max(KillingIntEnd, ILI->first); ILI = IM.erase(ILI); } } - IM[LaterIntEnd] = LaterIntStart; + IM[KillingIntEnd] = KillingIntStart; ILI = IM.begin(); - if (ILI->second <= EarlierOff && - ILI->first >= int64_t(EarlierOff + EarlierSize)) { - LLVM_DEBUG(dbgs() << "DSE: Full overwrite from partials: Earlier [" - << EarlierOff << ", " - << int64_t(EarlierOff + EarlierSize) - << ") Composite Later [" << ILI->second << ", " + if (ILI->second <= KilledOff && + ILI->first >= int64_t(KilledOff + KilledSize)) { + LLVM_DEBUG(dbgs() << "DSE: Full overwrite from partials: KilledLoc [" + << KilledOff << ", " + << int64_t(KilledOff + KilledSize) + << ") Composite KillingLoc [" << ILI->second << ", " << ILI->first << ")\n"); ++NumCompletePartials; return OW_Complete; } } - // Check for an earlier store which writes to all the memory locations that - // the later store writes to. - if (EnablePartialStoreMerging && LaterOff >= EarlierOff && - int64_t(EarlierOff + EarlierSize) > LaterOff && - uint64_t(LaterOff - EarlierOff) + LaterSize <= EarlierSize) { - LLVM_DEBUG(dbgs() << "DSE: Partial overwrite an earlier load [" - << EarlierOff << ", " - << int64_t(EarlierOff + EarlierSize) - << ") by a later store [" << LaterOff << ", " - << int64_t(LaterOff + LaterSize) << ")\n"); + // Check for a killed store which writes to all the memory locations that + // the killing store writes to. + if (EnablePartialStoreMerging && KillingOff >= KilledOff && + int64_t(KilledOff + KilledSize) > KillingOff && + uint64_t(KillingOff - KilledOff) + KillingSize <= KilledSize) { + LLVM_DEBUG(dbgs() << "DSE: Partial overwrite a killed load [" + << KilledOff << ", " + << int64_t(KilledOff + KilledSize) + << ") by a killing store [" << KillingOff << ", " + << int64_t(KillingOff + KillingSize) << ")\n"); // TODO: Maybe come up with a better name? return OW_PartialEarlierWithFullLater; } - // Another interesting case is if the later store overwrites the end of the - // earlier store. + // Another interesting case is if the killing store overwrites the end of the + // killed store. // - // |--earlier--| - // |-- later --| + // |--killed--| + // |-- killing --| // - // In this case we may want to trim the size of earlier to avoid generating - // writes to addresses which will definitely be overwritten later + // In this case we may want to trim the size of the killed store to avoid + // generating stores to addresses which will definitely be overwritten by the + // killing store.
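The partial cases distinguished here (full containment for store merging, tail overwrite, head overwrite) reduce to a few offset comparisons. A self-contained sketch of that arithmetic, ignoring the EnablePartial* flags and the interval tracking; plain integers stand in for MemoryLocation and the names are illustrative.

#include <cstdint>

enum OverwriteResult { OW_Unknown, OW_End, OW_Begin, OW_PartialEarlierWithFullLater };

// Classify how a killing write [KillingOff, KillingOff + KillingSize) relates
// to a killed write [KilledOff, KilledOff + KilledSize). Both offsets are
// relative to the same underlying object, and the complete-overwrite case has
// already been handled by isOverwrite.
static OverwriteResult classifyPartialOverwrite(int64_t KillingOff,
                                                uint64_t KillingSize,
                                                int64_t KilledOff,
                                                uint64_t KilledSize) {
  // Killing store lies entirely inside the killed store: candidate for
  // merging the two constant values.
  if (KillingOff >= KilledOff && int64_t(KilledOff + KilledSize) > KillingOff &&
      uint64_t(KillingOff - KilledOff) + KillingSize <= KilledSize)
    return OW_PartialEarlierWithFullLater;
  // Killing store covers the tail of the killed store: shorten it at the end.
  if (KillingOff > KilledOff && KillingOff < int64_t(KilledOff + KilledSize) &&
      int64_t(KillingOff + KillingSize) >= int64_t(KilledOff + KilledSize))
    return OW_End;
  // Killing store covers the head of the killed store: shorten it at the
  // beginning.
  if (KillingOff <= KilledOff && int64_t(KillingOff + KillingSize) > KilledOff)
    return OW_Begin;
  return OW_Unknown;
}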
if (!EnablePartialOverwriteTracking && - (LaterOff > EarlierOff && LaterOff < int64_t(EarlierOff + EarlierSize) && - int64_t(LaterOff + LaterSize) >= int64_t(EarlierOff + EarlierSize))) + (KillingOff > KilledOff && KillingOff < int64_t(KilledOff + KilledSize) && + int64_t(KillingOff + KillingSize) >= int64_t(KilledOff + KilledSize))) return OW_End; - // Finally, we also need to check if the later store overwrites the beginning - // of the earlier store. + // Finally, we also need to check if the killing store overwrites the + // beginning of the killed store. // - // |--earlier--| - // |-- later --| + // |--killed--| + // |-- killing --| // // In this case we may want to move the destination address and trim the size - // of earlier to avoid generating writes to addresses which will definitely - // be overwritten later. + // of killed store to avoid generating stores to addresses which will + // definitely be overwritten killing store. if (!EnablePartialOverwriteTracking && - (LaterOff <= EarlierOff && int64_t(LaterOff + LaterSize) > EarlierOff)) { - assert(int64_t(LaterOff + LaterSize) < int64_t(EarlierOff + EarlierSize) && + (KillingOff <= KilledOff && + int64_t(KillingOff + KillingSize) > KilledOff)) { + assert(int64_t(KillingOff + KillingSize) < int64_t(KilledOff + KilledSize) && "Expect to be handled as OW_Complete"); return OW_Begin; } @@ -569,11 +572,11 @@ return true; } -static bool tryToShorten(Instruction *EarlierWrite, int64_t &EarlierStart, - uint64_t &EarlierSize, int64_t LaterStart, - uint64_t LaterSize, bool IsOverwriteEnd) { - auto *EarlierIntrinsic = cast(EarlierWrite); - Align PrefAlign = EarlierIntrinsic->getDestAlign().valueOrOne(); +static bool tryToShorten(Instruction *KilledI, int64_t &KilledStart, + uint64_t &KilledSize, int64_t KillingStart, + uint64_t KillingSize, bool IsOverwriteEnd) { + auto *KilledIntrinsic = cast(KilledI); + Align PrefAlign = KilledIntrinsic->getDestAlign().valueOrOne(); // We assume that memet/memcpy operates in chunks of the "largest" native // type size and aligned on the same value. That means optimal start and size @@ -594,19 +597,19 @@ // Compute start and size of the region to remove. Make sure 'PrefAlign' is // maintained on the remaining store. if (IsOverwriteEnd) { - // Calculate required adjustment for 'LaterStart'in order to keep remaining - // store size aligned on 'PerfAlign'. + // Calculate required adjustment for 'KillingStart' in order to keep + // remaining store size aligned on 'PerfAlign'. uint64_t Off = - offsetToAlignment(uint64_t(LaterStart - EarlierStart), PrefAlign); - ToRemoveStart = LaterStart + Off; - if (EarlierSize <= uint64_t(ToRemoveStart - EarlierStart)) + offsetToAlignment(uint64_t(KillingStart - KilledStart), PrefAlign); + ToRemoveStart = KillingStart + Off; + if (KilledSize <= uint64_t(ToRemoveStart - KilledStart)) return false; - ToRemoveSize = EarlierSize - uint64_t(ToRemoveStart - EarlierStart); + ToRemoveSize = KilledSize - uint64_t(ToRemoveStart - KilledStart); } else { - ToRemoveStart = EarlierStart; - assert(LaterSize >= uint64_t(EarlierStart - LaterStart) && + ToRemoveStart = KilledStart; + assert(KillingSize >= uint64_t(KilledStart - KillingStart) && "Not overlapping accesses?"); - ToRemoveSize = LaterSize - uint64_t(EarlierStart - LaterStart); + ToRemoveSize = KillingSize - uint64_t(KilledStart - KillingStart); // Calculate required adjustment for 'ToRemoveSize'in order to keep // start of the remaining store aligned on 'PerfAlign'. 
uint64_t Off = offsetToAlignment(ToRemoveSize, PrefAlign); @@ -620,10 +623,10 @@ } assert(ToRemoveSize > 0 && "Shouldn't reach here if nothing to remove"); - assert(EarlierSize > ToRemoveSize && "Can't remove more than original size"); + assert(KilledSize > ToRemoveSize && "Can't remove more than original size"); - uint64_t NewSize = EarlierSize - ToRemoveSize; - if (auto *AMI = dyn_cast(EarlierWrite)) { + uint64_t NewSize = KilledSize - ToRemoveSize; + if (auto *AMI = dyn_cast(KilledI)) { // When shortening an atomic memory intrinsic, the newly shortened // length must remain an integer multiple of the element size. const uint32_t ElementSize = AMI->getElementSizeInBytes(); @@ -633,64 +636,64 @@ LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n OW " << (IsOverwriteEnd ? "END" : "BEGIN") << ": " - << *EarlierWrite << "\n KILLER [" << ToRemoveStart << ", " + << *KilledI << "\n KILLER [" << ToRemoveStart << ", " << int64_t(ToRemoveStart + ToRemoveSize) << ")\n"); - Value *EarlierWriteLength = EarlierIntrinsic->getLength(); + Value *KilledWriteLength = KilledIntrinsic->getLength(); Value *TrimmedLength = - ConstantInt::get(EarlierWriteLength->getType(), NewSize); - EarlierIntrinsic->setLength(TrimmedLength); - EarlierIntrinsic->setDestAlignment(PrefAlign); + ConstantInt::get(KilledWriteLength->getType(), NewSize); + KilledIntrinsic->setLength(TrimmedLength); + KilledIntrinsic->setDestAlignment(PrefAlign); if (!IsOverwriteEnd) { - Value *OrigDest = EarlierIntrinsic->getRawDest(); + Value *OrigDest = KilledIntrinsic->getRawDest(); Type *Int8PtrTy = - Type::getInt8PtrTy(EarlierIntrinsic->getContext(), + Type::getInt8PtrTy(KilledIntrinsic->getContext(), OrigDest->getType()->getPointerAddressSpace()); Value *Dest = OrigDest; if (OrigDest->getType() != Int8PtrTy) - Dest = CastInst::CreatePointerCast(OrigDest, Int8PtrTy, "", EarlierWrite); + Dest = CastInst::CreatePointerCast(OrigDest, Int8PtrTy, "", KilledI); Value *Indices[1] = { - ConstantInt::get(EarlierWriteLength->getType(), ToRemoveSize)}; + ConstantInt::get(KilledWriteLength->getType(), ToRemoveSize)}; Instruction *NewDestGEP = GetElementPtrInst::CreateInBounds( - Type::getInt8Ty(EarlierIntrinsic->getContext()), - Dest, Indices, "", EarlierWrite); - NewDestGEP->setDebugLoc(EarlierIntrinsic->getDebugLoc()); + Type::getInt8Ty(KilledIntrinsic->getContext()), + Dest, Indices, "", KilledI); + NewDestGEP->setDebugLoc(KilledIntrinsic->getDebugLoc()); if (NewDestGEP->getType() != OrigDest->getType()) NewDestGEP = CastInst::CreatePointerCast(NewDestGEP, OrigDest->getType(), - "", EarlierWrite); - EarlierIntrinsic->setDest(NewDestGEP); + "", KilledI); + KilledIntrinsic->setDest(NewDestGEP); } - // Finally update start and size of earlier access. + // Finally update start and size of killed access. 
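The overwrite-end branch of the shortening above reduces to a small amount of arithmetic. A simplified sketch that only computes the new length of the killed memset/memcpy, assuming the killing store covers everything from KillingStart to the end of the killed store; the real code additionally rewrites the intrinsic and respects atomic element sizes.

#include <cassert>
#include <cstdint>

// Bytes to add to Value to reach the next multiple of Align (a stand-in for
// llvm::offsetToAlignment).
static uint64_t offsetToAlign(uint64_t Value, uint64_t Align) {
  return (Align - (Value % Align)) % Align;
}

// Returns the shortened length for a killed intrinsic writing
// [KilledStart, KilledStart + KilledSize) whose tail, starting at KillingStart,
// is overwritten; returns 0 if nothing can be removed. The surviving prefix is
// kept a multiple of PrefAlign, as in the patch.
static uint64_t shortenEnd(int64_t KilledStart, uint64_t KilledSize,
                           int64_t KillingStart, uint64_t PrefAlign) {
  assert(KillingStart > KilledStart && "killing store must start inside");
  uint64_t Off = offsetToAlign(uint64_t(KillingStart - KilledStart), PrefAlign);
  int64_t ToRemoveStart = KillingStart + int64_t(Off);
  if (KilledSize <= uint64_t(ToRemoveStart - KilledStart))
    return 0; // re-aligning ate the whole overlap; keep the store as is
  uint64_t ToRemoveSize = KilledSize - uint64_t(ToRemoveStart - KilledStart);
  return KilledSize - ToRemoveSize; // new length of the shortened intrinsic
}

// Example: shortenEnd(0, 16, 5, 4) == 8, i.e. a 16-byte memset whose bytes
// [5, 16) are overwritten is trimmed to its first 8 (aligned) bytes.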
if (!IsOverwriteEnd) - EarlierStart += ToRemoveSize; - EarlierSize = NewSize; + KilledStart += ToRemoveSize; + KilledSize = NewSize; return true; } -static bool tryToShortenEnd(Instruction *EarlierWrite, +static bool tryToShortenEnd(Instruction *KilledI, OverlapIntervalsTy &IntervalMap, - int64_t &EarlierStart, uint64_t &EarlierSize) { - if (IntervalMap.empty() || !isShortenableAtTheEnd(EarlierWrite)) + int64_t &KilledStart, uint64_t &KilledSize) { + if (IntervalMap.empty() || !isShortenableAtTheEnd(KilledI)) return false; OverlapIntervalsTy::iterator OII = --IntervalMap.end(); - int64_t LaterStart = OII->second; - uint64_t LaterSize = OII->first - LaterStart; + int64_t KillingStart = OII->second; + uint64_t KillingSize = OII->first - KillingStart; - assert(OII->first - LaterStart >= 0 && "Size expected to be positive"); + assert(OII->first - KillingStart >= 0 && "Size expected to be positive"); - if (LaterStart > EarlierStart && - // Note: "LaterStart - EarlierStart" is known to be positive due to + if (KillingStart > KilledStart && + // Note: "KillingStart - KilledStart" is known to be positive due to // preceding check. - (uint64_t)(LaterStart - EarlierStart) < EarlierSize && - // Note: "EarlierSize - (uint64_t)(LaterStart - EarlierStart)" is known to + (uint64_t)(KillingStart - KilledStart) < KilledSize && + // Note: "KilledSize - (uint64_t)(KillingStart - KilledStart)" is known to // be non negative due to preceding checks. - LaterSize >= EarlierSize - (uint64_t)(LaterStart - EarlierStart)) { - if (tryToShorten(EarlierWrite, EarlierStart, EarlierSize, LaterStart, - LaterSize, true)) { + KillingSize >= KilledSize - (uint64_t)(KillingStart - KilledStart)) { + if (tryToShorten(KilledI, KilledStart, KilledSize, KillingStart, + KillingSize, true)) { IntervalMap.erase(OII); return true; } @@ -698,28 +701,28 @@ return false; } -static bool tryToShortenBegin(Instruction *EarlierWrite, +static bool tryToShortenBegin(Instruction *KilledI, OverlapIntervalsTy &IntervalMap, - int64_t &EarlierStart, uint64_t &EarlierSize) { - if (IntervalMap.empty() || !isShortenableAtTheBeginning(EarlierWrite)) + int64_t &KilledStart, uint64_t &KilledSize) { + if (IntervalMap.empty() || !isShortenableAtTheBeginning(KilledI)) return false; OverlapIntervalsTy::iterator OII = IntervalMap.begin(); - int64_t LaterStart = OII->second; - uint64_t LaterSize = OII->first - LaterStart; + int64_t KillingStart = OII->second; + uint64_t KillingSize = OII->first - KillingStart; - assert(OII->first - LaterStart >= 0 && "Size expected to be positive"); + assert(OII->first - KillingStart >= 0 && "Size expected to be positive"); - if (LaterStart <= EarlierStart && - // Note: "EarlierStart - LaterStart" is known to be non negative due to + if (KillingStart <= KilledStart && + // Note: "KilledStart - KillingStart" is known to be non negative due to // preceding check. - LaterSize > (uint64_t)(EarlierStart - LaterStart)) { - // Note: "LaterSize - (uint64_t)(EarlierStart - LaterStart)" is known to be + KillingSize > (uint64_t)(KilledStart - KillingStart)) { + // Note: "KillingSize - (uint64_t)(KilledStart - KillingStart)" is known to be // positive due to preceding checks.
- assert(LaterSize - (uint64_t)(EarlierStart - LaterStart) < EarlierSize && + assert(KillingSize - (uint64_t)(KilledStart - KillingStart) < KilledSize && "Should have been handled as OW_Complete"); - if (tryToShorten(EarlierWrite, EarlierStart, EarlierSize, LaterStart, - LaterSize, false)) { + if (tryToShorten(KilledI, KilledStart, KilledSize, KillingStart, + KillingSize, false)) { IntervalMap.erase(OII); return true; } @@ -732,66 +735,67 @@ const TargetLibraryInfo &TLI) { bool Changed = false; for (auto OI : IOL) { - Instruction *EarlierWrite = OI.first; - MemoryLocation Loc = getLocForWrite(EarlierWrite, TLI); - assert(isRemovable(EarlierWrite) && "Expect only removable instruction"); + Instruction *KilledI = OI.first; + MemoryLocation Loc = getLocForWrite(KilledI, TLI); + assert(isRemovable(KilledI) && "Expect only removable instruction"); const Value *Ptr = Loc.Ptr->stripPointerCasts(); - int64_t EarlierStart = 0; - uint64_t EarlierSize = Loc.Size.getValue(); - GetPointerBaseWithConstantOffset(Ptr, EarlierStart, DL); + int64_t KilledStart = 0; + uint64_t KilledSize = Loc.Size.getValue(); + GetPointerBaseWithConstantOffset(Ptr, KilledStart, DL); OverlapIntervalsTy &IntervalMap = OI.second; Changed |= - tryToShortenEnd(EarlierWrite, IntervalMap, EarlierStart, EarlierSize); + tryToShortenEnd(KilledI, IntervalMap, KilledStart, KilledSize); if (IntervalMap.empty()) continue; Changed |= - tryToShortenBegin(EarlierWrite, IntervalMap, EarlierStart, EarlierSize); + tryToShortenBegin(KilledI, IntervalMap, KilledStart, KilledSize); } return Changed; } static Constant *tryToMergePartialOverlappingStores( - StoreInst *Earlier, StoreInst *Later, int64_t InstWriteOffset, - int64_t DepWriteOffset, const DataLayout &DL, BatchAAResults &AA, + StoreInst *KillingI, StoreInst *KilledI, int64_t KillingOffset, + int64_t KilledOffset, const DataLayout &DL, BatchAAResults &AA, DominatorTree *DT) { - if (Earlier && isa(Earlier->getValueOperand()) && - DL.typeSizeEqualsStoreSize(Earlier->getValueOperand()->getType()) && - Later && isa(Later->getValueOperand()) && - DL.typeSizeEqualsStoreSize(Later->getValueOperand()->getType()) && - memoryIsNotModifiedBetween(Earlier, Later, AA, DL, DT)) { + if (KilledI && isa(KilledI->getValueOperand()) && + DL.typeSizeEqualsStoreSize(KilledI->getValueOperand()->getType()) && + KillingI && isa(KillingI->getValueOperand()) && + DL.typeSizeEqualsStoreSize(KillingI->getValueOperand()->getType()) && + memoryIsNotModifiedBetween(KilledI, KillingI, AA, DL, DT)) { // If the store we find is: // a) partially overwritten by the store to 'Loc' - // b) the later store is fully contained in the earlier one and + // b) the killing store is fully contained in the killed one and // c) they both have a constant value // d) none of the two stores need padding - // Merge the two stores, replacing the earlier store's value with a + // Merge the two stores, replacing the killed store's value with a // merge of both values. 
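In the pass this merge is done with APInt; restricted to integer stores of at most 64 bits, the bit manipulation amounts to the following sketch (illustrative names, not the actual helper).

#include <cstdint>

// KilledValue is the wide constant of the earlier (killed) store, KillingValue
// the narrower constant of the later (killing) store, written ByteOffset bytes
// into the killed store. KilledBits/KillingBits are the two store widths.
static uint64_t mergeConstantStores(uint64_t KilledValue, unsigned KilledBits,
                                    uint64_t KillingValue, unsigned KillingBits,
                                    unsigned ByteOffset, bool IsBigEndian) {
  unsigned BitOffset = ByteOffset * 8;
  unsigned Shift =
      IsBigEndian ? KilledBits - BitOffset - KillingBits : BitOffset;
  uint64_t Mask =
      (KillingBits < 64 ? (1ULL << KillingBits) - 1 : ~0ULL) << Shift;
  // Clear the killed store's bits that the killing store replaces, then OR in
  // the killing value, shifted into position.
  return (KilledValue & ~Mask) | (KillingValue << Shift);
}

// Example (little-endian): an i32 store of 0x11223344 followed by an i8 store
// of 0xAA at byte offset 1 merges to 0x1122AA44.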
// TODO: Deal with other constant types (vectors, etc), and probably // some mem intrinsics (if needed) - APInt EarlierValue = - cast(Earlier->getValueOperand())->getValue(); - APInt LaterValue = cast(Later->getValueOperand())->getValue(); - unsigned LaterBits = LaterValue.getBitWidth(); - assert(EarlierValue.getBitWidth() > LaterValue.getBitWidth()); - LaterValue = LaterValue.zext(EarlierValue.getBitWidth()); + APInt KilledValue = + cast(KilledI->getValueOperand())->getValue(); + APInt KillingValue = + cast(KillingI->getValueOperand())->getValue(); + unsigned KillingBits = KillingValue.getBitWidth(); + assert(KilledValue.getBitWidth() > KillingValue.getBitWidth()); + KillingValue = KillingValue.zext(KilledValue.getBitWidth()); // Offset of the smaller store inside the larger store - unsigned BitOffsetDiff = (InstWriteOffset - DepWriteOffset) * 8; - unsigned LShiftAmount = DL.isBigEndian() ? EarlierValue.getBitWidth() - - BitOffsetDiff - LaterBits + unsigned BitOffsetDiff = (KillingOffset - KilledOffset) * 8; + unsigned LShiftAmount = DL.isBigEndian() ? KilledValue.getBitWidth() - + BitOffsetDiff - KillingBits : BitOffsetDiff; - APInt Mask = APInt::getBitsSet(EarlierValue.getBitWidth(), LShiftAmount, - LShiftAmount + LaterBits); + APInt Mask = APInt::getBitsSet(KilledValue.getBitWidth(), LShiftAmount, + LShiftAmount + KillingBits); // Clear the bits we'll be replacing, then OR with the smaller // store, shifted appropriately. - APInt Merged = (EarlierValue & ~Mask) | (LaterValue << LShiftAmount); - LLVM_DEBUG(dbgs() << "DSE: Merge Stores:\n Earlier: " << *Earlier - << "\n Later: " << *Later + APInt Merged = (KilledValue & ~Mask) | (KillingValue << LShiftAmount); + LLVM_DEBUG(dbgs() << "DSE: Merge Stores:\n Killed: " << *KilledI + << "\n Killing: " << *KillingI << "\n Merged Value: " << Merged << '\n'); - return ConstantInt::get(Earlier->getValueOperand()->getType(), Merged); + return ConstantInt::get(KilledI->getValueOperand()->getType(), Merged); } return nullptr; } @@ -936,138 +940,141 @@ return State; } - /// Return 'OW_Complete' if a store to the 'Later' location (by \p LaterI - /// instruction) completely overwrites a store to the 'Earlier' location. - /// (by \p EarlierI instruction). - /// Return OW_MaybePartial if \p Later does not completely overwrite - /// \p Earlier, but they both write to the same underlying object. In that - /// case, use isPartialOverwrite to check if \p Later partially overwrites - /// \p Earlier. Returns 'OR_None' if later is known to not overwrite the - /// earlier. 'OW_Unknown' if nothing can be determined. + /// Return 'OW_Complete' if a store to the 'KillingLoc' location (by \p KillingI + /// instruction) completely overwrites a store to the 'KilledLoc' location. + /// (by \p KilledI instruction). + /// Return OW_MaybePartial if \p KillingI does not completely overwrite + /// \p KilledI, but they both write to the same underlying object. In that + /// case, use isPartialOverwrite to check if \p KillingI partially overwrites + /// \p KilledI. Returns 'OR_None' if \p KillingI is known to not overwrite the + /// \p KilledI. 'OW_Unknown' if nothing can be determined. /// /// Parameters: - /// LaterI/EarlierI - Instructions accessing \p Later/Earlier memory + /// KillingI/KilledI - Instructions accessing \p KillingLoc/KilledLoc memory /// locations - /// Later/Earlier - Memory locations under question - /// EarlierOff/LaterOff [in, out] - On input, may provide initial offset of - /// access relative to Later/Earlier memory location. 
On output, if - /// returned value is not "OW_Unknown" will keep absolute offset of access - /// relative to some common base of Later and Earlier locations. - OverwriteResult isOverwrite(const Instruction *LaterI, - const Instruction *EarlierI, - const MemoryLocation &Later, - const MemoryLocation &Earlier, - int64_t &EarlierOff, int64_t &LaterOff) { + /// KillingLoc/KilledLoc - Memory locations under question + /// KillingOff/KilledOff [in, out] - On input, may provide initial offset of + /// access relative to \p KillingLoc/KilledLoc. On output, if returned + /// value is not "OW_Unknown" will keep absolute offset of access + /// relative to some common base of \p KillingLoc and \p KilledLoc + /// locations. + OverwriteResult isOverwrite(const Instruction *KillingI, + const Instruction *KilledI, + const MemoryLocation &KillingLoc, + const MemoryLocation &KilledLoc, + int64_t &KillingOff, int64_t &KilledOff) { // AliasAnalysis does not always account for loops. Limit overwrite checks - // to dependencies for which we can guarantee they are independant of any + // to dependencies for which we can guarantee they are independent of any // loops they are in. - if (!isGuaranteedLoopIndependent(EarlierI, LaterI, Earlier)) + if (!isGuaranteedLoopIndependent(KilledI, KillingI, KilledLoc)) return OW_Unknown; // FIXME: Vet that this works for size upper-bounds. Seems unlikely that we'll // get imprecise values here, though (except for unknown sizes). - if (!Later.Size.isPrecise() || !Earlier.Size.isPrecise()) { + if (!KillingLoc.Size.isPrecise() || !KilledLoc.Size.isPrecise()) { // In case no constant size is known, try to an IR values for the number // of bytes written and check if they match. - const auto *LaterMemI = dyn_cast(LaterI); - const auto *EarlierMemI = dyn_cast(EarlierI); - if (LaterMemI && EarlierMemI) { - const Value *LaterV = LaterMemI->getLength(); - const Value *EarlierV = EarlierMemI->getLength(); - if (LaterV == EarlierV && EarlierOff == LaterOff && - BatchAA.isMustAlias(Earlier, Later)) + const auto *KillingMemI = dyn_cast(KillingI); + const auto *KilledMemI = dyn_cast(KilledI); + if (KillingMemI && KilledMemI) { + const Value *KillingV = KillingMemI->getLength(); + const Value *KilledV = KilledMemI->getLength(); + if (KillingV == KilledV && KilledOff == KillingOff && + BatchAA.isMustAlias(KilledLoc, KillingLoc)) return OW_Complete; } // Masked stores have imprecise locations, but we can reason about them // to some extent. - return isMaskedStoreOverwrite(LaterI, EarlierI, BatchAA); + return isMaskedStoreOverwrite(KillingI, KilledI, BatchAA); } - const uint64_t LaterSize = Later.Size.getValue(); - const uint64_t EarlierSize = Earlier.Size.getValue(); + const uint64_t KillingSize = KillingLoc.Size.getValue(); + const uint64_t KilledSize = KilledLoc.Size.getValue(); // Query the alias information - AliasResult AAR = BatchAA.alias(Later, Earlier); + AliasResult AAR = BatchAA.alias(KillingLoc, KilledLoc); // If the start pointers are the same, it's enough to compare sizes and - // offsets to see if the later store fully overwrites the earlier one. + // offsets to see if the killing store fully overwrites the killed one. // If we hit a partial alias we may have a full overwrite if (AAR == AliasResult::MustAlias || (AAR == AliasResult::PartialAlias && AAR.hasOffset() && AAR.getOffset() >= 0)) { - int64_t AdjEarlierOff = - EarlierOff + (AAR == AliasResult::PartialAlias ? AAR.getOffset() : 0); - // Make sure that the earlier access is "inside" the later one. 
- if (AdjEarlierOff >= LaterOff && - uint64_t(AdjEarlierOff - LaterOff) + EarlierSize <= LaterSize) { - EarlierOff = AdjEarlierOff; + int64_t AdjKilledOff = + KilledOff + (AAR == AliasResult::PartialAlias ? AAR.getOffset() : 0); + // Make sure that the killed access is "inside" the killing one. + if (AdjKilledOff >= KillingOff && + uint64_t(AdjKilledOff - KillingOff) + KilledSize <= KillingSize) { + KilledOff = AdjKilledOff; return OW_Complete; } } - // Check to see if the later store is to the entire object (either a + // Check to see if the killing store is to the entire object (either a // global, an alloca, or a byval/inalloca argument). If so, then it clearly // overwrites any other store to the same object. - const Value *P1 = Earlier.Ptr->stripPointerCasts(); - const Value *P2 = Later.Ptr->stripPointerCasts(); - const Value *UO1 = getUnderlyingObject(P1), *UO2 = getUnderlyingObject(P2); + const Value *KilledPtr = KilledLoc.Ptr->stripPointerCasts(); + const Value *KillingPtr = KillingLoc.Ptr->stripPointerCasts(); + const Value *KilledUndObj = getUnderlyingObject(KilledPtr); + const Value *KillingUndObj = getUnderlyingObject(KillingPtr); // If we can't resolve the same pointers to the same object, then we can't // analyze them at all. - if (UO1 != UO2) + if (KilledUndObj != KillingUndObj) return OW_Unknown; - // If the "Later" store is to a recognizable object, get its size. - uint64_t ObjectSize = getPointerSize(UO2, DL, TLI, &F); - if (ObjectSize != MemoryLocation::UnknownSize) - if (ObjectSize == LaterSize && ObjectSize >= EarlierSize) + // If the KillingI store is to a recognizable object, get its size. + uint64_t KillingUndObjSize = getPointerSize(KillingUndObj, DL, TLI, &F); + if (KillingUndObjSize != MemoryLocation::UnknownSize) + if (KillingUndObjSize == KillingSize && KillingUndObjSize >= KilledSize) return OW_Complete; // Okay, we have stores to two completely different pointers. Try to // decompose the pointer into a "base + constant_offset" form. If the // base pointers are equal, then we can reason about the two stores. - int64_t ExtraEarlierOff = 0; - int64_t ExtraLaterOff = 0; - const Value *BP1 = - GetPointerBaseWithConstantOffset(P1, ExtraEarlierOff, DL); - const Value *BP2 = GetPointerBaseWithConstantOffset(P2, ExtraLaterOff, DL); - EarlierOff += ExtraEarlierOff; - LaterOff += ExtraLaterOff; + int64_t ExtraKilledOff = 0; + int64_t ExtraKillingOff = 0; + const Value *KilledBasePtr = + GetPointerBaseWithConstantOffset(KilledPtr, ExtraKilledOff, DL); + const Value *KillingBasePtr = + GetPointerBaseWithConstantOffset(KillingPtr, ExtraKillingOff, DL); + KilledOff += ExtraKilledOff; + KillingOff += ExtraKillingOff; // If the base pointers still differ, we have two completely different // stores. 
- if (BP1 != BP2) + if (KilledBasePtr != KillingBasePtr) return OW_Unknown; - // The later access completely overlaps the earlier store if and only if - // both start and end of the earlier one is "inside" the later one: - // |<->|--earlier--|<->| - // |-------later-------| + // The killing access completely overlaps the killed store if and only if + // both start and end of the killed one is "inside" the killing one: + // |<->|--killed--|<->| + // |-----killing------| // Accesses may overlap if and only if start of one of them is "inside" // another one: - // |<->|--earlier--|<----->| - // |-------later-------| + // |<->|--killed--|<-------->| + // |-------killing--------| // OR - // |----- earlier -----| - // |<->|---later---|<----->| + // |-------killed-------| + // |<->|---killing---|<----->| // // We have to be careful here as *Off is signed while *.Size is unsigned. - // Check if the earlier access starts "not before" the later one. - if (EarlierOff >= LaterOff) { - // If the earlier access ends "not after" the later access then the - // earlier one is completely overwritten by the later one. - if (uint64_t(EarlierOff - LaterOff) + EarlierSize <= LaterSize) + // Check if the killed access starts "not before" the killing one. + if (KilledOff >= KillingOff) { + // If the killed access ends "not after" the killing access then the + // killed one is completely overwritten by the killing one. + if (uint64_t(KilledOff - KillingOff) + KilledSize <= KillingSize) return OW_Complete; - // If start of the earlier access is "before" end of the later access + // If start of the killed access is "before" end of the killing access // then accesses overlap. - else if ((uint64_t)(EarlierOff - LaterOff) < LaterSize) + else if ((uint64_t)(KilledOff - KillingOff) < KillingSize) return OW_MaybePartial; } - // If start of the later access is "before" end of the earlier access then + // If start of the killing access is "before" end of the killed access then // accesses overlap. - else if ((uint64_t)(LaterOff - EarlierOff) < EarlierSize) { + else if ((uint64_t)(KillingOff - KilledOff) < KilledSize) { return OW_MaybePartial; } @@ -1163,8 +1170,8 @@ int64_t InstWriteOffset = 0; int64_t DepWriteOffset = 0; if (auto CC = getLocForWriteEx(UseInst)) - return isOverwrite(UseInst, DefInst, *CC, DefLoc, DepWriteOffset, - InstWriteOffset) == OW_Complete; + return isOverwrite(UseInst, DefInst, *CC, DefLoc, InstWriteOffset, + DepWriteOffset) == OW_Complete; return false; } @@ -1269,8 +1276,8 @@ } int64_t InstWriteOffset = 0; int64_t DepWriteOffset = 0; - return isOverwrite(MaybeTerm, AccessI, TermLoc, Loc, DepWriteOffset, - InstWriteOffset) == OW_Complete; + return isOverwrite(MaybeTerm, AccessI, TermLoc, Loc, InstWriteOffset, + DepWriteOffset) == OW_Complete; } // Returns true if \p Use may read from \p DefLoc. @@ -1302,8 +1309,8 @@ int64_t DummyDefOffset = DefOffset; int64_t DummyUseOffset = UseOffset; if (UseLoc && - isOverwrite(DefInst, UseInst, DefLoc, *UseLoc, DummyUseOffset, - DummyDefOffset) == OW_None) + isOverwrite(DefInst, UseInst, DefLoc, *UseLoc, DummyDefOffset, + DummyUseOffset) == OW_None) return false; // NOTE: For calls, the number of stores removed could be slightly improved @@ -1376,7 +1383,7 @@ // MemoryUse (read). 
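Once both pointers have been reduced to one common base plus constant offsets, the decision above is plain interval containment. A self-contained sketch of that final step in isOverwrite (the partial cases are refined later by isPartialOverwrite):

#include <cstdint>

enum OverwriteResult { OW_None, OW_MaybePartial, OW_Complete };

static OverwriteResult classifyOverwrite(int64_t KillingOff,
                                         uint64_t KillingSize,
                                         int64_t KilledOff,
                                         uint64_t KilledSize) {
  if (KilledOff >= KillingOff) {
    // Killed access starts inside the killing one; if it also ends inside,
    // it is completely overwritten.
    if (uint64_t(KilledOff - KillingOff) + KilledSize <= KillingSize)
      return OW_Complete;
    // Otherwise the killed access merely starts inside the killing one.
    if (uint64_t(KilledOff - KillingOff) < KillingSize)
      return OW_MaybePartial;
  } else if (uint64_t(KillingOff - KilledOff) < KilledSize) {
    // The killing access starts inside the killed one.
    return OW_MaybePartial;
  }
  // The two accesses are known not to overlap.
  return OW_None;
}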
Optional getDomMemoryDef(MemoryDef *KillingDef, MemoryAccess *StartAccess, - const MemoryLocation &DefLoc, const Value *DefUO, + const MemoryLocation &KillingLoc, const Value *KillingUndObj, unsigned &ScanLimit, unsigned &WalkerStepLimit, bool IsMemTerm, unsigned &PartialLimit) { if (ScanLimit == 0 || WalkerStepLimit == 0) { @@ -1428,19 +1435,19 @@ MemoryDef *CurrentDef = cast(Current); Instruction *CurrentI = CurrentDef->getMemoryInst(); - if (canSkipDef(CurrentDef, !isInvisibleToCallerBeforeRet(DefUO))) + if (canSkipDef(CurrentDef, !isInvisibleToCallerBeforeRet(KillingUndObj))) continue; // Before we try to remove anything, check for any extra throwing // instructions that block us from DSEing - if (mayThrowBetween(KillingI, CurrentI, DefUO)) { + if (mayThrowBetween(KillingI, CurrentI, KillingUndObj)) { LLVM_DEBUG(dbgs() << " ... skip, may throw!\n"); return None; } // Check for anything that looks like it will be a barrier to further // removal - if (isDSEBarrier(DefUO, CurrentI)) { + if (isDSEBarrier(KillingUndObj, CurrentI)) { LLVM_DEBUG(dbgs() << " ... skip, barrier\n"); return None; } @@ -1455,16 +1462,16 @@ // for intrinsic calls, because the code knows how to handle memcpy // intrinsics. if (!isa(CurrentI) && - isReadClobber(KillingI, DefLoc, 0, CurrentI, CurrentLoc, 0)) + isReadClobber(KillingI, KillingLoc, 0, CurrentI, CurrentLoc, 0)) return None; // Quick check if there are direct uses that are read-clobbers. - if (any_of(Current->uses(), [this, KillingI, &DefLoc, + if (any_of(Current->uses(), [this, KillingI, &KillingLoc, StartAccess](Use &U) { if (auto *UseOrDef = dyn_cast(U.getUser())) { auto *UseInst = UseOrDef->getMemoryInst(); return !MSSA.dominates(StartAccess, UseOrDef) && - isReadClobber(KillingI, DefLoc, 0, UseInst); + isReadClobber(KillingI, KillingLoc, 0, UseInst); } return false; })) { @@ -1493,10 +1500,10 @@ if (!isMemTerminator(*CurrentLoc, CurrentI, KillingI)) continue; } else { - int64_t InstWriteOffset = 0; - int64_t DepWriteOffset = 0; - auto OR = isOverwrite(KillingI, CurrentI, DefLoc, *CurrentLoc, - DepWriteOffset, InstWriteOffset); + int64_t KillingOffset = 0; + int64_t KilledOffset = 0; + auto OR = isOverwrite(KillingI, CurrentI, KillingLoc, *CurrentLoc, + KillingOffset, KilledOffset); // If Current does not write to the same object as KillingDef, check // the next candidate. if (OR == OW_Unknown || OR == OW_None) @@ -1520,28 +1527,29 @@ // Accesses to objects accessible after the function returns can only be // eliminated if the access is killed along all paths to the exit. Collect // the blocks with killing (=completely overwriting MemoryDefs) and check if - // they cover all paths from EarlierAccess to any function exit. + // they cover all paths from KilledAccess to any function exit. 
SmallPtrSet KillingDefs; KillingDefs.insert(KillingDef->getMemoryInst()); - MemoryAccess *EarlierAccess = Current; - Instruction *EarlierMemInst = - cast(EarlierAccess)->getMemoryInst(); - LLVM_DEBUG(dbgs() << " Checking for reads of " << *EarlierAccess << " (" - << *EarlierMemInst << ")\n"); + MemoryAccess *KilledAccess = Current; + MemoryLocation KilledLoc = *CurrentLoc; + Instruction *KilledI = + cast(KilledAccess)->getMemoryInst(); + LLVM_DEBUG(dbgs() << " Checking for reads of " << *KilledAccess << " (" + << *KilledI << ")\n"); SmallSetVector WorkList; auto PushMemUses = [&WorkList](MemoryAccess *Acc) { for (Use &U : Acc->uses()) WorkList.insert(cast(U.getUser())); }; - PushMemUses(EarlierAccess); + PushMemUses(KilledAccess); // Optimistically collect all accesses for reads. If we do not find any // read clobbers, add them to the cache. SmallPtrSet KnownNoReads; - if (!EarlierMemInst->mayReadFromMemory()) - KnownNoReads.insert(EarlierAccess); - // Check if EarlierDef may be read. + if (!KilledI->mayReadFromMemory()) + KnownNoReads.insert(KilledAccess); + // Check if KilledDef may be read. for (unsigned I = 0; I < WorkList.size(); I++) { MemoryAccess *UseAccess = WorkList[I]; @@ -1580,7 +1588,7 @@ // A memory terminator kills all preceeding MemoryDefs and all succeeding // MemoryAccesses. We do not have to check it's users. - if (isMemTerminator(*CurrentLoc, EarlierMemInst, UseInst)) { + if (isMemTerminator(KilledLoc, KilledI, UseInst)) { LLVM_DEBUG( dbgs() << " ... skipping, memterminator invalidates following accesses\n"); @@ -1593,14 +1601,14 @@ continue; } - if (UseInst->mayThrow() && !isInvisibleToCallerBeforeRet(DefUO)) { + if (UseInst->mayThrow() && !isInvisibleToCallerBeforeRet(KillingUndObj)) { LLVM_DEBUG(dbgs() << " ... found throwing instruction\n"); return None; } // Uses which may read the original MemoryDef mean we cannot eliminate the // original MD. Stop walk. - if (isReadClobber(EarlierMemInst, *CurrentLoc, 0, UseInst)) { + if (isReadClobber(KilledI, KilledLoc, 0, UseInst)) { LLVM_DEBUG(dbgs() << " ... found read clobber\n"); return None; } @@ -1608,16 +1616,16 @@ // If this worklist walks back to the original memory access (and the // pointer is not guarenteed loop invariant) then we cannot assume that a // store kills itself. - if (EarlierAccess == UseAccess && - !isGuaranteedLoopInvariant(CurrentLoc->Ptr)) { + if (KilledAccess == UseAccess && + !isGuaranteedLoopInvariant(KilledLoc.Ptr)) { LLVM_DEBUG(dbgs() << " ... found not loop invariant self access\n"); return None; } - // Otherwise, for the KillingDef and EarlierAccess we only have to check + // Otherwise, for the KillingDef and KilledAccess we only have to check // if it reads the memory location. // TODO: It would probably be better to check for self-reads before // calling the function. - if (KillingDef == UseAccess || EarlierAccess == UseAccess) { + if (KillingDef == UseAccess || KilledAccess == UseAccess) { LLVM_DEBUG(dbgs() << " ... skipping killing def/dom access\n"); continue; } @@ -1626,18 +1634,18 @@ // the original location. Otherwise we have to check uses of *all* // MemoryDefs we discover, including non-aliasing ones. Otherwise we might // miss cases like the following - // 1 = Def(LoE) ; <----- EarlierDef stores [0,1] + // 1 = Def(LoE) ; <----- KilledDef stores [0,1] // 2 = Def(1) ; (2, 1) = NoAlias, stores [2,3] // Use(2) ; MayAlias 2 *and* 1, loads [0, 3]. 
// (The Use points to the *first* Def it may alias) // 3 = Def(1) ; <---- Current (3, 2) = NoAlias, (3,1) = MayAlias, // stores [0,1] if (MemoryDef *UseDef = dyn_cast(UseAccess)) { - if (isCompleteOverwrite(*CurrentLoc, EarlierMemInst, UseInst)) { + if (isCompleteOverwrite(KilledLoc, KilledI, UseInst)) { BasicBlock *MaybeKillingBlock = UseInst->getParent(); if (PostOrderNumbers.find(MaybeKillingBlock)->second < - PostOrderNumbers.find(EarlierAccess->getBlock())->second) { - if (!isInvisibleToCallerAfterRet(DefUO)) { + PostOrderNumbers.find(KilledAccess->getBlock())->second) { + if (!isInvisibleToCallerAfterRet(KillingUndObj)) { LLVM_DEBUG(dbgs() << " ... found killing def " << *UseInst << "\n"); KillingDefs.insert(UseInst); @@ -1654,8 +1662,8 @@ // For accesses to locations visible after the function returns, make sure // that the location is killed (=overwritten) along all paths from - // EarlierAccess to the exit. - if (!isInvisibleToCallerAfterRet(DefUO)) { + // KilledAccess to the exit. + if (!isInvisibleToCallerAfterRet(KillingUndObj)) { SmallPtrSet KillingBlocks; for (Instruction *KD : KillingDefs) KillingBlocks.insert(KD->getParent()); @@ -1672,17 +1680,17 @@ } // If CommonPred is in the set of killing blocks, just check if it - // post-dominates EarlierAccess. + // post-dominates KilledAccess. if (KillingBlocks.count(CommonPred)) { - if (PDT.dominates(CommonPred, EarlierAccess->getBlock())) - return {EarlierAccess}; + if (PDT.dominates(CommonPred, KilledAccess->getBlock())) + return {KilledAccess}; return None; } - // If the common post-dominator does not post-dominate EarlierAccess, - // there is a path from EarlierAccess to an exit not going through a + // If the common post-dominator does not post-dominate KilledAccess, + // there is a path from KilledAccess to an exit not going through a // killing block. - if (PDT.dominates(CommonPred, EarlierAccess->getBlock())) { + if (PDT.dominates(CommonPred, KilledAccess->getBlock())) { SetVector WorkList; // If CommonPred is null, there are multiple exits from the function. @@ -1695,16 +1703,16 @@ NumCFGTries++; // Check if all paths starting from an exit node go through one of the - // killing blocks before reaching EarlierAccess. + // killing blocks before reaching KilledAccess. for (unsigned I = 0; I < WorkList.size(); I++) { NumCFGChecks++; BasicBlock *Current = WorkList[I]; if (KillingBlocks.count(Current)) continue; - if (Current == EarlierAccess->getBlock()) + if (Current == KilledAccess->getBlock()) return None; - // EarlierAccess is reachable from the entry, so we don't have to + // KilledAccess is reachable from the entry, so we don't have to // explore unreachable blocks further. if (!DT.isReachableFromEntry(Current)) continue; @@ -1716,14 +1724,14 @@ return None; } NumCFGSuccess++; - return {EarlierAccess}; + return {KilledAccess}; } return None; } - // No aliasing MemoryUses of EarlierAccess found, EarlierAccess is + // No aliasing MemoryUses of KilledAccess found, KilledAccess is // potentially dead. - return {EarlierAccess}; + return {KilledAccess}; } // Delete dead memory defs @@ -1764,43 +1772,44 @@ } } - // Check for any extra throws between SI and NI that block DSE. This only - // checks extra maythrows (those that aren't MemoryDef's). MemoryDef that may - // throw are handled during the walk from one def to the next. 
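The killing-blocks check above is essentially a reverse walk over the CFG: starting from the exits, every path must hit a block containing a complete overwrite before it reaches the block of the killed store. A simplified, self-contained sketch over an adjacency list; the pass itself walks BasicBlocks, consults the post-dominator tree first, and skips blocks unreachable from the entry.

#include <set>
#include <vector>

// Preds[B] lists the predecessor blocks of block B; ExitBlocks are the blocks
// the reverse walk starts from (CommonPred, or all exits of the function).
static bool killedAlongAllExitPaths(
    const std::vector<std::vector<unsigned>> &Preds,
    const std::vector<unsigned> &ExitBlocks,
    const std::set<unsigned> &KillingBlocks, unsigned KilledBlock) {
  std::vector<unsigned> WorkList(ExitBlocks.begin(), ExitBlocks.end());
  std::set<unsigned> Seen(ExitBlocks.begin(), ExitBlocks.end());
  for (size_t I = 0; I < WorkList.size(); ++I) {
    unsigned Current = WorkList[I];
    if (KillingBlocks.count(Current))
      continue; // this path is already covered by a killing store
    if (Current == KilledBlock)
      return false; // an exit path reaches the killed store unkilled
    for (unsigned Pred : Preds[Current])
      if (Seen.insert(Pred).second)
        WorkList.push_back(Pred);
  }
  return true;
}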
- bool mayThrowBetween(Instruction *SI, Instruction *NI, - const Value *SILocUnd) { - // First see if we can ignore it by using the fact that SI is an + // Check for any extra throws between \p KillingI and \p KilledI that block + // DSE. This only checks extra maythrows (those that aren't MemoryDef's). + // MemoryDef that may throw are handled during the walk from one def to the + // next. + bool mayThrowBetween(Instruction *KillingI, Instruction *KilledI, + const Value *KillingUndObj) { + // First see if we can ignore it by using the fact that KillingI is an // alloca/alloca like object that is not visible to the caller during // execution of the function. - if (SILocUnd && isInvisibleToCallerBeforeRet(SILocUnd)) + if (KillingUndObj && isInvisibleToCallerBeforeRet(KillingUndObj)) return false; - if (SI->getParent() == NI->getParent()) - return ThrowingBlocks.count(SI->getParent()); + if (KillingI->getParent() == KilledI->getParent()) + return ThrowingBlocks.count(KillingI->getParent()); return !ThrowingBlocks.empty(); } - // Check if \p NI acts as a DSE barrier for \p SI. The following instructions - // act as barriers: - // * A memory instruction that may throw and \p SI accesses a non-stack + // Check if \p KilledI acts as a DSE barrier for \p KillingI. The following + // instructions act as barriers: + // * A memory instruction that may throw and \p KillingI accesses a non-stack // object. // * Atomic stores stronger that monotonic. - bool isDSEBarrier(const Value *SILocUnd, Instruction *NI) { - // If NI may throw it acts as a barrier, unless we are to an alloca/alloca - // like object that does not escape. - if (NI->mayThrow() && !isInvisibleToCallerBeforeRet(SILocUnd)) + bool isDSEBarrier(const Value *KillingUndObj, Instruction *KilledI) { + // If KilledI may throw it acts as a barrier, unless we are to an + // alloca/alloca like object that does not escape. + if (KilledI->mayThrow() && !isInvisibleToCallerBeforeRet(KillingUndObj)) return true; - // If NI is an atomic load/store stronger than monotonic, do not try to + // If KilledI is an atomic load/store stronger than monotonic, do not try to // eliminate/reorder it. 
- if (NI->isAtomic()) { - if (auto *LI = dyn_cast(NI)) + if (KilledI->isAtomic()) { + if (auto *LI = dyn_cast(KilledI)) return isStrongerThanMonotonic(LI->getOrdering()); - if (auto *SI = dyn_cast(NI)) + if (auto *SI = dyn_cast(KilledI)) return isStrongerThanMonotonic(SI->getOrdering()); - if (auto *ARMW = dyn_cast(NI)) + if (auto *ARMW = dyn_cast(KilledI)) return isStrongerThanMonotonic(ARMW->getOrdering()); - if (auto *CmpXchg = dyn_cast(NI)) + if (auto *CmpXchg = dyn_cast(KilledI)) return isStrongerThanMonotonic(CmpXchg->getSuccessOrdering()) || isStrongerThanMonotonic(CmpXchg->getFailureOrdering()); llvm_unreachable("other instructions should be skipped in MemorySSA"); @@ -1934,27 +1943,27 @@ MemoryDef *KillingDef = State.MemDefs[I]; if (State.SkipStores.count(KillingDef)) continue; - Instruction *SI = KillingDef->getMemoryInst(); + Instruction *KillingI = KillingDef->getMemoryInst(); - Optional MaybeSILoc; - if (State.isMemTerminatorInst(SI)) - MaybeSILoc = State.getLocForTerminator(SI).map( + Optional MaybeKillingLoc; + if (State.isMemTerminatorInst(KillingI)) + MaybeKillingLoc = State.getLocForTerminator(KillingI).map( [](const std::pair &P) { return P.first; }); else - MaybeSILoc = State.getLocForWriteEx(SI); + MaybeKillingLoc = State.getLocForWriteEx(KillingI); - if (!MaybeSILoc) { + if (!MaybeKillingLoc) { LLVM_DEBUG(dbgs() << "Failed to find analyzable write location for " - << *SI << "\n"); + << *KillingI << "\n"); continue; } - MemoryLocation SILoc = *MaybeSILoc; - assert(SILoc.Ptr && "SILoc should not be null"); - const Value *SILocUnd = getUnderlyingObject(SILoc.Ptr); + MemoryLocation KillingLoc = *MaybeKillingLoc; + assert(KillingLoc.Ptr && "KillingLoc should not be null"); + const Value *KillingUndObj = getUnderlyingObject(KillingLoc.Ptr); MemoryAccess *Current = KillingDef; LLVM_DEBUG(dbgs() << "Trying to eliminate MemoryDefs killed by " - << *Current << " (" << *SI << ")\n"); + << *KillingDef << " (" << *KillingI << ")\n"); unsigned ScanLimit = MemorySSAScanLimit; unsigned WalkerStepLimit = MemorySSAUpwardsStepLimit; @@ -1962,34 +1971,34 @@ // Worklist of MemoryAccesses that may be killed by KillingDef. SetVector ToCheck; - if (SILocUnd) + if (KillingUndObj) ToCheck.insert(KillingDef->getDefiningAccess()); bool Shortend = false; - bool IsMemTerm = State.isMemTerminatorInst(SI); + bool IsMemTerm = State.isMemTerminatorInst(KillingI); // Check if MemoryAccesses in the worklist are killed by KillingDef. for (unsigned I = 0; I < ToCheck.size(); I++) { Current = ToCheck[I]; if (State.SkipStores.count(Current)) continue; - Optional Next = State.getDomMemoryDef( - KillingDef, Current, SILoc, SILocUnd, ScanLimit, WalkerStepLimit, + Optional MaybeKilledAccess = State.getDomMemoryDef( + KillingDef, Current, KillingLoc, KillingUndObj, ScanLimit, WalkerStepLimit, IsMemTerm, PartialLimit); - if (!Next) { + if (!MaybeKilledAccess) { LLVM_DEBUG(dbgs() << " finished walk\n"); continue; } - MemoryAccess *EarlierAccess = *Next; - LLVM_DEBUG(dbgs() << " Checking if we can kill " << *EarlierAccess); - if (isa(EarlierAccess)) { + MemoryAccess *KilledAccess = *MaybeKilledAccess; + LLVM_DEBUG(dbgs() << " Checking if we can kill " << *KilledAccess); + if (isa(KilledAccess)) { LLVM_DEBUG(dbgs() << "\n ... 
adding incoming values to worklist\n"); - for (Value *V : cast(EarlierAccess)->incoming_values()) { + for (Value *V : cast(KilledAccess)->incoming_values()) { MemoryAccess *IncomingAccess = cast(V); BasicBlock *IncomingBlock = IncomingAccess->getBlock(); - BasicBlock *PhiBlock = EarlierAccess->getBlock(); + BasicBlock *PhiBlock = KilledAccess->getBlock(); // We only consider incoming MemoryAccesses that come before the // MemoryPhi. Otherwise we could discover candidates that do not @@ -2000,73 +2009,74 @@ } continue; } - auto *NextDef = cast(EarlierAccess); - Instruction *NI = NextDef->getMemoryInst(); - LLVM_DEBUG(dbgs() << " (" << *NI << ")\n"); - ToCheck.insert(NextDef->getDefiningAccess()); + auto *KilledDefAccess = cast(KilledAccess); + Instruction *KilledI = KilledDefAccess->getMemoryInst(); + LLVM_DEBUG(dbgs() << " (" << *KilledI << ")\n"); + ToCheck.insert(KilledDefAccess->getDefiningAccess()); NumGetDomMemoryDefPassed++; if (!DebugCounter::shouldExecute(MemorySSACounter)) continue; - MemoryLocation NILoc = *State.getLocForWriteEx(NI); + MemoryLocation KilledLoc = *State.getLocForWriteEx(KilledI); if (IsMemTerm) { - const Value *NIUnd = getUnderlyingObject(NILoc.Ptr); - if (SILocUnd != NIUnd) + const Value *KilledUndObj = getUnderlyingObject(KilledLoc.Ptr); + if (KillingUndObj != KilledUndObj) continue; - LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: " << *NI - << "\n KILLER: " << *SI << '\n'); - State.deleteDeadInstruction(NI); + LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: " << *KilledI + << "\n KILLER: " << *KillingI << '\n'); + State.deleteDeadInstruction(KilledI); ++NumFastStores; MadeChange = true; } else { - // Check if NI overwrites SI. - int64_t InstWriteOffset = 0; - int64_t DepWriteOffset = 0; - OverwriteResult OR = State.isOverwrite(SI, NI, SILoc, NILoc, - DepWriteOffset, InstWriteOffset); + // Check if KillingI overwrites KilledI. + int64_t KillingOffset = 0; + int64_t KilledOffset = 0; + OverwriteResult OR = + State.isOverwrite(KillingI, KilledI, KillingLoc, KilledLoc, + KillingOffset, KilledOffset); if (OR == OW_MaybePartial) { auto Iter = State.IOLs.insert( std::make_pair( - NI->getParent(), InstOverlapIntervalsTy())); + KilledI->getParent(), InstOverlapIntervalsTy())); auto &IOL = Iter.first->second; - OR = isPartialOverwrite(SILoc, NILoc, DepWriteOffset, InstWriteOffset, - NI, IOL); + OR = isPartialOverwrite(KillingLoc, KilledLoc, KillingOffset, + KilledOffset, KilledI, IOL); } if (EnablePartialStoreMerging && OR == OW_PartialEarlierWithFullLater) { - auto *Earlier = dyn_cast(NI); - auto *Later = dyn_cast(SI); + auto *KilledSI = dyn_cast(KilledI); + auto *KillingSI = dyn_cast(KillingI); // We are re-using tryToMergePartialOverlappingStores, which requires - // Earlier to domiante Later. + // KilledSI to dominate KillingSI. // TODO: implement tryToMergeParialOverlappingStores using MemorySSA. - if (Earlier && Later && DT.dominates(Earlier, Later)) { + if (KilledSI && KillingSI && DT.dominates(KilledSI, KillingSI)) { if (Constant *Merged = tryToMergePartialOverlappingStores( - Earlier, Later, InstWriteOffset, DepWriteOffset, State.DL, + KillingSI, KilledSI, KillingOffset, KilledOffset, State.DL, State.BatchAA, &DT)) { // Update stored value of earlier store to merged constant.
- Earlier->setOperand(0, Merged); + KilledSI->setOperand(0, Merged); ++NumModifiedStores; MadeChange = true; Shortend = true; - // Remove later store and remove any outstanding overlap intervals + // Remove killing store and remove any outstanding overlap intervals // for the updated store. - State.deleteDeadInstruction(Later); - auto I = State.IOLs.find(Earlier->getParent()); + State.deleteDeadInstruction(KillingSI); + auto I = State.IOLs.find(KilledSI->getParent()); if (I != State.IOLs.end()) - I->second.erase(Earlier); + I->second.erase(KilledSI); break; } } } if (OR == OW_Complete) { - LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: " << *NI - << "\n KILLER: " << *SI << '\n'); - State.deleteDeadInstruction(NI); + LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: " << *KilledI + << "\n KILLER: " << *KillingI << '\n'); + State.deleteDeadInstruction(KilledI); ++NumFastStores; MadeChange = true; } @@ -2074,10 +2084,11 @@ } // Check if the store is a no-op. - if (!Shortend && isRemovable(SI) && - State.storeIsNoop(KillingDef, SILoc, SILocUnd)) { - LLVM_DEBUG(dbgs() << "DSE: Remove No-Op Store:\n DEAD: " << *SI << '\n'); - State.deleteDeadInstruction(SI); + if (!Shortend && isRemovable(KillingI) && + State.storeIsNoop(KillingDef, KillingLoc, KillingUndObj)) { + LLVM_DEBUG(dbgs() << "DSE: Remove No-Op Store:\n DEAD: " << *KillingI + << '\n'); + State.deleteDeadInstruction(KillingI); NumRedundantStores++; MadeChange = true; continue;
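For intuition about what the pass achieves end to end, a source-level illustration (the pass itself runs on LLVM IR, and whether these exact rewrites fire depends on the surrounding code and the flags discussed above):

void example(int *P, char *Q) {
  *P = 1; // dead: completely overwritten below with no intervening read
  *P = 2;
  __builtin_memset(Q, 0, 16);    // can be shortened to its first 8 bytes,
  __builtin_memset(Q + 8, 1, 8); // since this overwrites the tail [8, 16)
}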