Index: lib/Transforms/Scalar/DeadStoreElimination.cpp
===================================================================
--- lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -59,6 +59,8 @@
 //===----------------------------------------------------------------------===//
 // Helper functions
 //===----------------------------------------------------------------------===//
+typedef std::map<int64_t, int64_t> OverlapIntervalsTy;
+typedef DenseMap<Instruction *, OverlapIntervalsTy> InstOverlapIntervalsTy;
 
 /// Delete this instruction. Before we do, go through and zero out all the
 /// operands of this instruction. If any of them become dead, delete them and
@@ -67,6 +69,7 @@
 static void deleteDeadInstruction(Instruction *I, BasicBlock::iterator *BBI,
                                   MemoryDependenceResults &MD,
                                   const TargetLibraryInfo &TLI,
+                                  InstOverlapIntervalsTy &IOL,
                                   SmallSetVector<Value *, 16> *ValueSet = nullptr) {
   SmallVector<Instruction *, 32> NowDeadInsts;
 
@@ -105,6 +108,8 @@
     else
       DeadInst->eraseFromParent();
 
+    IOL.erase(DeadInst);
+
     if (ValueSet) ValueSet->remove(DeadInst);
   } while (!NowDeadInsts.empty());
   *BBI = NewIter;
@@ -290,9 +295,6 @@
 };
 }
 
-typedef DenseMap<Instruction *,
-                 std::map<int64_t, int64_t>> InstOverlapIntervalsTy;
-
 /// Return 'OverwriteComplete' if a store to the 'Later' location completely
 /// overwrites a store to the 'Earlier' location, 'OverwriteEnd' if the end of
 /// the 'Earlier' location is completely overwritten by 'Later',
@@ -438,9 +440,9 @@
   //
   // In this case we may want to trim the size of earlier to avoid generating
   // writes to addresses which will definitely be overwritten later
-  if (LaterOff > EarlierOff &&
-      LaterOff < int64_t(EarlierOff + Earlier.Size) &&
-      int64_t(LaterOff + Later.Size) >= int64_t(EarlierOff + Earlier.Size))
+  if (!EnablePartialOverwriteTracking &&
+      (LaterOff > EarlierOff && LaterOff < int64_t(EarlierOff + Earlier.Size) &&
+       int64_t(LaterOff + Later.Size) >= int64_t(EarlierOff + Earlier.Size)))
     return OverwriteEnd;
 
   // Finally, we also need to check if the later store overwrites the beginning
@@ -452,9 +454,11 @@
   // In this case we may want to move the destination address and trim the size
   // of earlier to avoid generating writes to addresses which will definitely
   // be overwritten later.
-  if (LaterOff <= EarlierOff && int64_t(LaterOff + Later.Size) > EarlierOff) {
-    assert (int64_t(LaterOff + Later.Size) < int64_t(EarlierOff + Earlier.Size)
-            && "Expect to be handled as OverwriteComplete" );
+  if (!EnablePartialOverwriteTracking &&
+      (LaterOff <= EarlierOff && int64_t(LaterOff + Later.Size) > EarlierOff)) {
+    assert(int64_t(LaterOff + Later.Size) <
+               int64_t(EarlierOff + Earlier.Size) &&
+           "Expect to be handled as OverwriteComplete");
     return OverwriteBegin;
   }
   // Otherwise, they don't completely overlap.
@@ -585,7 +589,8 @@
 /// to a field of that structure.
 static bool handleFree(CallInst *F, AliasAnalysis *AA,
                        MemoryDependenceResults *MD, DominatorTree *DT,
-                       const TargetLibraryInfo *TLI) {
+                       const TargetLibraryInfo *TLI,
+                       InstOverlapIntervalsTy &IOL) {
   bool MadeChange = false;
 
   MemoryLocation Loc = MemoryLocation(F->getOperand(0));
@@ -614,7 +619,7 @@
       // DCE instructions only used to calculate that store.
       BasicBlock::iterator BBI(Dependency);
-      deleteDeadInstruction(Dependency, &BBI, *MD, *TLI);
+      deleteDeadInstruction(Dependency, &BBI, *MD, *TLI, IOL);
       ++NumFastStores;
       MadeChange = true;
 
@@ -669,7 +674,8 @@
 /// ret void
 static bool handleEndBlock(BasicBlock &BB, AliasAnalysis *AA,
                            MemoryDependenceResults *MD,
-                           const TargetLibraryInfo *TLI) {
+                           const TargetLibraryInfo *TLI,
+                           InstOverlapIntervalsTy &IOL) {
   bool MadeChange = false;
 
   // Keep track of all of the stack objects that are dead at the end of the
@@ -728,7 +734,7 @@
               dbgs() << '\n');
 
         // DCE instructions only used to calculate that store.
-        deleteDeadInstruction(Dead, &BBI, *MD, *TLI, &DeadStackObjects);
+        deleteDeadInstruction(Dead, &BBI, *MD, *TLI, IOL, &DeadStackObjects);
         ++NumFastStores;
         MadeChange = true;
         continue;
@@ -737,7 +743,7 @@
 
     // Remove any dead non-memory-mutating instructions.
     if (isInstructionTriviallyDead(&*BBI, TLI)) {
-      deleteDeadInstruction(&*BBI, &BBI, *MD, *TLI, &DeadStackObjects);
+      deleteDeadInstruction(&*BBI, &BBI, *MD, *TLI, IOL, &DeadStackObjects);
       ++NumFastOther;
       MadeChange = true;
       continue;
@@ -819,10 +825,124 @@
   return MadeChange;
 }
 
+static bool tryToShorten(Instruction *EarlierWrite, int64_t &EarlierOffset,
+                         int64_t &EarlierSize, int64_t LaterOffset,
+                         int64_t LaterSize, bool IsOverwriteEnd) {
+  // TODO: base this on the target vector size so that if the earlier
+  // store was too small to get vector writes anyway then its likely
+  // a good idea to shorten it
+  // Power of 2 vector writes are probably always a bad idea to optimize
+  // as any store/memset/memcpy is likely using vector instructions so
+  // shortening it to not vector size is likely to be slower
+  MemIntrinsic *EarlierIntrinsic = cast<MemIntrinsic>(EarlierWrite);
+  unsigned EarlierWriteAlign = EarlierIntrinsic->getAlignment();
+  if (!IsOverwriteEnd)
+    LaterOffset = int64_t(LaterOffset + LaterSize);
+
+  if (!(llvm::isPowerOf2_64(LaterOffset) && EarlierWriteAlign <= LaterOffset) &&
+      !((EarlierWriteAlign != 0) && LaterOffset % EarlierWriteAlign == 0))
+    return false;
+
+  DEBUG(dbgs() << "DSE: Remove Dead Store:\n  OW "
+               << (IsOverwriteEnd ? "END" : "BEGIN") << ": " << *EarlierWrite
+               << "\n  KILLER (offset " << LaterOffset << ", " << EarlierSize
+               << ")\n");
+
+  int64_t NewLength = IsOverwriteEnd
+                          ? LaterOffset - EarlierOffset
+                          : EarlierSize - (LaterOffset - EarlierOffset);
+
+  Value *EarlierWriteLength = EarlierIntrinsic->getLength();
+  Value *TrimmedLength =
+      ConstantInt::get(EarlierWriteLength->getType(), NewLength);
+  EarlierIntrinsic->setLength(TrimmedLength);
+
+  EarlierSize = NewLength;
+  if (!IsOverwriteEnd) {
+    int64_t OffsetMoved = (LaterOffset - EarlierOffset);
+    Value *Indices[1] = {
+        ConstantInt::get(EarlierWriteLength->getType(), OffsetMoved)};
+    GetElementPtrInst *NewDestGEP = GetElementPtrInst::CreateInBounds(
+        EarlierIntrinsic->getRawDest(), Indices, "", EarlierWrite);
+    EarlierIntrinsic->setDest(NewDestGEP);
+    EarlierOffset = EarlierOffset + OffsetMoved;
+  }
+  return true;
+}
+
+static bool tryToShortenEnd(Instruction *EarlierWrite,
+                            OverlapIntervalsTy &IntervalMap,
+                            int64_t &EarlierStart, int64_t &EarlierSize) {
+  if (IntervalMap.empty() || !isShortenableAtTheEnd(EarlierWrite))
+    return false;
+
+  OverlapIntervalsTy::iterator OII = --IntervalMap.end();
+  int64_t LaterStart = OII->second;
+  int64_t LaterSize = OII->first - LaterStart;
+
+  if (LaterStart > EarlierStart && LaterStart < EarlierStart + EarlierSize &&
+      LaterStart + LaterSize >= EarlierStart + EarlierSize) {
+    if (tryToShorten(EarlierWrite, EarlierStart, EarlierSize, LaterStart,
+                     LaterSize, true)) {
+      IntervalMap.erase(OII);
+      return true;
+    }
+  }
+  return false;
+}
+
+static bool tryToShortenBegin(Instruction *EarlierWrite,
+                              OverlapIntervalsTy &IntervalMap,
+                              int64_t &EarlierStart, int64_t &EarlierSize) {
+  if (IntervalMap.empty() || !isShortenableAtTheBeginning(EarlierWrite))
+    return false;
+
+  OverlapIntervalsTy::iterator OII = IntervalMap.begin();
+  int64_t LaterStart = OII->second;
+  int64_t LaterSize = OII->first - LaterStart;
+
+  if (LaterStart <= EarlierStart && LaterStart + LaterSize > EarlierStart) {
+    assert(LaterStart + LaterSize < EarlierStart + EarlierSize &&
+           "Should have been handled as OverwriteComplete");
+    if (tryToShorten(EarlierWrite, EarlierStart, EarlierSize, LaterStart,
+                     LaterSize, false)) {
+      IntervalMap.erase(OII);
+      return true;
+    }
+  }
+  return false;
+}
+
+static bool removePartiallyOverlappedStores(AliasAnalysis *AA,
+                                            const DataLayout &DL,
+                                            InstOverlapIntervalsTy &IOL) {
+  bool Changed = false;
+  for (auto OI : IOL) {
+    Instruction *EarlierWrite = OI.first;
+    MemoryLocation Loc = getLocForWrite(EarlierWrite, *AA);
+    assert(isRemovable(EarlierWrite) && "Expect only removable instruction");
+    assert(Loc.Size != MemoryLocation::UnknownSize && "Unexpected mem loc");
+
+    const Value *Ptr = Loc.Ptr->stripPointerCasts();
+    int64_t EarlierStart = 0;
+    int64_t EarlierSize = int64_t(Loc.Size);
+    GetPointerBaseWithConstantOffset(Ptr, EarlierStart, DL);
+    OverlapIntervalsTy &IntervalMap = OI.second;
+    Changed =
+        tryToShortenEnd(EarlierWrite, IntervalMap, EarlierStart, EarlierSize);
+    if (IntervalMap.empty())
+      continue;
+    Changed |=
+        tryToShortenBegin(EarlierWrite, IntervalMap, EarlierStart, EarlierSize);
+  }
+  return Changed;
+}
+
 static bool eliminateNoopStore(Instruction *Inst, BasicBlock::iterator &BBI,
                                AliasAnalysis *AA, MemoryDependenceResults *MD,
                                const DataLayout &DL,
-                               const TargetLibraryInfo *TLI) {
+                               const TargetLibraryInfo *TLI,
+                               InstOverlapIntervalsTy &IOL) {
   // Must be a store instruction.
   StoreInst *SI = dyn_cast<StoreInst>(Inst);
   if (!SI)
@@ -837,7 +957,7 @@
         DEBUG(dbgs() << "DSE: Remove Store Of Load from same pointer:\n  LOAD: "
                      << *DepLoad << "\n  STORE: " << *SI << '\n');
 
-        deleteDeadInstruction(SI, &BBI, *MD, *TLI);
+        deleteDeadInstruction(SI, &BBI, *MD, *TLI, IOL);
         ++NumRedundantStores;
         return true;
       }
@@ -855,7 +975,7 @@
         dbgs() << "DSE: Remove null store to the calloc'ed object:\n  DEAD: "
                << *Inst << "\n  OBJECT: " << *UnderlyingPointer << '\n');
 
-      deleteDeadInstruction(SI, &BBI, *MD, *TLI);
+      deleteDeadInstruction(SI, &BBI, *MD, *TLI, IOL);
       ++NumRedundantStores;
       return true;
     }
@@ -876,7 +996,7 @@
   for (BasicBlock::iterator BBI = BB.begin(), BBE = BB.end(); BBI != BBE; ) {
     // Handle 'free' calls specially.
     if (CallInst *F = isFreeCall(&*BBI, TLI)) {
-      MadeChange |= handleFree(F, AA, MD, DT, TLI);
+      MadeChange |= handleFree(F, AA, MD, DT, TLI, IOL);
      // Increment BBI after handleFree has potentially deleted instructions.
      // This ensures we maintain a valid iterator.
       ++BBI;
@@ -890,7 +1010,7 @@
       continue;
 
     // eliminateNoopStore will update in iterator, if necessary.
-    if (eliminateNoopStore(Inst, BBI, AA, MD, DL, TLI)) {
+    if (eliminateNoopStore(Inst, BBI, AA, MD, DL, TLI, IOL)) {
       MadeChange = true;
       continue;
     }
@@ -938,7 +1058,7 @@
                 << *DepWrite << "\n  KILLER: " << *Inst << '\n');
 
         // Delete the store and now-dead instructions that feed it.
-        deleteDeadInstruction(DepWrite, &BBI, *MD, *TLI);
+        deleteDeadInstruction(DepWrite, &BBI, *MD, *TLI, IOL);
         ++NumFastStores;
         MadeChange = true;
 
@@ -948,48 +1068,14 @@
       } else if ((OR == OverwriteEnd && isShortenableAtTheEnd(DepWrite)) ||
                  ((OR == OverwriteBegin &&
                    isShortenableAtTheBeginning(DepWrite)))) {
-        // TODO: base this on the target vector size so that if the earlier
-        // store was too small to get vector writes anyway then its likely
-        // a good idea to shorten it
-        // Power of 2 vector writes are probably always a bad idea to optimize
-        // as any store/memset/memcpy is likely using vector instructions so
-        // shortening it to not vector size is likely to be slower
-        MemIntrinsic *DepIntrinsic = cast<MemIntrinsic>(DepWrite);
-        unsigned DepWriteAlign = DepIntrinsic->getAlignment();
+        assert(!EnablePartialOverwriteTracking && "Do not expect to perform "
+                                                  "when partial-overwrite "
+                                                  "tracking is enabled");
+        int64_t EarlierSize = DepLoc.Size;
+        int64_t LaterSize = Loc.Size;
         bool IsOverwriteEnd = (OR == OverwriteEnd);
-        if (!IsOverwriteEnd)
-          InstWriteOffset = int64_t(InstWriteOffset + Loc.Size);
-
-        if ((llvm::isPowerOf2_64(InstWriteOffset) &&
-             DepWriteAlign <= InstWriteOffset) ||
-            ((DepWriteAlign != 0) && InstWriteOffset % DepWriteAlign == 0)) {
-
-          DEBUG(dbgs() << "DSE: Remove Dead Store:\n  OW "
-                       << (IsOverwriteEnd ? "END" : "BEGIN") << ": "
-                       << *DepWrite << "\n  KILLER (offset "
-                       << InstWriteOffset << ", " << DepLoc.Size << ")"
-                       << *Inst << '\n');
-
-          int64_t NewLength =
-              IsOverwriteEnd
-                  ? InstWriteOffset - DepWriteOffset
-                  : DepLoc.Size - (InstWriteOffset - DepWriteOffset);
-
-          Value *DepWriteLength = DepIntrinsic->getLength();
-          Value *TrimmedLength =
-              ConstantInt::get(DepWriteLength->getType(), NewLength);
-          DepIntrinsic->setLength(TrimmedLength);
-
-          if (!IsOverwriteEnd) {
-            int64_t OffsetMoved = (InstWriteOffset - DepWriteOffset);
-            Value *Indices[1] = {
-                ConstantInt::get(DepWriteLength->getType(), OffsetMoved)};
-            GetElementPtrInst *NewDestGEP = GetElementPtrInst::CreateInBounds(
-                DepIntrinsic->getRawDest(), Indices, "", DepWrite);
-            DepIntrinsic->setDest(NewDestGEP);
-          }
-          MadeChange = true;
-        }
+        MadeChange = tryToShorten(DepWrite, DepWriteOffset, EarlierSize,
+                                  InstWriteOffset, LaterSize, IsOverwriteEnd);
       }
     }
@@ -1012,10 +1098,13 @@
     }
   }
 
+  if (EnablePartialOverwriteTracking)
+    MadeChange |= removePartiallyOverlappedStores(AA, DL, IOL);
+
   // If this block ends in a return, unwind, or unreachable, all allocas are
   // dead at its end, which means stores to them are also dead.
   if (BB.getTerminator()->getNumSuccessors() == 0)
-    MadeChange |= handleEndBlock(BB, AA, MD, TLI);
+    MadeChange |= handleEndBlock(BB, AA, MD, TLI, IOL);
 
   return MadeChange;
 }
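Note on the data structure (illustration only, not part of the patch): each
entry of InstOverlapIntervalsTy maps an earlier write to an interval map
keyed by interval *end* offset, with the *start* offset as the value. That
is why tryToShortenBegin probes IntervalMap.begin() (the left-most overlap)
and tryToShortenEnd probes --IntervalMap.end() (the right-most overlap). A
minimal standalone C++ sketch of that bookkeeping, with invented values:

    // Sketch only: mirrors OverlapIntervalsTy (std::map<int64_t, int64_t>),
    // which maps interval end -> interval start.
    #include <cassert>
    #include <cstdint>
    #include <map>

    typedef std::map<int64_t, int64_t> OverlapIntervalsTy;

    int main() {
      OverlapIntervalsTy IntervalMap;
      // Later stores overwrote bytes [0, 8) and [24, 32) of a 32-byte write.
      IntervalMap[8] = 0;
      IntervalMap[32] = 24;

      // Right-most interval, as consumed by tryToShortenEnd.
      OverlapIntervalsTy::iterator OII = --IntervalMap.end();
      int64_t LaterStart = OII->second;            // 24
      int64_t LaterSize = OII->first - LaterStart; // 8
      assert(LaterStart == 24 && LaterSize == 8);

      // Left-most interval, as consumed by tryToShortenBegin.
      OII = IntervalMap.begin();
      assert(OII->second == 0 && OII->first - OII->second == 8);
      return 0;
    }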
Index: test/Transforms/DeadStoreElimination/2016-07-17-UseAfterFree.ll
===================================================================
--- /dev/null
+++ test/Transforms/DeadStoreElimination/2016-07-17-UseAfterFree.ll
@@ -0,0 +1,32 @@
+; RUN: opt < %s -basicaa -dse -S -enable-dse-partial-overwrite-tracking | FileCheck %s
+; PR28588
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Function Attrs: nounwind
+define void @_UPT_destroy(i8* nocapture %ptr) local_unnamed_addr #0 {
+entry:
+  %edi = getelementptr inbounds i8, i8* %ptr, i64 8
+
+; CHECK-NOT: tail call void @llvm.memset.p0i8.i64(i8* %edi, i8 0, i64 176, i32 8, i1 false)
+; CHECK-NOT: store i32 -1, i32* %addr
+
+  tail call void @llvm.memset.p0i8.i64(i8* %edi, i8 0, i64 176, i32 8, i1 false)
+  %format4.i = getelementptr inbounds i8, i8* %ptr, i64 144
+  %addr = bitcast i8* %format4.i to i32*
+  store i32 -1, i32* %addr, align 8
+
+; CHECK: tail call void @free
+  tail call void @free(i8* nonnull %ptr)
+  ret void
+}
+
+; Function Attrs: nounwind
+declare void @free(i8* nocapture) local_unnamed_addr #0
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i32, i1) #1
+
+attributes #0 = { nounwind }
+attributes #1 = { argmemonly nounwind }
Index: test/Transforms/DeadStoreElimination/OverwriteStoreBegin.ll
===================================================================
--- test/Transforms/DeadStoreElimination/OverwriteStoreBegin.ll
+++ test/Transforms/DeadStoreElimination/OverwriteStoreBegin.ll
@@ -86,5 +86,23 @@
   ret void
 }
 
+define void @write8To15AndThen0To7(i64* nocapture %P) {
+entry:
+; CHECK-LABEL: @write8To15AndThen0To7(
+; CHECK: [[GEP:%[0-9]+]] = getelementptr inbounds i8, i8* %mybase0, i64 16
+; CHECK: tail call void @llvm.memset.p0i8.i64(i8* [[GEP]], i8 0, i64 16, i32 8, i1 false)
+
+  %base0 = bitcast i64* %P to i8*
+  %mybase0 = getelementptr inbounds i8, i8* %base0, i64 0
+  tail call void @llvm.memset.p0i8.i64(i8* %mybase0, i8 0, i64 32, i32 8, i1 false)
+
+  %base64_0 = getelementptr inbounds i64, i64* %P, i64 0
+  %base64_1 = getelementptr inbounds i64, i64* %P, i64 1
+
+  store i64 1, i64* %base64_1
+  store i64 2, i64* %base64_0
+  ret void
+}
+
 declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
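Note (illustration only): the CHECK lines in @write8To15AndThen0To7 follow
directly from tryToShorten's !IsOverwriteEnd path. A standalone C++ sketch
of the arithmetic, using the values from the test above (the two i64 stores
are assumed to have been merged into one [0, 16) overlap interval):

    // Sketch only: begin-shortening arithmetic from tryToShorten.
    #include <cassert>
    #include <cstdint>

    int main() {
      int64_t EarlierOffset = 0, EarlierSize = 32; // 32-byte memset
      int64_t LaterOffset = 0, LaterSize = 16;     // stores cover bytes 0..15

      LaterOffset = LaterOffset + LaterSize;       // boundary moves to 16
      int64_t NewLength = EarlierSize - (LaterOffset - EarlierOffset); // 16
      int64_t OffsetMoved = LaterOffset - EarlierOffset;               // 16

      // Matches the expected rewrite: a GEP of %mybase0 by i64 16 and a
      // 16-byte memset, i.e. the trimmed write now covers bytes [16, 32).
      assert(NewLength == 16 && OffsetMoved == 16);
      return 0;
    }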
Index: test/Transforms/DeadStoreElimination/OverwriteStoreEnd.ll
===================================================================
--- test/Transforms/DeadStoreElimination/OverwriteStoreEnd.ll
+++ test/Transforms/DeadStoreElimination/OverwriteStoreEnd.ll
@@ -93,3 +93,20 @@
   store i64 3, i64* %tf_trapno, align 8
   ret void
 }
+
+define void @write16To23AndThen24To31(i64* nocapture %P, i64 %n64, i32 %n32, i16 %n16, i8 %n8) {
+entry:
+; CHECK-LABEL: @write16To23AndThen24To31(
+; CHECK: tail call void @llvm.memset.p0i8.i64(i8* %mybase0, i8 0, i64 16, i32 8, i1 false)
+
+  %base0 = bitcast i64* %P to i8*
+  %mybase0 = getelementptr inbounds i8, i8* %base0, i64 0
+  tail call void @llvm.memset.p0i8.i64(i8* %mybase0, i8 0, i64 32, i32 8, i1 false)
+
+  %base64_2 = getelementptr inbounds i64, i64* %P, i64 2
+  %base64_3 = getelementptr inbounds i64, i64* %P, i64 3
+
+  store i64 3, i64* %base64_2
+  store i64 3, i64* %base64_3
+  ret void
+}
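Note (illustration only): shortening is gated on keeping the remaining write
well aligned; tryToShorten bails out unless the new boundary is a power of
two at least as large as the intrinsic's alignment, or a multiple of that
alignment. A standalone C++ sketch of that guard, with invented values:

    // Sketch only: the alignment check tryToShorten applies before trimming.
    #include <cstdint>
    #include <cstdio>

    static bool isPowerOf2_64(uint64_t V) { return V && (V & (V - 1)) == 0; }

    static bool canTrimAt(int64_t Boundary, unsigned Align) {
      return (isPowerOf2_64(uint64_t(Boundary)) && Align <= uint64_t(Boundary)) ||
             (Align != 0 && Boundary % Align == 0);
    }

    int main() {
      std::printf("%d\n", canTrimAt(16, 8)); // 1: 16 is pow2 and 8-aligned
      std::printf("%d\n", canTrimAt(12, 8)); // 0: not pow2, 12 % 8 != 0
      return 0;
    }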