Index: lib/Transforms/Scalar/DeadStoreElimination.cpp
===================================================================
--- lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -24,6 +24,7 @@
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/CaptureTracking.h"
+#include "llvm/Analysis/CFG.h"
 #include "llvm/Analysis/GlobalsModRef.h"
 #include "llvm/Analysis/MemoryBuiltins.h"
 #include "llvm/Analysis/MemoryDependenceAnalysis.h"
@@ -90,6 +91,8 @@
 //===----------------------------------------------------------------------===//
 using OverlapIntervalsTy = std::map<int64_t, int64_t>;
 using InstOverlapIntervalsTy = DenseMap<Instruction *, OverlapIntervalsTy>;
+using BackedgesTy =
+    SmallSet<std::pair<const BasicBlock *, const BasicBlock *>, 8>;
 
 /// Delete this instruction. Before we do, go through and zero out all the
 /// operands of this instruction. If any of them become dead, delete them and
@@ -584,8 +587,33 @@
 /// instruction.
 static bool memoryIsNotModifiedBetween(Instruction *FirstI,
                                        Instruction *SecondI,
-                                       AliasAnalysis *AA) {
-  SmallVector<BasicBlock *, 16> WorkList;
+                                       AliasAnalysis *AA,
+                                       BackedgesTy &Backedges) {
+  // Do a backwards scan through the CFG from SecondI to FirstI. Look for
+  // instructions which can modify the memory location accessed by SecondI.
+  //
+  // While scanning through the CFG, keep track of whether we visited the
+  // block through a back edge. If we did, we are analyzing instructions
+  // across different iterations of the loop and need to be more
+  // conservative. Consider this example:
+  //   a = calloc(n+1)
+  //   for (int i = 0; i < n; i++) {
+  //     store 1, a[i+1] // (1)
+  //     store 0, a[i]   // (2)
+  //   }
+  // Scanning from store (2) to the calloc we'll visit the loop basic block
+  // through the backedge. If we ask BasicAA whether store (1) aliases with
+  // store (2), it will report NoAlias, since the two addresses are distinct
+  // offsets from the same base (a[i] and a[i+1]). That is correct within a
+  // single iteration of the loop, but across iterations they do alias.
+  //
+  // If we visit a block through a backedge, we treat any may-write-to-memory
+  // instruction in it as a clobber.
+  //
+  // We use a PointerIntPair to track the BasicBlock to visit together with
+  // whether we reached it through a back edge.
+  using Block = PointerIntPair<BasicBlock *, 1, bool>;
+  SmallVector<Block, 16> WorkList;
   SmallPtrSet<BasicBlock *, 8> Visited;
   BasicBlock::iterator FirstBBI(FirstI);
   ++FirstBBI;
@@ -595,12 +623,15 @@
   MemoryLocation MemLoc = MemoryLocation::get(SecondI);
 
   // Start checking the store-block.
-  WorkList.push_back(SecondBB);
+  WorkList.emplace_back(SecondBB, /*VisitedThroughBackedge=*/false);
   bool isFirstBlock = true;
 
   // Check all blocks going backward until we reach the load-block.
   while (!WorkList.empty()) {
-    BasicBlock *B = WorkList.pop_back_val();
+    Block CurBlock = WorkList.pop_back_val();
+    BasicBlock *B = CurBlock.getPointer();
+    bool VisitedThroughBackedge = CurBlock.getInt();
 
     // Ignore instructions before LI if this is the FirstBB.
     BasicBlock::iterator BI = (B == FirstBB ? FirstBBI : B->begin());
@@ -618,17 +649,25 @@
     }
     for (; BI != EI; ++BI) {
       Instruction *I = &*BI;
-      if (I->mayWriteToMemory() && I != SecondI)
+      if (I->mayWriteToMemory() && I != SecondI) {
+        // If we visited this block through a backedge we are analyzing
+        // instructions across different iterations. Be conservative and
+        // treat any may-write-to-memory instruction as clobbering.
+        if (VisitedThroughBackedge)
+          return false;
         if (isModSet(AA->getModRefInfo(I, MemLoc)))
           return false;
+      }
     }
     if (B != FirstBB) {
       assert(B != &FirstBB->getParent()->getEntryBlock() &&
          "Should not hit the entry block because SI must be dominated by LI");
       for (auto PredI = pred_begin(B), PE = pred_end(B); PredI != PE; ++PredI) {
-        if (!Visited.insert(*PredI).second)
+        auto *Pred = *PredI;
+        if (!Visited.insert(Pred).second)
           continue;
-        WorkList.push_back(*PredI);
+        bool IsBackedge = Backedges.count(std::make_pair(B, Pred));
+        WorkList.emplace_back(Pred, VisitedThroughBackedge | IsBackedge);
       }
     }
   }
@@ -1028,7 +1067,8 @@
                                const DataLayout &DL,
                                const TargetLibraryInfo *TLI,
                                InstOverlapIntervalsTy &IOL,
-                               OrderedBasicBlock &OBB) {
+                               OrderedBasicBlock &OBB,
+                               BackedgesTy &Backedges) {
   // Must be a store instruction.
   StoreInst *SI = dyn_cast<StoreInst>(Inst);
   if (!SI)
@@ -1038,7 +1078,8 @@
   // then the store can be removed.
   if (LoadInst *DepLoad = dyn_cast<LoadInst>(SI->getValueOperand())) {
     if (SI->getPointerOperand() == DepLoad->getPointerOperand() &&
-        isRemovable(SI) && memoryIsNotModifiedBetween(DepLoad, SI, AA)) {
+        isRemovable(SI) &&
+        memoryIsNotModifiedBetween(DepLoad, SI, AA, Backedges)) {
 
       LLVM_DEBUG(
           dbgs() << "DSE: Remove Store Of Load from same pointer:\n  LOAD: "
@@ -1057,7 +1098,7 @@
         dyn_cast<Instruction>(GetUnderlyingObject(SI->getPointerOperand(), DL));
 
     if (UnderlyingPointer && isCallocLikeFn(UnderlyingPointer, TLI) &&
-        memoryIsNotModifiedBetween(UnderlyingPointer, SI, AA)) {
+        memoryIsNotModifiedBetween(UnderlyingPointer, SI, AA, Backedges)) {
       LLVM_DEBUG(
           dbgs() << "DSE: Remove null store to the calloc'ed object:\n  DEAD: "
                  << *Inst << "\n  OBJECT: " << *UnderlyingPointer << '\n');
@@ -1072,7 +1113,8 @@
 static bool eliminateDeadStores(BasicBlock &BB, AliasAnalysis *AA,
                                 MemoryDependenceResults *MD, DominatorTree *DT,
-                                const TargetLibraryInfo *TLI) {
+                                const TargetLibraryInfo *TLI,
+                                BackedgesTy &Backedges) {
   const DataLayout &DL = BB.getModule()->getDataLayout();
   bool MadeChange = false;
@@ -1105,7 +1147,7 @@
       continue;
 
     // eliminateNoopStore will update in iterator, if necessary.
-    if (eliminateNoopStore(Inst, BBI, AA, MD, DL, TLI, IOL, OBB)) {
+    if (eliminateNoopStore(Inst, BBI, AA, MD, DL, TLI, IOL, OBB, Backedges)) {
      MadeChange = true;
      continue;
    }
@@ -1216,7 +1258,7 @@
             Later && isa<ConstantInt>(Later->getValueOperand()) &&
             DL.typeSizeEqualsStoreSize(
                 Later->getValueOperand()->getType()) &&
-            memoryIsNotModifiedBetween(Earlier, Later, AA)) {
+            memoryIsNotModifiedBetween(Earlier, Later, AA, Backedges)) {
           // If the store we find is:
           //   a) partially overwritten by the store to 'Loc'
           //   b) the later store is fully contained in the earlier one and
@@ -1312,12 +1354,19 @@
 static bool eliminateDeadStores(Function &F, AliasAnalysis *AA,
                                 MemoryDependenceResults *MD, DominatorTree *DT,
                                 const TargetLibraryInfo *TLI) {
+  SmallVector<std::pair<const BasicBlock *, const BasicBlock *>, 8>
+      BackedgesVector;
+  llvm::FindFunctionBackedges(F, BackedgesVector);
+  BackedgesTy Backedges;
+  for (auto BE : BackedgesVector)
+    Backedges.insert(BE);
+
   bool MadeChange = false;
   for (BasicBlock &BB : F)
     // Only check non-dead blocks. Dead blocks may have strange pointer
     // cycles that will confuse alias analysis.
     if (DT->isReachableFromEntry(&BB))
-      MadeChange |= eliminateDeadStores(BB, AA, MD, DT, TLI);
+      MadeChange |= eliminateDeadStores(BB, AA, MD, DT, TLI, Backedges);
 
   return MadeChange;
 }
Index: test/Transforms/DeadStoreElimination/simple.ll
===================================================================
--- test/Transforms/DeadStoreElimination/simple.ll
+++ test/Transforms/DeadStoreElimination/simple.ll
@@ -893,5 +893,77 @@
   ret void
 }
 
+define i32 @test40() {
+; CHECK-LABEL: @test40(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[M:%.*]] = call i8* @calloc(i32 9, i32 20)
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT:    [[P_NEXT:%.*]] = getelementptr inbounds i8, i8* [[M]], i64 [[INDVARS_IV_NEXT]]
+; CHECK-NEXT:    store i8 1, i8* [[P_NEXT]]
+; CHECK-NEXT:    [[P:%.*]] = getelementptr inbounds i8, i8* [[M]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    store i8 0, i8* [[P]]
+; CHECK-NEXT:    [[CONTINUE:%.*]] = icmp ugt i64 [[INDVARS_IV]], 15
+; CHECK-NEXT:    br i1 [[CONTINUE]], label [[LOOP]], label [[RETURN:%.*]]
+; CHECK:       return:
+; CHECK-NEXT:    ret i32 0
+;
+entry:
+  %m = call i8* @calloc(i32 9, i32 20)
+  br label %loop
+loop:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %loop ]
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %p.next = getelementptr inbounds i8, i8* %m, i64 %indvars.iv.next
+  store i8 1, i8* %p.next
+  %p = getelementptr inbounds i8, i8* %m, i64 %indvars.iv
+  store i8 0, i8* %p
+  %continue = icmp ugt i64 %indvars.iv, 15
+  br i1 %continue, label %loop, label %return
+return:
+  ret i32 0
+}
+
+define i32 @test41() {
+; CHECK-LABEL: @test41(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[M:%.*]] = call i8* @calloc(i32 9, i32 20)
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[CONT:%.*]] ]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT:    [[P_NEXT:%.*]] = getelementptr inbounds i8, i8* [[M]], i64 [[INDVARS_IV_NEXT]]
+; CHECK-NEXT:    store i8 1, i8* [[P_NEXT]]
+; CHECK-NEXT:    br label [[CONT]]
+; CHECK:       cont:
+; CHECK-NEXT:    [[P:%.*]] = getelementptr inbounds i8, i8* [[M]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    store i8 0, i8* [[P]]
+; CHECK-NEXT:    [[CONTINUE:%.*]] = icmp ugt i64 [[INDVARS_IV]], 15
+; CHECK-NEXT:    br i1 [[CONTINUE]], label [[LOOP]], label [[RETURN:%.*]]
+; CHECK:       return:
+; CHECK-NEXT:    ret i32 0
+;
+entry:
+  %m = call i8* @calloc(i32 9, i32 20)
+  br label %loop
+loop:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %cont ]
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %p.next = getelementptr inbounds i8, i8* %m, i64 %indvars.iv.next
+  store i8 1, i8* %p.next
+  br label %cont
+
+cont:
+  %p = getelementptr inbounds i8, i8* %m, i64 %indvars.iv
+  store i8 0, i8* %p
+  %continue = icmp ugt i64 %indvars.iv, 15
+  br i1 %continue, label %loop, label %return
+
+return:
+  ret i32 0
+}
+
 declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i1)
 declare void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32)
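For reference, here is a small standalone C++ sketch (not part of the patch) of the miscompile the new backedge check prevents. It mirrors the calloc example from the comment in memoryIsNotModifiedBetween and the IR in test40; the driver itself (main, n = 4, the printing loop) is illustrative only. Within a single iteration the two stores never overlap, so BasicAA correctly reports NoAlias, yet store (2) is not a removable "null store to calloc'ed memory": it overwrites the 1 written by store (1) in the previous iteration.

#include <cstdio>
#include <cstdlib>

int main() {
  const int n = 4;
  // std::calloc zero-initializes the buffer, which is what makes the
  // "store 0" below look like a redundant store to DSE.
  char *a = static_cast<char *>(std::calloc(n + 1, 1));
  for (int i = 0; i < n; i++) {
    a[i + 1] = 1; // (1) writes the byte that iteration i+1 zeroes again
    a[i] = 0;     // (2) not dead: undoes (1) from the previous iteration
  }
  // Correct output: 0 0 0 0 1
  // If DSE dropped store (2), the output would be: 0 1 1 1 1
  for (int i = 0; i <= n; i++)
    std::printf("%d ", a[i]);
  std::printf("\n");
  std::free(a);
  return 0;
}

test40 and test41 encode this same pattern in IR: their CHECK-NEXT lines verify that both stores survive DSE, and test41 additionally splits the loop body across two blocks so that the backedge is reached through an intermediate block.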