diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
--- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -617,23 +617,39 @@
 /// instruction.
 static bool memoryIsNotModifiedBetween(Instruction *FirstI,
                                        Instruction *SecondI,
-                                       AliasAnalysis *AA) {
-  SmallVector<BasicBlock *, 16> WorkList;
-  SmallPtrSet<BasicBlock *, 8> Visited;
+                                       AliasAnalysis *AA,
+                                       const DataLayout &DL,
+                                       DominatorTree *DT) {
+  // Do a backwards scan through the CFG from SecondI to FirstI. Look for
+  // instructions which can modify the memory location accessed by SecondI.
+  //
+  // While doing the walk, keep track of the address to check. It may differ
+  // between basic blocks due to PHI translation.
+  using BlockAddressPair = std::pair<BasicBlock *, PHITransAddr>;
+  SmallVector<BlockAddressPair, 16> WorkList;
+  // Keep track of the address we visited each block with. Bail out if we
+  // visit the same block with a different address.
+  DenseMap<BasicBlock *, Value *> Visited;
+
   BasicBlock::iterator FirstBBI(FirstI);
   ++FirstBBI;
   BasicBlock::iterator SecondBBI(SecondI);
   BasicBlock *FirstBB = FirstI->getParent();
   BasicBlock *SecondBB = SecondI->getParent();
   MemoryLocation MemLoc = MemoryLocation::get(SecondI);
+  auto *MemLocPtr = const_cast<Value *>(MemLoc.Ptr);
 
   // Start checking the SecondBB.
-  WorkList.push_back(SecondBB);
+  WorkList.push_back(
+      std::make_pair(SecondBB, PHITransAddr(MemLocPtr, DL, nullptr)));
   bool isFirstBlock = true;
 
   // Check all blocks going backward until we reach the FirstBB.
   while (!WorkList.empty()) {
-    BasicBlock *B = WorkList.pop_back_val();
+    BlockAddressPair Current = WorkList.pop_back_val();
+    BasicBlock *B = Current.first;
+    PHITransAddr &Addr = Current.second;
+    Value *Ptr = Addr.getAddr();
 
     // Ignore instructions before FirstI if this is the FirstBB.
     BasicBlock::iterator BI = (B == FirstBB ? FirstBBI : B->begin());
@@ -652,16 +668,31 @@
     for (; BI != EI; ++BI) {
       Instruction *I = &*BI;
       if (I->mayWriteToMemory() && I != SecondI)
-        if (isModSet(AA->getModRefInfo(I, MemLoc)))
+        if (isModSet(AA->getModRefInfo(I, MemLoc.getWithNewPtr(Ptr))))
           return false;
     }
     if (B != FirstBB) {
       assert(B != &FirstBB->getParent()->getEntryBlock() &&
              "Should not hit the entry block because SI must be dominated by LI");
       for (auto PredI = pred_begin(B), PE = pred_end(B); PredI != PE; ++PredI) {
-        if (!Visited.insert(*PredI).second)
+        PHITransAddr PredAddr = Addr;
+        if (PredAddr.NeedsPHITranslationFromBlock(B)) {
+          if (!PredAddr.IsPotentiallyPHITranslatable())
+            return false;
+          if (PredAddr.PHITranslateValue(B, *PredI, DT, false))
+            return false;
+        }
+        Value *TranslatedPtr = PredAddr.getAddr();
+        auto Inserted = Visited.insert(std::make_pair(*PredI, TranslatedPtr));
+        if (!Inserted.second) {
+          // We already visited this block before. If it was with a different
+          // address - bail out!
+          if (TranslatedPtr != Inserted.first->second)
+            return false;
+          // ... otherwise just skip it.
           continue;
-        WorkList.push_back(*PredI);
+        }
+        WorkList.push_back(std::make_pair(*PredI, PredAddr));
       }
     }
   }
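
Note (illustration, not part of the patch): for readers unfamiliar with the PHITransAddr API, the per-predecessor step added above boils down to the following sketch. translateAddrIntoPred is a hypothetical helper name; the address is deliberately taken by value, mirroring the `PHITransAddr PredAddr = Addr;` copy in the hunk, because translation mutates it.

static Value *translateAddrIntoPred(PHITransAddr Addr, BasicBlock *B,
                                    BasicBlock *Pred, DominatorTree *DT) {
  if (Addr.NeedsPHITranslationFromBlock(B)) {
    // The address involves a value defined in B (e.g. a PHI); try to
    // rewrite it in terms of values available in Pred.
    if (!Addr.IsPotentiallyPHITranslatable())
      return nullptr;
    if (Addr.PHITranslateValue(B, Pred, DT, /*MustDominate=*/false))
      return nullptr; // PHITranslateValue() returns true on *failure*.
  }
  return Addr.getAddr(); // The address as seen from Pred.
}
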
@@ -1063,7 +1094,8 @@
                                const DataLayout &DL,
                                const TargetLibraryInfo *TLI,
                                InstOverlapIntervalsTy &IOL,
-                               MapVector<Instruction *, bool> &ThrowableInst) {
+                               MapVector<Instruction *, bool> &ThrowableInst,
+                               DominatorTree *DT) {
   // Must be a store instruction.
   StoreInst *SI = dyn_cast<StoreInst>(Inst);
   if (!SI)
     return false;
@@ -1073,7 +1105,8 @@
   // then the store can be removed.
   if (LoadInst *DepLoad = dyn_cast<LoadInst>(SI->getValueOperand())) {
     if (SI->getPointerOperand() == DepLoad->getPointerOperand() &&
-        isRemovable(SI) && memoryIsNotModifiedBetween(DepLoad, SI, AA)) {
+        isRemovable(SI) &&
+        memoryIsNotModifiedBetween(DepLoad, SI, AA, DL, DT)) {
 
       LLVM_DEBUG(
           dbgs() << "DSE: Remove Store Of Load from same pointer:\n  LOAD: "
@@ -1092,7 +1125,7 @@
       dyn_cast<Instruction>(GetUnderlyingObject(SI->getPointerOperand(), DL));
 
   if (UnderlyingPointer && isCallocLikeFn(UnderlyingPointer, TLI) &&
-      memoryIsNotModifiedBetween(UnderlyingPointer, SI, AA)) {
+      memoryIsNotModifiedBetween(UnderlyingPointer, SI, AA, DL, DT)) {
     LLVM_DEBUG(
         dbgs() << "DSE: Remove null store to the calloc'ed object:\n  DEAD: "
                << *Inst << "\n  OBJECT: " << *UnderlyingPointer << '\n');
@@ -1140,7 +1173,7 @@
 
     // eliminateNoopStore will update in iterator, if necessary.
     if (eliminateNoopStore(Inst, BBI, AA, MD, DL, TLI, IOL,
-                           ThrowableInst)) {
+                           ThrowableInst, DT)) {
       MadeChange = true;
       continue;
     }
@@ -1258,7 +1291,7 @@
         Later && isa<ConstantInt>(Later->getValueOperand()) &&
         DL.typeSizeEqualsStoreSize(
             Later->getValueOperand()->getType()) &&
-        memoryIsNotModifiedBetween(Earlier, Later, AA)) {
+        memoryIsNotModifiedBetween(Earlier, Later, AA, DL, DT)) {
       // If the store we find is:
       //   a) partially overwritten by the store to 'Loc'
       //   b) the later store is fully contained in the earlier one and
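
Note (illustration, not part of the patch): in source-level terms, the two patterns eliminateNoopStore recognizes look roughly like this. f and g are made-up functions, and both marked stores are only removable when memoryIsNotModifiedBetween proves there is no intervening write -- which is what the DL/DT plumbing above now extends across PHIs.

#include <cstdlib>

int f(int *p) {
  int v = *p; // Load a value ...
  // ... no writes to *p in between ...
  *p = v;     // ... then store it back to the same pointer: a no-op store.
  return v;
}

char *g() {
  char *m = static_cast<char *>(calloc(9, 20)); // calloc zero-initializes.
  m[3] = 0; // Storing zero over still-zero memory: a no-op store.
  return m;
}
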
diff --git a/llvm/test/Transforms/DeadStoreElimination/simple.ll b/llvm/test/Transforms/DeadStoreElimination/simple.ll
--- a/llvm/test/Transforms/DeadStoreElimination/simple.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/simple.ll
@@ -893,5 +893,299 @@
   ret void
 }
 
+define i32 @test40() {
+; CHECK-LABEL: @test40(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[M:%.*]] = call i8* @calloc(i32 9, i32 20)
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT:    [[P_NEXT:%.*]] = getelementptr inbounds i8, i8* [[M]], i64 [[INDVARS_IV_NEXT]]
+; CHECK-NEXT:    store i8 1, i8* [[P_NEXT]]
+; CHECK-NEXT:    [[P:%.*]] = getelementptr inbounds i8, i8* [[M]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    store i8 0, i8* [[P]]
+; CHECK-NEXT:    [[CONTINUE:%.*]] = icmp ugt i64 [[INDVARS_IV]], 15
+; CHECK-NEXT:    br i1 [[CONTINUE]], label [[LOOP]], label [[RETURN:%.*]]
+; CHECK:       return:
+; CHECK-NEXT:    ret i32 0
+;
+entry:
+  %m = call i8* @calloc(i32 9, i32 20)
+  br label %loop
+loop:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %loop ]
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %p.next = getelementptr inbounds i8, i8* %m, i64 %indvars.iv.next
+  store i8 1, i8* %p.next
+  %p = getelementptr inbounds i8, i8* %m, i64 %indvars.iv
+  store i8 0, i8* %p
+  %continue = icmp ugt i64 %indvars.iv, 15
+  br i1 %continue, label %loop, label %return
+return:
+  ret i32 0
+}
+
+define i32 @test41() {
+; CHECK-LABEL: @test41(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[M:%.*]] = call i8* @calloc(i32 9, i32 20)
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[CONT:%.*]] ]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT:    [[P_NEXT:%.*]] = getelementptr inbounds i8, i8* [[M]], i64 [[INDVARS_IV_NEXT]]
+; CHECK-NEXT:    store i8 1, i8* [[P_NEXT]]
+; CHECK-NEXT:    br label [[CONT]]
+; CHECK:       cont:
+; CHECK-NEXT:    [[P:%.*]] = getelementptr inbounds i8, i8* [[M]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    store i8 0, i8* [[P]]
+; CHECK-NEXT:    [[CONTINUE:%.*]] = icmp ugt i64 [[INDVARS_IV]], 15
+; CHECK-NEXT:    br i1 [[CONTINUE]], label [[LOOP]], label [[RETURN:%.*]]
+; CHECK:       return:
+; CHECK-NEXT:    ret i32 0
+;
+entry:
+  %m = call i8* @calloc(i32 9, i32 20)
+  br label %loop
+loop:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %cont ]
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %p.next = getelementptr inbounds i8, i8* %m, i64 %indvars.iv.next
+  store i8 1, i8* %p.next
+  br label %cont
+
+cont:
+  %p = getelementptr inbounds i8, i8* %m, i64 %indvars.iv
+  store i8 0, i8* %p
+  %continue = icmp ugt i64 %indvars.iv, 15
+  br i1 %continue, label %loop, label %return
+
+return:
+  ret i32 0
+}
+
+; The store is redundant here, but currently we fail to eliminate it.
+; We walk from the store up to the calloc, translating PHIs as needed.
+; In this case we fail to translate %p while going over the backedge, so
+; we conservatively assume that the zero-initialized memory is clobbered.
+define i32 @test42() {
+; CHECK-LABEL: @test42(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[M:%.*]] = call i8* @calloc(i32 9, i32 20)
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[CONT:%.*]] ]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT:    br label [[CONT]]
+; CHECK:       cont:
+; CHECK-NEXT:    [[P:%.*]] = getelementptr inbounds i8, i8* [[M]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    store i8 0, i8* [[P]]
+; CHECK-NEXT:    [[CONTINUE:%.*]] = icmp ugt i64 [[INDVARS_IV]], 15
+; CHECK-NEXT:    br i1 [[CONTINUE]], label [[LOOP]], label [[RETURN:%.*]]
+; CHECK:       return:
+; CHECK-NEXT:    ret i32 0
+;
+entry:
+  %m = call i8* @calloc(i32 9, i32 20)
+  br label %loop
+loop:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %cont ]
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  br label %cont
+
+cont:
+  %p = getelementptr inbounds i8, i8* %m, i64 %indvars.iv
+  store i8 0, i8* %p
+  %continue = icmp ugt i64 %indvars.iv, 15
+  br i1 %continue, label %loop, label %return
+
+return:
+  ret i32 0
+}
+
+define i32 @test43() {
+; CHECK-LABEL: @test43(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[M:%.*]] = call i8* @calloc(i32 9, i32 20)
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[CONT_2:%.*]] ]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT:    [[P_NEXT:%.*]] = getelementptr inbounds i8, i8* [[M]], i64 [[INDVARS_IV_NEXT]]
+; CHECK-NEXT:    store i8 1, i8* [[P_NEXT]]
+; CHECK-NEXT:    br label [[CONT:%.*]]
+; CHECK:       cont:
+; CHECK-NEXT:    [[P:%.*]] = getelementptr inbounds i8, i8* [[M]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    store i8 0, i8* [[P]]
+; CHECK-NEXT:    br label [[CONT_2]]
+; CHECK:       cont.2:
+; CHECK-NEXT:    [[CONTINUE:%.*]] = icmp ugt i64 [[INDVARS_IV]], 15
+; CHECK-NEXT:    br i1 [[CONTINUE]], label [[LOOP]], label [[RETURN:%.*]]
+; CHECK:       return:
+; CHECK-NEXT:    ret i32 0
+;
+entry:
+  %m = call i8* @calloc(i32 9, i32 20)
+  br label %loop
+loop:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %cont.2 ]
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %p.next = getelementptr inbounds i8, i8* %m, i64 %indvars.iv.next
+  store i8 1, i8* %p.next
+  br label %cont
+
+cont:
+  %p = getelementptr inbounds i8, i8* %m, i64 %indvars.iv
+  store i8 0, i8* %p
+  br label %cont.2
+
+cont.2:
+  %continue = icmp ugt i64 %indvars.iv, 15
+  br i1 %continue, label %loop, label %return
+
+return:
+  ret i32 0
+}
+
+define i32 @test44() {
+; CHECK-LABEL: @test44(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[M:%.*]] = call i8* @calloc(i32 9, i32 20)
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[CONT_2:%.*]] ]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT:    [[P_NEXT:%.*]] = getelementptr inbounds i8, i8* [[M]], i64 [[INDVARS_IV_NEXT]]
+; CHECK-NEXT:    [[P:%.*]] = getelementptr inbounds i8, i8* [[M]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    store i8 0, i8* [[P]]
+; CHECK-NEXT:    br label [[CONT:%.*]]
+; CHECK:       cont:
+; CHECK-NEXT:    store i8 1, i8* [[P_NEXT]]
+; CHECK-NEXT:    br label [[CONT_2]]
+; CHECK:       cont.2:
+; CHECK-NEXT:    [[CONTINUE:%.*]] = icmp ugt i64 [[INDVARS_IV]], 15
+; CHECK-NEXT:    br i1 [[CONTINUE]], label [[LOOP]], label [[RETURN:%.*]]
+; CHECK:       return:
+; CHECK-NEXT:    ret i32 0
+;
+entry:
+  %m = call i8* @calloc(i32 9, i32 20)
+  br label %loop
+loop:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %cont.2 ]
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %p.next = getelementptr inbounds i8, i8* %m, i64 %indvars.iv.next
+  %p = getelementptr inbounds i8, i8* %m, i64 %indvars.iv
+  store i8 0, i8* %p
+  br label %cont
+
+cont:
+  store i8 1, i8* %p.next
+  br label %cont.2
+
+cont.2:
+  %continue = icmp ugt i64 %indvars.iv, 15
+  br i1 %continue, label %loop, label %return
+
+return:
+  ret i32 0
+}
+
+; This is an example that could potentially benefit from PHI translation.
+; The current implementation doesn't handle it, though, because we never
+; visit the same block with two different addresses while looking for
+; clobbering instructions: here %entry is reached with the translated
+; offsets 2 (via %true) and 1 (via %false), so the walk bails out.
+define i32 @test45(i1 %c) {
+; CHECK-LABEL: @test45(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[M:%.*]] = call i8* @calloc(i32 9, i32 20)
+; CHECK-NEXT:    br i1 [[C:%.*]], label [[TRUE:%.*]], label [[FALSE:%.*]]
+; CHECK:       true:
+; CHECK-NEXT:    [[P_1:%.*]] = getelementptr inbounds i8, i8* [[M]], i64 1
+; CHECK-NEXT:    store i8 1, i8* [[P_1]]
+; CHECK-NEXT:    br label [[CONT:%.*]]
+; CHECK:       false:
+; CHECK-NEXT:    [[P_2:%.*]] = getelementptr inbounds i8, i8* [[M]], i64 2
+; CHECK-NEXT:    store i8 1, i8* [[P_2]]
+; CHECK-NEXT:    br label [[CONT]]
+; CHECK:       cont:
+; CHECK-NEXT:    [[OFFSET:%.*]] = phi i64 [ 2, [[TRUE]] ], [ 1, [[FALSE]] ]
+; CHECK-NEXT:    [[P:%.*]] = getelementptr inbounds i8, i8* [[M]], i64 [[OFFSET]]
+; CHECK-NEXT:    store i8 0, i8* [[P]]
+; CHECK-NEXT:    br label [[RETURN:%.*]]
+; CHECK:       return:
+; CHECK-NEXT:    ret i32 0
+;
+entry:
+  %m = call i8* @calloc(i32 9, i32 20)
+  br i1 %c, label %true, label %false
+
+true:
+  %p.1 = getelementptr inbounds i8, i8* %m, i64 1
+  store i8 1, i8* %p.1
+  br label %cont
+
+false:
+  %p.2 = getelementptr inbounds i8, i8* %m, i64 2
+  store i8 1, i8* %p.2
+  br label %cont
+
+cont:
+  %offset = phi i64 [ 2, %true ], [ 1, %false ]
+  %p = getelementptr inbounds i8, i8* %m, i64 %offset
+  store i8 0, i8* %p
+  br label %return
+
+return:
+  ret i32 0
+}
+
+; This is test45, modified to demonstrate PHI translation improving the
+; accuracy of the analysis (albeit on a slightly convoluted case): both
+; incoming values of %offset are 2, so every predecessor translates %p to
+; the same address and the store of zero is proven dead.
+define i32 @test46(i1 %c) {
+; CHECK-LABEL: @test46(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[M:%.*]] = call i8* @calloc(i32 9, i32 20)
+; CHECK-NEXT:    [[P_1:%.*]] = getelementptr inbounds i8, i8* [[M]], i64 1
+; CHECK-NEXT:    [[P_2:%.*]] = getelementptr inbounds i8, i8* [[M]], i64 2
+; CHECK-NEXT:    br i1 [[C:%.*]], label [[TRUE:%.*]], label [[FALSE:%.*]]
+; CHECK:       true:
+; CHECK-NEXT:    store i8 1, i8* [[P_1]]
+; CHECK-NEXT:    br label [[CONT:%.*]]
+; CHECK:       false:
+; CHECK-NEXT:    store i8 1, i8* [[P_1]]
+; CHECK-NEXT:    br label [[CONT]]
+; CHECK:       cont:
+; CHECK-NEXT:    br label [[RETURN:%.*]]
+; CHECK:       return:
+; CHECK-NEXT:    ret i32 0
+;
+entry:
+  %m = call i8* @calloc(i32 9, i32 20)
+  %p.1 = getelementptr inbounds i8, i8* %m, i64 1
+  %p.2 = getelementptr inbounds i8, i8* %m, i64 2
+  br i1 %c, label %true, label %false
+
+true:
+  store i8 1, i8* %p.1
+  br label %cont
+
+false:
+  store i8 1, i8* %p.1
+  br label %cont
+
+cont:
+  %offset = phi i64 [ 2, %true ], [ 2, %false ]
+  %p = getelementptr inbounds i8, i8* %m, i64 %offset
+  store i8 0, i8* %p
+  br label %return
+
+return:
+  ret i32 0
+}
+
 declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i1)
 declare void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32)
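
Note (illustration, not part of the patch): the following small self-contained toy replays the backward walk over the diamond CFG shared by test45 and test46. The Graph type and block names are invented; addresses are modeled as constant byte offsets from the calloc'ed base, and translating the address from "cont" into a predecessor substitutes that predecessor's incoming PHI constant. It shows why test45 bails out (entry is reached with two different translated offsets) while test46 proves the store dead.

#include <iostream>
#include <map>
#include <optional>
#include <string>
#include <utility>
#include <vector>

struct Graph {
  std::map<std::string, std::vector<std::string>> Preds; // block -> preds
  std::map<std::string, int> PhiIncoming; // pred of "cont" -> incoming const
  std::map<std::string, std::optional<int>> StoreOffset; // block -> store
};

// Returns true if the store in "cont" is provably not clobbered between the
// calloc in "entry" and "cont"; false means a clobber was found or the walk
// gave up. (The real code also scans "cont" itself; omitted for brevity.)
static bool memoryNotModified(const Graph &G) {
  std::map<std::string, int> Visited; // block -> offset it was reached with
  std::vector<std::pair<std::string, int>> WorkList;
  for (const std::string &Pred : G.Preds.at("cont"))
    WorkList.push_back({Pred, G.PhiIncoming.at(Pred)}); // "PHI translation"
  while (!WorkList.empty()) {
    auto [B, Off] = WorkList.back();
    WorkList.pop_back();
    if (G.StoreOffset.at(B) == Off)
      return false; // a store in B clobbers the translated address
    if (B == "entry")
      continue; // reached the calloc; this path is fine
    for (const std::string &Pred : G.Preds.at(B)) {
      auto [It, Inserted] = Visited.insert({Pred, Off});
      if (!Inserted) {
        if (It->second != Off)
          return false; // same block, different address: bail out
        continue;
      }
      WorkList.push_back({Pred, Off});
    }
  }
  return true;
}

int main() {
  Graph G;
  G.Preds = {{"cont", {"true", "false"}},
             {"true", {"entry"}},
             {"false", {"entry"}}};

  // test45 shape: %offset = phi [ 2, %true ], [ 1, %false ]; the arms store
  // to offsets 1 and 2, and "entry" is reached with offsets 2 and 1.
  G.PhiIncoming = {{"true", 2}, {"false", 1}};
  G.StoreOffset = {{"entry", std::nullopt}, {"true", 1}, {"false", 2}};
  std::cout << "test45: " << (memoryNotModified(G) ? "dead" : "kept") << "\n";

  // test46 shape: both incoming values are 2 and both arms store to offset
  // 1, so every path agrees and the store of zero is dead.
  G.PhiIncoming = {{"true", 2}, {"false", 2}};
  G.StoreOffset = {{"entry", std::nullopt}, {"true", 1}, {"false", 1}};
  std::cout << "test46: " << (memoryNotModified(G) ? "dead" : "kept") << "\n";
}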