Commit 533bc3f

Author: Chad Rosier
Committed: Dec 10, 2015

[DeadStoreElimination] Add support for non-local DSE.

We extend the search for redundant stores to predecessor blocks that
unconditionally lead to the block BB containing the current store
instruction. That also includes single-block loops that unconditionally
lead to BB, and if-then-else blocks where the then- and else-blocks
unconditionally lead to BB.

http://reviews.llvm.org/D13363
Patch by Ivan Baev <ibaev@codeaurora.org>!

llvm-svn: 255247

1 parent ac8d01a · commit 533bc3f
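To see the new cases concretely, consider a minimal IR sketch of the
if-then-else pattern (a hypothetical example written for this note; the
function and value names are made up, and it is not one of the commit's test
cases). Both branch stores are dead because the join block overwrites the same
location on every path, which is exactly what the non-local search can now
prove:

    define void @if_then_else(i32* %p, i1 %c) {
    entry:
      br i1 %c, label %then, label %else

    then:                        ; unconditionally leads to %bb
      store i32 1, i32* %p       ; non-locally dead
      br label %bb

    else:                        ; likewise unconditionally leads to %bb
      store i32 2, i32* %p       ; non-locally dead
      br label %bb

    bb:
      store i32 3, i32* %p       ; the killer store
      ret void
    }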

File tree

6 files changed: +393 −90 lines

llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp (+242 −90)
@@ -7,11 +7,8 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This file implements a trivial dead store elimination that only considers
-// basic-block local redundant stores.
-//
-// FIXME: This should eventually be extended to be a post-dominator tree
-// traversal. Doing so would be pretty trivial.
+// This file implements dead store elimination that considers redundant stores
+// within a basic-block as well as across basic blocks in a reverse CFG order.
 //
 //===----------------------------------------------------------------------===//
 
@@ -44,6 +41,13 @@ using namespace llvm;
 STATISTIC(NumRedundantStores, "Number of redundant stores deleted");
 STATISTIC(NumFastStores, "Number of stores deleted");
 STATISTIC(NumFastOther , "Number of other instrs removed");
+STATISTIC(NumNonLocalStores, "Number of non-local stores deleted");
+
+static cl::opt<bool> EnableNonLocalDSE("enable-nonlocal-dse", cl::init(true));
+
+/// MaxNonLocalAttempts is an arbitrary threshold that provides
+/// an early opportunity to bail out, in order to control compile time.
+static const unsigned MaxNonLocalAttempts = 100;
 
 namespace {
   struct DSE : public FunctionPass {
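A note on the two knobs just added: EnableNonLocalDSE defaults to on
(cl::init(true)), so the non-local search runs unless explicitly disabled,
e.g. with an invocation along the lines of opt -dse -enable-nonlocal-dse=false
-S in.ll (the flag name comes from the diff; the surrounding opt command line
is an assumed, typical usage). MaxNonLocalAttempts then bounds how many stores
in a single block may start a non-local search, keeping compile time in check.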
@@ -80,6 +84,7 @@ namespace {
     bool runOnBasicBlock(BasicBlock &BB);
     bool MemoryIsNotModifiedBetween(Instruction *FirstI, Instruction *SecondI);
     bool HandleFree(CallInst *F);
+    bool handleNonLocalDependency(Instruction *Inst);
     bool handleEndBlock(BasicBlock &BB);
     void RemoveAccessedObjects(const MemoryLocation &LoadedLoc,
                                SmallSetVector<Value *, 16> &DeadStackObjects,
@@ -485,6 +490,7 @@ static bool isPossibleSelfRead(Instruction *Inst,
 bool DSE::runOnBasicBlock(BasicBlock &BB) {
   const DataLayout &DL = BB.getModule()->getDataLayout();
   bool MadeChange = false;
+  unsigned NumNonLocalAttempts = 0;
 
   // Do a top-down walk on the BB.
   for (BasicBlock::iterator BBI = BB.begin(), BBE = BB.end(); BBI != BBE; ) {
@@ -554,99 +560,101 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
 
     MemDepResult InstDep = MD->getDependency(Inst);
 
-    // Ignore any store where we can't find a local dependence.
-    // FIXME: cross-block DSE would be fun. :)
-    if (!InstDep.isDef() && !InstDep.isClobber())
-      continue;
-
-    // Figure out what location is being stored to.
-    MemoryLocation Loc = getLocForWrite(Inst, *AA);
+    if (InstDep.isDef() || InstDep.isClobber()) {
+      // Figure out what location is being stored to.
+      MemoryLocation Loc = getLocForWrite(Inst, *AA);
 
-    // If we didn't get a useful location, fail.
-    if (!Loc.Ptr)
-      continue;
-
-    while (InstDep.isDef() || InstDep.isClobber()) {
-      // Get the memory clobbered by the instruction we depend on. MemDep will
-      // skip any instructions that 'Loc' clearly doesn't interact with. If we
-      // end up depending on a may- or must-aliased load, then we can't optimize
-      // away the store and we bail out. However, if we depend on on something
-      // that overwrites the memory location we *can* potentially optimize it.
-      //
-      // Find out what memory location the dependent instruction stores.
-      Instruction *DepWrite = InstDep.getInst();
-      MemoryLocation DepLoc = getLocForWrite(DepWrite, *AA);
-      // If we didn't get a useful location, or if it isn't a size, bail out.
-      if (!DepLoc.Ptr)
-        break;
+      // If we didn't get a useful location, fail.
+      if (!Loc.Ptr)
+        continue;
 
-      // If we find a write that is a) removable (i.e., non-volatile), b) is
-      // completely obliterated by the store to 'Loc', and c) which we know that
-      // 'Inst' doesn't load from, then we can remove it.
-      if (isRemovable(DepWrite) &&
-          !isPossibleSelfRead(Inst, Loc, DepWrite, *TLI, *AA)) {
-        int64_t InstWriteOffset, DepWriteOffset;
-        OverwriteResult OR =
-            isOverwrite(Loc, DepLoc, DL, *TLI, DepWriteOffset, InstWriteOffset);
-        if (OR == OverwriteComplete) {
-          DEBUG(dbgs() << "DSE: Remove Dead Store:\n  DEAD: "
-                << *DepWrite << "\n  KILLER: " << *Inst << '\n');
-
-          // Delete the store and now-dead instructions that feed it.
-          DeleteDeadInstruction(DepWrite, *MD, *TLI);
-          ++NumFastStores;
-          MadeChange = true;
-
-          // DeleteDeadInstruction can delete the current instruction in loop
-          // cases, reset BBI.
-          BBI = Inst->getIterator();
-          if (BBI != BB.begin())
-            --BBI;
+      while (InstDep.isDef() || InstDep.isClobber()) {
+        // Get the memory clobbered by the instruction we depend on. MemDep
+        // will skip any instructions that 'Loc' clearly doesn't interact with.
+        // If we end up depending on a may- or must-aliased load, then we can't
+        // optimize away the store and we bail out. However, if we depend on
+        // something that overwrites the memory location we *can* potentially
+        // optimize it.
+        //
+        // Find out what memory location the dependent instruction stores.
+        Instruction *DepWrite = InstDep.getInst();
+        MemoryLocation DepLoc = getLocForWrite(DepWrite, *AA);
+        // If we didn't get a useful location, or if it isn't a size, bail out.
+        if (!DepLoc.Ptr)
          break;
-        } else if (OR == OverwriteEnd && isShortenable(DepWrite)) {
-          // TODO: base this on the target vector size so that if the earlier
-          // store was too small to get vector writes anyway then its likely
-          // a good idea to shorten it
-          // Power of 2 vector writes are probably always a bad idea to optimize
-          // as any store/memset/memcpy is likely using vector instructions so
-          // shortening it to not vector size is likely to be slower
-          MemIntrinsic* DepIntrinsic = cast<MemIntrinsic>(DepWrite);
-          unsigned DepWriteAlign = DepIntrinsic->getAlignment();
-          if (llvm::isPowerOf2_64(InstWriteOffset) ||
-              ((DepWriteAlign != 0) && InstWriteOffset % DepWriteAlign == 0)) {
-
-            DEBUG(dbgs() << "DSE: Remove Dead Store:\n  OW END: "
-                  << *DepWrite << "\n  KILLER (offset "
-                  << InstWriteOffset << ", "
-                  << DepLoc.Size << ")"
-                  << *Inst << '\n');
-
-            Value* DepWriteLength = DepIntrinsic->getLength();
-            Value* TrimmedLength = ConstantInt::get(DepWriteLength->getType(),
-                                                    InstWriteOffset -
-                                                    DepWriteOffset);
-            DepIntrinsic->setLength(TrimmedLength);
+
+        // If we find a write that is a) removable (i.e., non-volatile), b) is
+        // completely obliterated by the store to 'Loc', and c) which we know
+        // that 'Inst' doesn't load from, then we can remove it.
+        if (isRemovable(DepWrite) &&
+            !isPossibleSelfRead(Inst, Loc, DepWrite, *TLI, *AA)) {
+          int64_t InstWriteOffset, DepWriteOffset;
+          OverwriteResult OR = isOverwrite(Loc, DepLoc, DL, *TLI,
+                                           DepWriteOffset, InstWriteOffset);
+          if (OR == OverwriteComplete) {
+            DEBUG(dbgs() << "DSE: Remove Dead Store:\n  DEAD: " << *DepWrite
+                         << "\n  KILLER: " << *Inst << '\n');
+
+            // Delete the store and now-dead instructions that feed it.
+            DeleteDeadInstruction(DepWrite, *MD, *TLI);
+            ++NumFastStores;
             MadeChange = true;
+
+            // DeleteDeadInstruction can delete the current instruction in
+            // loop cases, reset BBI.
+            BBI = Inst->getIterator();
+            if (BBI != BB.begin())
+              --BBI;
+            break;
+          } else if (OR == OverwriteEnd && isShortenable(DepWrite)) {
+            // TODO: base this on the target vector size so that if the earlier
+            // store was too small to get vector writes anyway then its likely a
+            // good idea to shorten it.
+
+            // Power of 2 vector writes are probably always a bad idea to
+            // optimize as any store/memset/memcpy is likely using vector
+            // instructions so shortening it to not vector size is likely to be
+            // slower.
+            MemIntrinsic *DepIntrinsic = cast<MemIntrinsic>(DepWrite);
+            unsigned DepWriteAlign = DepIntrinsic->getAlignment();
+            if (llvm::isPowerOf2_64(InstWriteOffset) ||
+                ((DepWriteAlign != 0) &&
+                 InstWriteOffset % DepWriteAlign == 0)) {
+
+              DEBUG(dbgs() << "DSE: Remove Dead Store:\n  OW END: " << *DepWrite
+                           << "\n  KILLER (offset " << InstWriteOffset << ", "
+                           << DepLoc.Size << ")" << *Inst << '\n');
+
+              Value *DepWriteLength = DepIntrinsic->getLength();
+              Value *TrimmedLength = ConstantInt::get(
+                  DepWriteLength->getType(), InstWriteOffset - DepWriteOffset);
+              DepIntrinsic->setLength(TrimmedLength);
+              MadeChange = true;
+            }
           }
         }
-      }
 
-      // If this is a may-aliased store that is clobbering the store value, we
-      // can keep searching past it for another must-aliased pointer that stores
-      // to the same location. For example, in:
-      //   store -> P
-      //   store -> Q
-      //   store -> P
-      // we can remove the first store to P even though we don't know if P and Q
-      // alias.
-      if (DepWrite == &BB.front()) break;
-
-      // Can't look past this instruction if it might read 'Loc'.
-      if (AA->getModRefInfo(DepWrite, Loc) & MRI_Ref)
-        break;
+        // If this is a may-aliased store that is clobbering the store value,
+        // we can keep searching past it for another must-aliased pointer that
+        // stores to the same location. For example, in
+        //   store -> P
+        //   store -> Q
+        //   store -> P
+        // we can remove the first store to P even though we don't know if P
+        // and Q alias.
+        if (DepWrite == &BB.front())
+          break;
+
+        // Can't look past this instruction if it might read 'Loc'.
+        if (AA->getModRefInfo(DepWrite, Loc) & MRI_Ref)
+          break;
 
-      InstDep = MD->getPointerDependencyFrom(Loc, false,
-                                             DepWrite->getIterator(), &BB);
+        InstDep = MD->getPointerDependencyFrom(Loc, false,
+                                               DepWrite->getIterator(), &BB);
+      }
+    } else if (EnableNonLocalDSE && InstDep.isNonLocal()) { // DSE across BB
+      if (++NumNonLocalAttempts < MaxNonLocalAttempts)
+        MadeChange |= handleNonLocalDependency(Inst);
     }
   }
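The store -> P / store -> Q / store -> P comment above, rendered as real IR
for clarity (an illustrative sketch, not part of the commit):

    define void @p_q_p(i32* %P, i32* %Q) {
    entry:
      store i32 1, i32* %P    ; dead: the last store must-aliases it
      store i32 2, i32* %Q    ; may alias %P, but reads nothing, so the
                              ; backward search continues past it
      store i32 3, i32* %P
      ret void
    }

The first store to %P is removed even though %P and %Q may alias, because an
intervening write only stops the search if it might read the location.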

@@ -658,6 +666,150 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
   return MadeChange;
 }
 
+/// A helper for handleNonLocalDependency() to find all blocks
+/// that lead to the input block BB and append them to the output PredBlocks.
+/// PredBlocks will include not only predecessors of BB that unconditionally
+/// lead to BB but also:
+///   - single-block loops that lead to BB, and
+///   - if-blocks for which one edge goes to BB and the other edge goes to
+///     a block in the input SafeBlocks.
+/// PredBlocks will not include blocks unreachable from the entry block, nor
+/// blocks that form cycles with BB.
+static void findSafePreds(SmallVectorImpl<BasicBlock *> &PredBlocks,
+                          SmallSetVector<BasicBlock *, 8> &SafeBlocks,
+                          BasicBlock *BB, DominatorTree *DT) {
+  for (auto I = pred_begin(BB), E = pred_end(BB); I != E; ++I) {
+    BasicBlock *Pred = *I;
+    if (Pred == BB)
+      continue;
+    // The second check below prevents adding blocks that form a cycle with BB
+    // in order to avoid potential problems due to MemoryDependenceAnalysis,
+    // isOverwrite, etc. being not loop-aware.
+    if (!DT->isReachableFromEntry(Pred) || DT->dominates(BB, Pred))
+      continue;
+
+    bool PredIsSafe = true;
+    for (auto II = succ_begin(Pred), EE = succ_end(Pred); II != EE; ++II) {
+      BasicBlock *Succ = *II;
+      if (Succ == BB || Succ == Pred) // shortcut, BB should be in SafeBlocks
+        continue;
+      if (!SafeBlocks.count(Succ)) {
+        PredIsSafe = false;
+        break;
+      }
+    }
+    if (PredIsSafe)
+      PredBlocks.push_back(Pred);
+  }
+}
+
+static bool underlyingObjectsDoNotAlias(StoreInst *SI, LoadInst *LI,
+                                        const DataLayout &DL,
+                                        AliasAnalysis &AA) {
+  Value *AObj = GetUnderlyingObject(SI->getPointerOperand(), DL);
+  SmallVector<Value *, 4> Pointers;
+  GetUnderlyingObjects(LI->getPointerOperand(), Pointers, DL);
+
+  for (auto I = Pointers.begin(), E = Pointers.end(); I != E; ++I) {
+    Value *BObj = *I;
+    if (!AA.isNoAlias(AObj, DL.getTypeStoreSize(AObj->getType()), BObj,
+                      DL.getTypeStoreSize(BObj->getType())))
+      return false;
+  }
+  return true;
+}
+
+/// handleNonLocalDependency - Handle a non-local dependency on
+/// the input instruction Inst located in BB in an attempt to remove
+/// redundant stores outside BB.
+bool DSE::handleNonLocalDependency(Instruction *Inst) {
+  auto *SI = dyn_cast<StoreInst>(Inst);
+  if (!SI)
+    return false;
+  // Get the location being stored to.
+  // If we don't get a useful location, bail out.
+  MemoryLocation Loc = getLocForWrite(SI, *AA);
+  if (!Loc.Ptr)
+    return false;
+
+  bool MadeChange = false;
+  BasicBlock *BB = Inst->getParent();
+  const DataLayout &DL = BB->getModule()->getDataLayout();
+
+  // Worklist of predecessor blocks of BB.
+  SmallVector<BasicBlock *, 8> Blocks;
+  // Keep track of all predecessor blocks that are safe to search through.
+  SmallSetVector<BasicBlock *, 8> SafeBlocks;
+  SafeBlocks.insert(BB);
+  findSafePreds(Blocks, SafeBlocks, BB, DT);
+
+  while (!Blocks.empty()) {
+    BasicBlock *PB = Blocks.pop_back_val();
+    MemDepResult Dep =
+        MD->getPointerDependencyFrom(Loc, false, PB->end(), PB, SI);
+    while (Dep.isDef() || Dep.isClobber()) {
+      Instruction *Dependency = Dep.getInst();
+
+      // Filter out false dependency from a load to SI looking through phis.
+      if (auto *LI = dyn_cast<LoadInst>(Dependency)) {
+        if (underlyingObjectsDoNotAlias(SI, LI, DL, *AA)) {
+          Dep = MD->getPointerDependencyFrom(Loc, false,
+                                             Dependency->getIterator(), PB, SI);
+          continue;
+        }
+      }
+
+      // If we don't get a useful location for the dependent instruction,
+      // it doesn't write memory, it is not removable, or it might read Loc,
+      // then bail out.
+      MemoryLocation DepLoc = getLocForWrite(Dependency, *AA);
+      if (!DepLoc.Ptr || !hasMemoryWrite(Dependency, *TLI) ||
+          !isRemovable(Dependency) ||
+          (AA->getModRefInfo(Dependency, Loc) & MRI_Ref))
+        break;
+
+      // Don't remove a store within single-block loops;
+      // we need more analysis: e.g., looking for an interfering load
+      // above the store within the loop, etc.
+      bool SingleBlockLoop = false;
+      for (auto I = succ_begin(PB), E = succ_end(PB); I != E; ++I) {
+        BasicBlock *Succ = *I;
+        if (Succ == PB) {
+          SingleBlockLoop = true;
+          break;
+        }
+      }
+      if (SingleBlockLoop)
+        break;
+
+      int64_t InstWriteOffset, DepWriteOffset;
+      OverwriteResult OR =
+          isOverwrite(Loc, DepLoc, DL, *TLI, DepWriteOffset, InstWriteOffset);
+      if (OR == OverwriteComplete) {
+        DEBUG(dbgs() << "DSE: Remove Non-Local Dead Store:\n  DEAD: "
+                     << *Dependency << "\n  KILLER: " << *SI << '\n');
+
+        // Delete the redundant store and now-dead instructions that feed it.
+        auto Next = std::next(Dependency->getIterator());
+        DeleteDeadInstruction(Dependency, *MD, *TLI);
+        ++NumNonLocalStores;
+        MadeChange = true;
+        Dep = MD->getPointerDependencyFrom(Loc, false, Next, PB, SI);
+        continue;
+      }
+      // TODO: attempt shortening of Dependency inst as in the local case.
+      break;
+    }
+
+    if (Dep.isNonLocal()) {
+      SafeBlocks.insert(PB);
+      findSafePreds(Blocks, SafeBlocks, PB, DT);
+    }
+  }
+
+  return MadeChange;
+}
+
 /// Returns true if the memory which is accessed by the second instruction is not
 /// modified between the first and the second instruction.
 /// Precondition: Second instruction must be dominated by the first
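The single-block-loop bailout above is conservative for a good reason. A
sketch of the hazard (a hypothetical example, not one of the commit's tests):
the store in %loop looks dead against the store in %exit, yet the load at the
top of the loop reads it on the next iteration, so deleting it would change
behavior:

    define i32 @self_loop(i32* %p, i32 %n) {
    entry:
      br label %loop

    loop:                          ; predecessor of %exit that branches to itself
      %i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
      %v = load i32, i32* %p       ; reads the previous iteration's store
      %s = add i32 %v, %i
      store i32 %s, i32* %p        ; must NOT be removed
      %i.next = add i32 %i, 1
      %c = icmp slt i32 %i.next, %n
      br i1 %c, label %loop, label %exit

    exit:
      store i32 0, i32* %p         ; overwrites %p after the loop
      ret i32 %s
    }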
