//
//===----------------------------------------------------------------------===//
//
-// This file implements a trivial dead store elimination that only considers
-// basic-block local redundant stores.
-//
-// FIXME: This should eventually be extended to be a post-dominator tree
-// traversal. Doing so would be pretty trivial.
+// This file implements dead store elimination that considers redundant stores
+// within a basic block as well as across basic blocks in reverse CFG order.
//
//===----------------------------------------------------------------------===//
@@ -44,6 +41,13 @@ using namespace llvm;
STATISTIC(NumRedundantStores, "Number of redundant stores deleted");
STATISTIC(NumFastStores, "Number of stores deleted");
STATISTIC(NumFastOther, "Number of other instrs removed");
+STATISTIC(NumNonLocalStores, "Number of non-local stores deleted");
+
+static cl::opt<bool> EnableNonLocalDSE("enable-nonlocal-dse", cl::init(true));
+
+/// MaxNonLocalAttempts is an arbitrary threshold that provides
+/// an early opportunity to bail out and control compile time.
+static const unsigned MaxNonLocalAttempts = 100;

namespace {
  struct DSE : public FunctionPass {
@@ -80,6 +84,7 @@ namespace {
    bool runOnBasicBlock(BasicBlock &BB);
    bool MemoryIsNotModifiedBetween(Instruction *FirstI, Instruction *SecondI);
    bool HandleFree(CallInst *F);
+    bool handleNonLocalDependency(Instruction *Inst);
    bool handleEndBlock(BasicBlock &BB);
    void RemoveAccessedObjects(const MemoryLocation &LoadedLoc,
                               SmallSetVector<Value *, 16> &DeadStackObjects,
@@ -485,6 +490,7 @@ static bool isPossibleSelfRead(Instruction *Inst,
bool DSE::runOnBasicBlock(BasicBlock &BB) {
  const DataLayout &DL = BB.getModule()->getDataLayout();
  bool MadeChange = false;
+  unsigned NumNonLocalAttempts = 0;

  // Do a top-down walk on the BB.
  for (BasicBlock::iterator BBI = BB.begin(), BBE = BB.end(); BBI != BBE; ) {
@@ -554,99 +560,101 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
    MemDepResult InstDep = MD->getDependency(Inst);

-    // Ignore any store where we can't find a local dependence.
-    // FIXME: cross-block DSE would be fun. :)
-    if (!InstDep.isDef() && !InstDep.isClobber())
-      continue;
-
-    // Figure out what location is being stored to.
-    MemoryLocation Loc = getLocForWrite(Inst, *AA);
+    if (InstDep.isDef() || InstDep.isClobber()) {
+      // Figure out what location is being stored to.
+      MemoryLocation Loc = getLocForWrite(Inst, *AA);

-    // If we didn't get a useful location, fail.
-    if (!Loc.Ptr)
-      continue;
-
-    while (InstDep.isDef() || InstDep.isClobber()) {
-      // Get the memory clobbered by the instruction we depend on. MemDep will
-      // skip any instructions that 'Loc' clearly doesn't interact with. If we
-      // end up depending on a may- or must-aliased load, then we can't optimize
-      // away the store and we bail out. However, if we depend on on something
-      // that overwrites the memory location we *can* potentially optimize it.
-      //
-      // Find out what memory location the dependent instruction stores.
-      Instruction *DepWrite = InstDep.getInst();
-      MemoryLocation DepLoc = getLocForWrite(DepWrite, *AA);
-      // If we didn't get a useful location, or if it isn't a size, bail out.
-      if (!DepLoc.Ptr)
-        break;
+      // If we didn't get a useful location, fail.
+      if (!Loc.Ptr)
+        continue;

-      // If we find a write that is a) removable (i.e., non-volatile), b) is
-      // completely obliterated by the store to 'Loc', and c) which we know that
-      // 'Inst' doesn't load from, then we can remove it.
-      if (isRemovable(DepWrite) &&
-          !isPossibleSelfRead(Inst, Loc, DepWrite, *TLI, *AA)) {
-        int64_t InstWriteOffset, DepWriteOffset;
-        OverwriteResult OR =
-            isOverwrite(Loc, DepLoc, DL, *TLI, DepWriteOffset, InstWriteOffset);
-        if (OR == OverwriteComplete) {
-          DEBUG(dbgs() << "DSE: Remove Dead Store:\n  DEAD: "
-                       << *DepWrite << "\n  KILLER: " << *Inst << '\n');
-
-          // Delete the store and now-dead instructions that feed it.
-          DeleteDeadInstruction(DepWrite, *MD, *TLI);
-          ++NumFastStores;
-          MadeChange = true;
-
-          // DeleteDeadInstruction can delete the current instruction in loop
-          // cases, reset BBI.
-          BBI = Inst->getIterator();
-          if (BBI != BB.begin())
-            --BBI;
+      while (InstDep.isDef() || InstDep.isClobber()) {
+        // Get the memory clobbered by the instruction we depend on. MemDep
+        // will skip any instructions that 'Loc' clearly doesn't interact with.
+        // If we end up depending on a may- or must-aliased load, then we can't
+        // optimize away the store and we bail out. However, if we depend on
+        // something that overwrites the memory location we *can* potentially
+        // optimize it.
+        //
+        // Find out what memory location the dependent instruction stores.
+        Instruction *DepWrite = InstDep.getInst();
+        MemoryLocation DepLoc = getLocForWrite(DepWrite, *AA);
+        // If we didn't get a useful location, or if it isn't a size, bail out.
+        if (!DepLoc.Ptr)
          break;
-        } else if (OR == OverwriteEnd && isShortenable(DepWrite)) {
-          // TODO: base this on the target vector size so that if the earlier
-          // store was too small to get vector writes anyway then its likely
-          // a good idea to shorten it
-          // Power of 2 vector writes are probably always a bad idea to optimize
-          // as any store/memset/memcpy is likely using vector instructions so
-          // shortening it to not vector size is likely to be slower
-          MemIntrinsic* DepIntrinsic = cast<MemIntrinsic>(DepWrite);
-          unsigned DepWriteAlign = DepIntrinsic->getAlignment();
-          if (llvm::isPowerOf2_64(InstWriteOffset) ||
-              ((DepWriteAlign != 0) && InstWriteOffset % DepWriteAlign == 0)) {
-
-            DEBUG(dbgs() << "DSE: Remove Dead Store:\n  OW END: "
-                         << *DepWrite << "\n  KILLER (offset "
-                         << InstWriteOffset << ", "
-                         << DepLoc.Size << ")"
-                         << *Inst << '\n');
-
-            Value* DepWriteLength = DepIntrinsic->getLength();
-            Value* TrimmedLength = ConstantInt::get(DepWriteLength->getType(),
-                                                    InstWriteOffset -
-                                                    DepWriteOffset);
-            DepIntrinsic->setLength(TrimmedLength);
+
+        // If we find a write that is a) removable (i.e., non-volatile), b) is
+        // completely obliterated by the store to 'Loc', and c) which we know
+        // that 'Inst' doesn't load from, then we can remove it.
+        if (isRemovable(DepWrite) &&
+            !isPossibleSelfRead(Inst, Loc, DepWrite, *TLI, *AA)) {
+          int64_t InstWriteOffset, DepWriteOffset;
+          OverwriteResult OR = isOverwrite(Loc, DepLoc, DL, *TLI,
+                                           DepWriteOffset, InstWriteOffset);
+          if (OR == OverwriteComplete) {
+            DEBUG(dbgs() << "DSE: Remove Dead Store:\n  DEAD: " << *DepWrite
+                         << "\n  KILLER: " << *Inst << '\n');
+
+            // Delete the store and now-dead instructions that feed it.
+            DeleteDeadInstruction(DepWrite, *MD, *TLI);
+            ++NumFastStores;
            MadeChange = true;
+
+            // DeleteDeadInstruction can delete the current instruction in loop
+            // cases, reset BBI.
+            BBI = Inst->getIterator();
+            if (BBI != BB.begin())
+              --BBI;
+            break;
+          } else if (OR == OverwriteEnd && isShortenable(DepWrite)) {
+            // TODO: base this on the target vector size so that if the earlier
+            // store was too small to get vector writes anyway then it's likely
+            // a good idea to shorten it.
+
+            // Power of 2 vector writes are probably always a bad idea to
+            // optimize as any store/memset/memcpy is likely using vector
+            // instructions so shortening it to not vector size is likely to be
+            // slower.
+            MemIntrinsic *DepIntrinsic = cast<MemIntrinsic>(DepWrite);
+            unsigned DepWriteAlign = DepIntrinsic->getAlignment();
+            if (llvm::isPowerOf2_64(InstWriteOffset) ||
+                ((DepWriteAlign != 0) &&
+                 InstWriteOffset % DepWriteAlign == 0)) {
+
+              DEBUG(dbgs() << "DSE: Remove Dead Store:\n  OW END: " << *DepWrite
+                           << "\n  KILLER (offset " << InstWriteOffset << ", "
+                           << DepLoc.Size << ")" << *Inst << '\n');
+
+              Value *DepWriteLength = DepIntrinsic->getLength();
+              Value *TrimmedLength = ConstantInt::get(
+                  DepWriteLength->getType(), InstWriteOffset - DepWriteOffset);
+              DepIntrinsic->setLength(TrimmedLength);
+              MadeChange = true;
+            }
          }
        }
-      }

-      // If this is a may-aliased store that is clobbering the store value, we
-      // can keep searching past it for another must-aliased pointer that stores
-      // to the same location. For example, in:
-      // store -> P
-      // store -> Q
-      // store -> P
-      // we can remove the first store to P even though we don't know if P and Q
-      // alias.
-      if (DepWrite == &BB.front()) break;
-
-      // Can't look past this instruction if it might read 'Loc'.
-      if (AA->getModRefInfo(DepWrite, Loc) & MRI_Ref)
-        break;
+        // If this is a may-aliased store that is clobbering the store value, we
+        // can keep searching past it for another must-aliased pointer that
+        // stores to the same location. For example, in
+        // store -> P
+        // store -> Q
+        // store -> P
+        // we can remove the first store to P even though we don't know if P and
+        // Q alias.
+        if (DepWrite == &BB.front())
+          break;
+
+        // Can't look past this instruction if it might read 'Loc'.
+        if (AA->getModRefInfo(DepWrite, Loc) & MRI_Ref)
+          break;

-      InstDep = MD->getPointerDependencyFrom(Loc, false,
-                                             DepWrite->getIterator(), &BB);
+        InstDep = MD->getPointerDependencyFrom(Loc, false,
+                                               DepWrite->getIterator(), &BB);
+      }
+    } else if (EnableNonLocalDSE && InstDep.isNonLocal()) { // DSE across BB
+      if (++NumNonLocalAttempts < MaxNonLocalAttempts)
+        MadeChange |= handleNonLocalDependency(Inst);
    }
  }

@@ -658,6 +666,150 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
  return MadeChange;
}

+/// A helper for the handleNonLocalDependency() function: find all blocks
+/// that lead to the input block BB and append them to the output PredBlocks.
+/// PredBlocks will include not only predecessors of BB that unconditionally
+/// lead to BB but also:
+///   - single-block loops that lead to BB, and
+///   - if-blocks for which one edge goes to BB and the other edge goes to
+///     a block in the input SafeBlocks.
+/// PredBlocks will not include blocks unreachable from the entry block, nor
+/// blocks that form cycles with BB.
+static void findSafePreds(SmallVectorImpl<BasicBlock *> &PredBlocks,
+                          SmallSetVector<BasicBlock *, 8> &SafeBlocks,
+                          BasicBlock *BB, DominatorTree *DT) {
+  for (auto I = pred_begin(BB), E = pred_end(BB); I != E; ++I) {
+    BasicBlock *Pred = *I;
+    if (Pred == BB)
+      continue;
+    // The second check below prevents adding blocks that form a cycle with BB
+    // in order to avoid potential problems due to MemoryDependenceAnalysis,
+    // isOverwrite, etc. not being loop-aware.
+    if (!DT->isReachableFromEntry(Pred) || DT->dominates(BB, Pred))
+      continue;
+
+    bool PredIsSafe = true;
+    for (auto II = succ_begin(Pred), EE = succ_end(Pred); II != EE; ++II) {
+      BasicBlock *Succ = *II;
+      if (Succ == BB || Succ == Pred) // shortcut, BB should be in SafeBlocks
+        continue;
+      if (!SafeBlocks.count(Succ)) {
+        PredIsSafe = false;
+        break;
+      }
+    }
+    if (PredIsSafe)
+      PredBlocks.push_back(Pred);
+  }
+}
+
+static bool underlyingObjectsDoNotAlias(StoreInst *SI, LoadInst *LI,
+                                        const DataLayout &DL,
+                                        AliasAnalysis &AA) {
+  Value *AObj = GetUnderlyingObject(SI->getPointerOperand(), DL);
+  SmallVector<Value *, 4> Pointers;
+  GetUnderlyingObjects(LI->getPointerOperand(), Pointers, DL);
+
+  for (auto I = Pointers.begin(), E = Pointers.end(); I != E; ++I) {
+    Value *BObj = *I;
+    if (!AA.isNoAlias(AObj, DL.getTypeStoreSize(AObj->getType()), BObj,
+                      DL.getTypeStoreSize(BObj->getType())))
+      return false;
+  }
+  return true;
+}
+
+/// handleNonLocalDependency - Handle a non-local dependency on
+/// the input instruction Inst located in BB in an attempt to remove
+/// redundant stores outside BB.
+bool DSE::handleNonLocalDependency(Instruction *Inst) {
+  auto *SI = dyn_cast<StoreInst>(Inst);
+  if (!SI)
+    return false;
+  // Get the location being stored to.
+  // If we don't get a useful location, bail out.
+  MemoryLocation Loc = getLocForWrite(SI, *AA);
+  if (!Loc.Ptr)
+    return false;
+
+  bool MadeChange = false;
+  BasicBlock *BB = Inst->getParent();
+  const DataLayout &DL = BB->getModule()->getDataLayout();
+
+  // Worklist of predecessor blocks of BB.
+  SmallVector<BasicBlock *, 8> Blocks;
+  // Keep track of all predecessor blocks that are safe to search through.
+  SmallSetVector<BasicBlock *, 8> SafeBlocks;
+  SafeBlocks.insert(BB);
+  findSafePreds(Blocks, SafeBlocks, BB, DT);
+
+  while (!Blocks.empty()) {
+    BasicBlock *PB = Blocks.pop_back_val();
+    MemDepResult Dep =
+        MD->getPointerDependencyFrom(Loc, false, PB->end(), PB, SI);
+    while (Dep.isDef() || Dep.isClobber()) {
+      Instruction *Dependency = Dep.getInst();
+
+      // Filter out a false dependency from a load to SI looking through phis.
+      if (auto *LI = dyn_cast<LoadInst>(Dependency)) {
+        if (underlyingObjectsDoNotAlias(SI, LI, DL, *AA)) {
+          Dep = MD->getPointerDependencyFrom(Loc, false,
+                                             Dependency->getIterator(), PB, SI);
+          continue;
+        }
+      }
+
+      // If we don't get a useful location for the dependent instruction,
+      // or it doesn't write memory, is not removable, or might read Loc,
+      // then bail out.
+      MemoryLocation DepLoc = getLocForWrite(Dependency, *AA);
+      if (!DepLoc.Ptr || !hasMemoryWrite(Dependency, *TLI) ||
+          !isRemovable(Dependency) ||
+          (AA->getModRefInfo(Dependency, Loc) & MRI_Ref))
+        break;
+
+      // Don't remove a store within single-block loops;
+      // we need more analysis: e.g. looking for an interfering load
+      // above the store within the loop, etc.
+      bool SingleBlockLoop = false;
+      for (auto I = succ_begin(PB), E = succ_end(PB); I != E; ++I) {
+        BasicBlock *Succ = *I;
+        if (Succ == PB) {
+          SingleBlockLoop = true;
+          break;
+        }
+      }
+      if (SingleBlockLoop)
+        break;
+
+      int64_t InstWriteOffset, DepWriteOffset;
+      OverwriteResult OR =
+          isOverwrite(Loc, DepLoc, DL, *TLI, DepWriteOffset, InstWriteOffset);
+      if (OR == OverwriteComplete) {
+        DEBUG(dbgs() << "DSE: Remove Non-Local Dead Store:\n  DEAD: "
+                     << *Dependency << "\n  KILLER: " << *SI << '\n');
+
+        // Delete redundant store and now-dead instructions that feed it.
+        auto Next = std::next(Dependency->getIterator());
+        DeleteDeadInstruction(Dependency, *MD, *TLI);
+        ++NumNonLocalStores;
+        MadeChange = true;
+        Dep = MD->getPointerDependencyFrom(Loc, false, Next, PB, SI);
+        continue;
+      }
+      // TODO: attempt shortening of Dependency inst as in the local case
+      break;
+    }
+
+    if (Dep.isNonLocal()) {
+      SafeBlocks.insert(PB);
+      findSafePreds(Blocks, SafeBlocks, PB, DT);
+    }
+  }
+
+  return MadeChange;
+}
+
/// Returns true if the memory which is accessed by the second instruction is not
/// modified between the first and the second instruction.
/// Precondition: Second instruction must be dominated by the first
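
For illustration only, and not part of the patch itself: a minimal C++ sketch of the cross-block pattern the new non-local walk can remove, assuming enable-nonlocal-dse is left at its default of true. The function and parameter names below are hypothetical.

// The store under the branch is never read and is overwritten on every path
// by the store in the merge block, so it is dead. The killing store's MemDep
// result is non-local, so runOnBasicBlock takes the new else-if branch and
// handleNonLocalDependency walks the safe predecessor holding the earlier
// store and deletes it; the previous, purely block-local DSE could not see
// across the branch.
void overwriteOnAllPaths(int *P, bool Cond) {
  if (Cond)
    *P = 1; // dead store: removable by non-local DSE
  *P = 2;   // killing store in the merge block
}

A single-block loop around the earlier store, or a predecessor with a side exit not in SafeBlocks, makes the walk bail out, which is what findSafePreds and the SingleBlockLoop check above guard against.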