19
19
#include " llvm/ADT/SmallVector.h"
20
20
#include " llvm/ADT/Statistic.h"
21
21
#include " llvm/Analysis/AliasAnalysis.h"
22
+ #include " llvm/Analysis/CFG.h"
22
23
#include " llvm/CodeGen/MachineBasicBlock.h"
23
24
#include " llvm/CodeGen/MachineDominators.h"
24
25
#include " llvm/CodeGen/MachineFunction.h"
@@ -49,6 +50,8 @@ using namespace llvm;
49
50
50
51
STATISTIC (NumCoalesces, " Number of copies coalesced" );
51
52
STATISTIC (NumCSEs, " Number of common subexpression eliminated" );
53
+ STATISTIC (NumPREs, " Number of partial redundant expression"
54
+ " transformed to fully redundant" );
52
55
STATISTIC (NumPhysCSEs,
53
56
" Number of physreg referencing common subexpr eliminated" );
54
57
STATISTIC (NumCrossBBCSEs,
@@ -84,6 +87,7 @@ namespace {
84
87
85
88
void releaseMemory () override {
86
89
ScopeMap.clear ();
90
+ PREMap.clear ();
87
91
Exps.clear ();
88
92
}
89
93
@@ -98,6 +102,8 @@ namespace {
98
102
99
103
unsigned LookAheadLimit = 0 ;
100
104
DenseMap<MachineBasicBlock *, ScopeType *> ScopeMap;
105
+ DenseMap<MachineInstr *, MachineBasicBlock *, MachineInstrExpressionTrait>
106
+ PREMap;
101
107
ScopedHTType VNT;
102
108
SmallVector<MachineInstr *, 64 > Exps;
103
109
unsigned CurrVN = 0 ;
@@ -116,13 +122,17 @@ namespace {
116
122
PhysDefVector &PhysDefs, bool &NonLocal) const ;
117
123
bool isCSECandidate (MachineInstr *MI);
118
124
bool isProfitableToCSE (unsigned CSReg, unsigned Reg,
119
- MachineInstr *CSMI , MachineInstr *MI);
125
+ MachineBasicBlock *CSBB , MachineInstr *MI);
120
126
void EnterScope (MachineBasicBlock *MBB);
121
127
void ExitScope (MachineBasicBlock *MBB);
122
- bool ProcessBlock (MachineBasicBlock *MBB);
128
+ bool ProcessBlockCSE (MachineBasicBlock *MBB);
123
129
void ExitScopeIfDone (MachineDomTreeNode *Node,
124
130
DenseMap<MachineDomTreeNode*, unsigned > &OpenChildren);
125
131
bool PerformCSE (MachineDomTreeNode *Node);
132
+
133
+ bool isPRECandidate (MachineInstr *MI);
134
+ bool ProcessBlockPRE (MachineDominatorTree *MDT, MachineBasicBlock *MBB);
135
+ bool PerformSimplePRE (MachineDominatorTree *DT);
126
136
};
127
137
128
138
} // end anonymous namespace
@@ -405,9 +415,10 @@ bool MachineCSE::isCSECandidate(MachineInstr *MI) {
405
415
}
406
416
407
417
// / isProfitableToCSE - Return true if it's profitable to eliminate MI with a
408
- // / common expression that defines Reg.
418
+ // / common expression that defines Reg. CSBB is basic block where CSReg is
419
+ // / defined.
409
420
bool MachineCSE::isProfitableToCSE (unsigned CSReg, unsigned Reg,
410
- MachineInstr *CSMI , MachineInstr *MI) {
421
+ MachineBasicBlock *CSBB , MachineInstr *MI) {
411
422
// FIXME: Heuristics that works around the lack the live range splitting.
412
423
413
424
// If CSReg is used at all uses of Reg, CSE should not increase register
@@ -433,7 +444,6 @@ bool MachineCSE::isProfitableToCSE(unsigned CSReg, unsigned Reg,
433
444
// an immediate predecessor. We don't want to increase register pressure and
434
445
// end up causing other computation to be spilled.
435
446
if (TII->isAsCheapAsAMove (*MI)) {
436
- MachineBasicBlock *CSBB = CSMI->getParent ();
437
447
MachineBasicBlock *BB = MI->getParent ();
438
448
if (CSBB != BB && !CSBB->isSuccessor (BB))
439
449
return false ;
@@ -488,7 +498,7 @@ void MachineCSE::ExitScope(MachineBasicBlock *MBB) {
488
498
ScopeMap.erase (SI);
489
499
}
490
500
491
- bool MachineCSE::ProcessBlock (MachineBasicBlock *MBB) {
501
+ bool MachineCSE::ProcessBlockCSE (MachineBasicBlock *MBB) {
492
502
bool Changed = false ;
493
503
494
504
SmallVector<std::pair<unsigned , unsigned >, 8 > CSEPairs;
@@ -598,7 +608,7 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
598
608
TargetRegisterInfo::isVirtualRegister (NewReg) &&
599
609
" Do not CSE physical register defs!" );
600
610
601
- if (!isProfitableToCSE (NewReg, OldReg, CSMI, MI)) {
611
+ if (!isProfitableToCSE (NewReg, OldReg, CSMI-> getParent () , MI)) {
602
612
LLVM_DEBUG (dbgs () << " *** Not profitable, avoid CSE!\n " );
603
613
DoCSE = false ;
604
614
break ;
@@ -738,14 +748,109 @@ bool MachineCSE::PerformCSE(MachineDomTreeNode *Node) {
738
748
for (MachineDomTreeNode *Node : Scopes) {
739
749
MachineBasicBlock *MBB = Node->getBlock ();
740
750
EnterScope (MBB);
741
- Changed |= ProcessBlock (MBB);
751
+ Changed |= ProcessBlockCSE (MBB);
742
752
// If it's a leaf node, it's done. Traverse upwards to pop ancestors.
743
753
ExitScopeIfDone (Node, OpenChildren);
744
754
}
745
755
746
756
return Changed;
747
757
}
748
758
759
+ // We use stronger checks for PRE candidate rather than for CSE ones to embrace
760
+ // checks inside ProcessBlockCSE(), not only inside isCSECandidate(). This helps
761
+ // to exclude instrs created by PRE that won't be CSEed later.
762
+ bool MachineCSE::isPRECandidate (MachineInstr *MI) {
763
+ if (!isCSECandidate (MI) ||
764
+ MI->isNotDuplicable () ||
765
+ MI->isAsCheapAsAMove () ||
766
+ MI->getNumDefs () != 1 ||
767
+ MI->getNumExplicitDefs () != 1 )
768
+ return false ;
769
+
770
+ for (auto def : MI->defs ())
771
+ if (!TRI->isVirtualRegister (def.getReg ()))
772
+ return false ;
773
+
774
+ for (auto use : MI->uses ())
775
+ if (use.isReg () && !TRI->isVirtualRegister (use.getReg ()))
776
+ return false ;
777
+
778
+ return true ;
779
+ }
780
+
781
+ bool MachineCSE::ProcessBlockPRE (MachineDominatorTree *DT,
782
+ MachineBasicBlock *MBB) {
783
+ bool Changed = false ;
784
+ for (MachineBasicBlock::iterator I = MBB->begin (), E = MBB->end (); I != E;) {
785
+ MachineInstr *MI = &*I;
786
+ ++I;
787
+
788
+ if (!isPRECandidate (MI))
789
+ continue ;
790
+
791
+ if (!PREMap.count (MI)) {
792
+ PREMap[MI] = MBB;
793
+ continue ;
794
+ }
795
+
796
+ auto MBB1 = PREMap[MI];
797
+ assert (
798
+ !DT->properlyDominates (MBB, MBB1) &&
799
+ " MBB cannot properly dominate MBB1 while DFS through dominators tree!" );
800
+ auto CMBB = DT->findNearestCommonDominator (MBB, MBB1);
801
+
802
+ // Two instrs are partial redundant if their basic blocks are reachable
803
+ // from one to another but one doesn't dominate another.
804
+ if (CMBB != MBB1) {
805
+ auto BB = MBB->getBasicBlock (), BB1 = MBB1->getBasicBlock ();
806
+ if (BB != nullptr && BB1 != nullptr &&
807
+ (isPotentiallyReachable (BB1, BB) ||
808
+ isPotentiallyReachable (BB, BB1))) {
809
+
810
+ assert (MI->getOperand (0 ).isDef () &&
811
+ " First operand of instr with one explicit def must be this def" );
812
+ unsigned VReg = MI->getOperand (0 ).getReg ();
813
+ unsigned NewReg = MRI->cloneVirtualRegister (VReg);
814
+ if (!isProfitableToCSE (NewReg, VReg, CMBB, MI))
815
+ continue ;
816
+ MachineInstr &NewMI =
817
+ TII->duplicate (*CMBB, CMBB->getFirstTerminator (), *MI);
818
+ NewMI.getOperand (0 ).setReg (NewReg);
819
+
820
+ PREMap[MI] = CMBB;
821
+ ++NumPREs;
822
+ Changed = true ;
823
+ }
824
+ }
825
+ }
826
+ return Changed;
827
+ }
828
+
829
+ // This simple PRE (partial redundancy elimination) pass doesn't actually
830
+ // eliminate partial redundancy but transforms it to full redundancy,
831
+ // anticipating that the next CSE step will eliminate this created redundancy.
832
+ // If CSE doesn't eliminate this, than created instruction will remain dead
833
+ // and eliminated later by Remove Dead Machine Instructions pass.
834
+ bool MachineCSE::PerformSimplePRE (MachineDominatorTree *DT) {
835
+ SmallVector<MachineDomTreeNode *, 32 > BBs;
836
+
837
+ PREMap.clear ();
838
+ bool Changed = false ;
839
+ BBs.push_back (DT->getRootNode ());
840
+ do {
841
+ auto Node = BBs.pop_back_val ();
842
+ const std::vector<MachineDomTreeNode *> &Children = Node->getChildren ();
843
+ for (MachineDomTreeNode *Child : Children)
844
+ BBs.push_back (Child);
845
+
846
+ MachineBasicBlock *MBB = Node->getBlock ();
847
+ Changed |= ProcessBlockPRE (DT, MBB);
848
+
849
+ } while (!BBs.empty ());
850
+
851
+ return Changed;
852
+ }
853
+
749
854
bool MachineCSE::runOnMachineFunction (MachineFunction &MF) {
750
855
if (skipFunction (MF.getFunction ()))
751
856
return false ;
@@ -756,5 +861,8 @@ bool MachineCSE::runOnMachineFunction(MachineFunction &MF) {
756
861
AA = &getAnalysis<AAResultsWrapperPass>().getAAResults ();
757
862
DT = &getAnalysis<MachineDominatorTree>();
758
863
LookAheadLimit = TII->getMachineCSELookAheadLimit ();
759
- return PerformCSE (DT->getRootNode ());
864
+ bool ChangedPRE, ChangedCSE;
865
+ ChangedPRE = PerformSimplePRE (DT);
866
+ ChangedCSE = PerformCSE (DT->getRootNode ());
867
+ return ChangedPRE || ChangedCSE;
760
868
}
0 commit comments