Skip to content

Commit 9c20156

Browse files
committedMay 3, 2019
[MIR] Add simple PRE pass to MachineCSE
This is the second part of the commit fixing PR38917 (hoisting partitially redundant machine instruction). Most of PRE (partitial redundancy elimination) and CSE work is done on LLVM IR, but some of redundancy arises during DAG legalization. Machine CSE is not enough to deal with it. This simple PRE implementation works a little bit intricately: it passes before CSE, looking for partitial redundancy and transforming it to fully redundancy, anticipating that the next CSE step will eliminate this created redundancy. If CSE doesn't eliminate this, than created instruction will remain dead and eliminated later by Remove Dead Machine Instructions pass. The third part of the commit is supposed to refactor MachineCSE, to make it more clear and to merge MachinePRE with MachineCSE, so one need no rely on further Remove Dead pass to clear instrs not eliminated by CSE. First step: https://reviews.llvm.org/D54839 Fixes llvm.org/PR38917 Reviewers: RKSimon Subscribers: hfinkel, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D56772 llvm-svn: 359870
1 parent 88f9117 commit 9c20156

9 files changed

+1515
-2084
lines changed
 

‎llvm/lib/CodeGen/MachineCSE.cpp

+117-9
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
#include "llvm/ADT/SmallVector.h"
2020
#include "llvm/ADT/Statistic.h"
2121
#include "llvm/Analysis/AliasAnalysis.h"
22+
#include "llvm/Analysis/CFG.h"
2223
#include "llvm/CodeGen/MachineBasicBlock.h"
2324
#include "llvm/CodeGen/MachineDominators.h"
2425
#include "llvm/CodeGen/MachineFunction.h"
@@ -49,6 +50,8 @@ using namespace llvm;
4950

5051
STATISTIC(NumCoalesces, "Number of copies coalesced");
5152
STATISTIC(NumCSEs, "Number of common subexpression eliminated");
53+
STATISTIC(NumPREs, "Number of partial redundant expression"
54+
" transformed to fully redundant");
5255
STATISTIC(NumPhysCSEs,
5356
"Number of physreg referencing common subexpr eliminated");
5457
STATISTIC(NumCrossBBCSEs,
@@ -84,6 +87,7 @@ namespace {
8487

8588
void releaseMemory() override {
8689
ScopeMap.clear();
90+
PREMap.clear();
8791
Exps.clear();
8892
}
8993

@@ -98,6 +102,8 @@ namespace {
98102

99103
unsigned LookAheadLimit = 0;
100104
DenseMap<MachineBasicBlock *, ScopeType *> ScopeMap;
105+
DenseMap<MachineInstr *, MachineBasicBlock *, MachineInstrExpressionTrait>
106+
PREMap;
101107
ScopedHTType VNT;
102108
SmallVector<MachineInstr *, 64> Exps;
103109
unsigned CurrVN = 0;
@@ -116,13 +122,17 @@ namespace {
116122
PhysDefVector &PhysDefs, bool &NonLocal) const;
117123
bool isCSECandidate(MachineInstr *MI);
118124
bool isProfitableToCSE(unsigned CSReg, unsigned Reg,
119-
MachineInstr *CSMI, MachineInstr *MI);
125+
MachineBasicBlock *CSBB, MachineInstr *MI);
120126
void EnterScope(MachineBasicBlock *MBB);
121127
void ExitScope(MachineBasicBlock *MBB);
122-
bool ProcessBlock(MachineBasicBlock *MBB);
128+
bool ProcessBlockCSE(MachineBasicBlock *MBB);
123129
void ExitScopeIfDone(MachineDomTreeNode *Node,
124130
DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren);
125131
bool PerformCSE(MachineDomTreeNode *Node);
132+
133+
bool isPRECandidate(MachineInstr *MI);
134+
bool ProcessBlockPRE(MachineDominatorTree *MDT, MachineBasicBlock *MBB);
135+
bool PerformSimplePRE(MachineDominatorTree *DT);
126136
};
127137

128138
} // end anonymous namespace
@@ -405,9 +415,10 @@ bool MachineCSE::isCSECandidate(MachineInstr *MI) {
405415
}
406416

407417
/// isProfitableToCSE - Return true if it's profitable to eliminate MI with a
408-
/// common expression that defines Reg.
418+
/// common expression that defines Reg. CSBB is basic block where CSReg is
419+
/// defined.
409420
bool MachineCSE::isProfitableToCSE(unsigned CSReg, unsigned Reg,
410-
MachineInstr *CSMI, MachineInstr *MI) {
421+
MachineBasicBlock *CSBB, MachineInstr *MI) {
411422
// FIXME: Heuristics that works around the lack the live range splitting.
412423

413424
// If CSReg is used at all uses of Reg, CSE should not increase register
@@ -433,7 +444,6 @@ bool MachineCSE::isProfitableToCSE(unsigned CSReg, unsigned Reg,
433444
// an immediate predecessor. We don't want to increase register pressure and
434445
// end up causing other computation to be spilled.
435446
if (TII->isAsCheapAsAMove(*MI)) {
436-
MachineBasicBlock *CSBB = CSMI->getParent();
437447
MachineBasicBlock *BB = MI->getParent();
438448
if (CSBB != BB && !CSBB->isSuccessor(BB))
439449
return false;
@@ -488,7 +498,7 @@ void MachineCSE::ExitScope(MachineBasicBlock *MBB) {
488498
ScopeMap.erase(SI);
489499
}
490500

491-
bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
501+
bool MachineCSE::ProcessBlockCSE(MachineBasicBlock *MBB) {
492502
bool Changed = false;
493503

494504
SmallVector<std::pair<unsigned, unsigned>, 8> CSEPairs;
@@ -598,7 +608,7 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
598608
TargetRegisterInfo::isVirtualRegister(NewReg) &&
599609
"Do not CSE physical register defs!");
600610

601-
if (!isProfitableToCSE(NewReg, OldReg, CSMI, MI)) {
611+
if (!isProfitableToCSE(NewReg, OldReg, CSMI->getParent(), MI)) {
602612
LLVM_DEBUG(dbgs() << "*** Not profitable, avoid CSE!\n");
603613
DoCSE = false;
604614
break;
@@ -738,14 +748,109 @@ bool MachineCSE::PerformCSE(MachineDomTreeNode *Node) {
738748
for (MachineDomTreeNode *Node : Scopes) {
739749
MachineBasicBlock *MBB = Node->getBlock();
740750
EnterScope(MBB);
741-
Changed |= ProcessBlock(MBB);
751+
Changed |= ProcessBlockCSE(MBB);
742752
// If it's a leaf node, it's done. Traverse upwards to pop ancestors.
743753
ExitScopeIfDone(Node, OpenChildren);
744754
}
745755

746756
return Changed;
747757
}
748758

759+
// We use stronger checks for PRE candidate rather than for CSE ones to embrace
760+
// checks inside ProcessBlockCSE(), not only inside isCSECandidate(). This helps
761+
// to exclude instrs created by PRE that won't be CSEed later.
762+
bool MachineCSE::isPRECandidate(MachineInstr *MI) {
763+
if (!isCSECandidate(MI) ||
764+
MI->isNotDuplicable() ||
765+
MI->isAsCheapAsAMove() ||
766+
MI->getNumDefs() != 1 ||
767+
MI->getNumExplicitDefs() != 1)
768+
return false;
769+
770+
for (auto def : MI->defs())
771+
if (!TRI->isVirtualRegister(def.getReg()))
772+
return false;
773+
774+
for (auto use : MI->uses())
775+
if (use.isReg() && !TRI->isVirtualRegister(use.getReg()))
776+
return false;
777+
778+
return true;
779+
}
780+
781+
bool MachineCSE::ProcessBlockPRE(MachineDominatorTree *DT,
782+
MachineBasicBlock *MBB) {
783+
bool Changed = false;
784+
for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;) {
785+
MachineInstr *MI = &*I;
786+
++I;
787+
788+
if (!isPRECandidate(MI))
789+
continue;
790+
791+
if (!PREMap.count(MI)) {
792+
PREMap[MI] = MBB;
793+
continue;
794+
}
795+
796+
auto MBB1 = PREMap[MI];
797+
assert(
798+
!DT->properlyDominates(MBB, MBB1) &&
799+
"MBB cannot properly dominate MBB1 while DFS through dominators tree!");
800+
auto CMBB = DT->findNearestCommonDominator(MBB, MBB1);
801+
802+
// Two instrs are partial redundant if their basic blocks are reachable
803+
// from one to another but one doesn't dominate another.
804+
if (CMBB != MBB1) {
805+
auto BB = MBB->getBasicBlock(), BB1 = MBB1->getBasicBlock();
806+
if (BB != nullptr && BB1 != nullptr &&
807+
(isPotentiallyReachable(BB1, BB) ||
808+
isPotentiallyReachable(BB, BB1))) {
809+
810+
assert(MI->getOperand(0).isDef() &&
811+
"First operand of instr with one explicit def must be this def");
812+
unsigned VReg = MI->getOperand(0).getReg();
813+
unsigned NewReg = MRI->cloneVirtualRegister(VReg);
814+
if (!isProfitableToCSE(NewReg, VReg, CMBB, MI))
815+
continue;
816+
MachineInstr &NewMI =
817+
TII->duplicate(*CMBB, CMBB->getFirstTerminator(), *MI);
818+
NewMI.getOperand(0).setReg(NewReg);
819+
820+
PREMap[MI] = CMBB;
821+
++NumPREs;
822+
Changed = true;
823+
}
824+
}
825+
}
826+
return Changed;
827+
}
828+
829+
// This simple PRE (partial redundancy elimination) pass doesn't actually
830+
// eliminate partial redundancy but transforms it to full redundancy,
831+
// anticipating that the next CSE step will eliminate this created redundancy.
832+
// If CSE doesn't eliminate this, than created instruction will remain dead
833+
// and eliminated later by Remove Dead Machine Instructions pass.
834+
bool MachineCSE::PerformSimplePRE(MachineDominatorTree *DT) {
835+
SmallVector<MachineDomTreeNode *, 32> BBs;
836+
837+
PREMap.clear();
838+
bool Changed = false;
839+
BBs.push_back(DT->getRootNode());
840+
do {
841+
auto Node = BBs.pop_back_val();
842+
const std::vector<MachineDomTreeNode *> &Children = Node->getChildren();
843+
for (MachineDomTreeNode *Child : Children)
844+
BBs.push_back(Child);
845+
846+
MachineBasicBlock *MBB = Node->getBlock();
847+
Changed |= ProcessBlockPRE(DT, MBB);
848+
849+
} while (!BBs.empty());
850+
851+
return Changed;
852+
}
853+
749854
bool MachineCSE::runOnMachineFunction(MachineFunction &MF) {
750855
if (skipFunction(MF.getFunction()))
751856
return false;
@@ -756,5 +861,8 @@ bool MachineCSE::runOnMachineFunction(MachineFunction &MF) {
756861
AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
757862
DT = &getAnalysis<MachineDominatorTree>();
758863
LookAheadLimit = TII->getMachineCSELookAheadLimit();
759-
return PerformCSE(DT->getRootNode());
864+
bool ChangedPRE, ChangedCSE;
865+
ChangedPRE = PerformSimplePRE(DT);
866+
ChangedCSE = PerformCSE(DT->getRootNode());
867+
return ChangedPRE || ChangedCSE;
760868
}

‎llvm/test/CodeGen/Mips/internalfunc.ll

+1-2
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,7 @@ if.then: ; preds = %entry
2727
if.end: ; preds = %entry, %if.then
2828
; CHECK: lw $[[R2:[0-9]+]], %got(sf2)
2929
; CHECK: addiu ${{[0-9]+}}, $[[R2]], %lo(sf2)
30-
; CHECK: lw $[[R3:[0-9]+]], %got(caller.sf1)
31-
; CHECK: sw ${{[0-9]+}}, %lo(caller.sf1)($[[R3]])
30+
; CHECK: sw ${{[0-9]+}}, %lo(caller.sf1)($[[R1]])
3231
%tobool3 = icmp ne i32 %a0, 0
3332
%tmp4 = load void (...)*, void (...)** @gf1, align 4
3433
%cond = select i1 %tobool3, void (...)* %tmp4, void (...)* bitcast (void ()* @sf2 to void (...)*)

0 commit comments

Comments
 (0)
Please sign in to comment.