Skip to content

Commit 9909872

Browse files
committedFeb 21, 2017
The patch introduces new way of narrowing complex (>UINT16 variants) solutions.
The new method introduced under "-lsr-exp-narrow" option (currenlty set to true). Summary: The method is based on registers number mathematical expectation and should be generally closer to optimal solution. Please see details in comments to "LSRInstance::NarrowSearchSpaceByDeletingCostlyFormulas()" function (in lib/Transforms/Scalar/LoopStrengthReduce.cpp). Reviewers: qcolombet Differential Revision: http://reviews.llvm.org/D29862 From: Evgeny Stupachenko <evstupac@gmail.com> llvm-svn: 295704
1 parent 60f1fe8 commit 9909872

File tree

3 files changed

+161
-3
lines changed

3 files changed

+161
-3
lines changed
 

‎llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp

+159-1
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,12 @@ static cl::opt<bool> InsnsCost(
134134
"lsr-insns-cost", cl::Hidden, cl::init(false),
135135
cl::desc("Add instruction count to a LSR cost model"));
136136

137+
// Flag to choose how to narrow complex lsr solution
138+
static cl::opt<bool> LSRExpNarrow(
139+
"lsr-exp-narrow", cl::Hidden, cl::init(true),
140+
cl::desc("Narrow LSR complex solution using"
141+
" expectation of registers number"));
142+
137143
#ifndef NDEBUG
138144
// Stress test IV chain generation.
139145
static cl::opt<bool> StressIVChain(
@@ -1095,6 +1101,7 @@ class LSRUse {
10951101
}
10961102

10971103
bool HasFormulaWithSameRegs(const Formula &F) const;
1104+
float getNotSelectedProbability(const SCEV *Reg) const;
10981105
bool InsertFormula(const Formula &F);
10991106
void DeleteFormula(Formula &F);
11001107
void RecomputeRegs(size_t LUIdx, RegUseTracker &Reguses);
@@ -1373,6 +1380,15 @@ bool LSRUse::HasFormulaWithSameRegs(const Formula &F) const {
13731380
return Uniquifier.count(Key);
13741381
}
13751382

1383+
/// The function returns a probability of selecting formula without Reg.
1384+
float LSRUse::getNotSelectedProbability(const SCEV *Reg) const {
1385+
unsigned FNum = 0;
1386+
for (const Formula &F : Formulae)
1387+
if (F.referencesReg(Reg))
1388+
FNum++;
1389+
return ((float)(Formulae.size() - FNum)) / Formulae.size();
1390+
}
1391+
13761392
/// If the given formula has not yet been inserted, add it to the list, and
13771393
/// return true. Return false otherwise. The formula must be in canonical form.
13781394
bool LSRUse::InsertFormula(const Formula &F) {
@@ -1846,6 +1862,7 @@ class LSRInstance {
18461862
void NarrowSearchSpaceByDetectingSupersets();
18471863
void NarrowSearchSpaceByCollapsingUnrolledCode();
18481864
void NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters();
1865+
void NarrowSearchSpaceByDeletingCostlyFormulas();
18491866
void NarrowSearchSpaceByPickingWinnerRegs();
18501867
void NarrowSearchSpaceUsingHeuristics();
18511868

@@ -4247,6 +4264,144 @@ void LSRInstance::NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters(){
42474264
}
42484265
}
42494266

4267+
/// The function delete formulas with high registers number expectation.
4268+
/// Assuming we don't know the value of each formula (already delete
4269+
/// all inefficient), generate probability of not selecting for each
4270+
/// register.
4271+
/// For example,
4272+
/// Use1:
4273+
/// reg(a) + reg({0,+,1})
4274+
/// reg(a) + reg({-1,+,1}) + 1
4275+
/// reg({a,+,1})
4276+
/// Use2:
4277+
/// reg(b) + reg({0,+,1})
4278+
/// reg(b) + reg({-1,+,1}) + 1
4279+
/// reg({b,+,1})
4280+
/// Use3:
4281+
/// reg(c) + reg(b) + reg({0,+,1})
4282+
/// reg(c) + reg({b,+,1})
4283+
///
4284+
/// Probability of not selecting
4285+
/// Use1 Use2 Use3
4286+
/// reg(a) (1/3) * 1 * 1
4287+
/// reg(b) 1 * (1/3) * (1/2)
4288+
/// reg({0,+,1}) (2/3) * (2/3) * (1/2)
4289+
/// reg({-1,+,1}) (2/3) * (2/3) * 1
4290+
/// reg({a,+,1}) (2/3) * 1 * 1
4291+
/// reg({b,+,1}) 1 * (2/3) * (2/3)
4292+
/// reg(c) 1 * 1 * 0
4293+
///
4294+
/// Now count registers number mathematical expectation for each formula:
4295+
/// Note that for each use we exclude probability if not selecting for the use.
4296+
/// For example for Use1 probability for reg(a) would be just 1 * 1 (excluding
4297+
/// probabilty 1/3 of not selecting for Use1).
4298+
/// Use1:
4299+
/// reg(a) + reg({0,+,1}) 1 + 1/3 -- to be deleted
4300+
/// reg(a) + reg({-1,+,1}) + 1 1 + 4/9 -- to be deleted
4301+
/// reg({a,+,1}) 1
4302+
/// Use2:
4303+
/// reg(b) + reg({0,+,1}) 1/2 + 1/3 -- to be deleted
4304+
/// reg(b) + reg({-1,+,1}) + 1 1/2 + 2/3 -- to be deleted
4305+
/// reg({b,+,1}) 2/3
4306+
/// Use3:
4307+
/// reg(c) + reg(b) + reg({0,+,1}) 1 + 1/3 + 4/9 -- to be deleted
4308+
/// reg(c) + reg({b,+,1}) 1 + 2/3
4309+
4310+
void LSRInstance::NarrowSearchSpaceByDeletingCostlyFormulas() {
4311+
if (EstimateSearchSpaceComplexity() < ComplexityLimit)
4312+
return;
4313+
// Ok, we have too many of formulae on our hands to conveniently handle.
4314+
// Use a rough heuristic to thin out the list.
4315+
4316+
// Set of Regs wich will be 100% used in final solution.
4317+
// Used in each formula of a solution (in example above this is reg(c)).
4318+
// We can skip them in calculations.
4319+
SmallPtrSet<const SCEV *, 4> UniqRegs;
4320+
DEBUG(dbgs() << "The search space is too complex.\n");
4321+
4322+
// Map each register to probability of not selecting
4323+
DenseMap <const SCEV *, float> RegNumMap;
4324+
for (const SCEV *Reg : RegUses) {
4325+
if (UniqRegs.count(Reg))
4326+
continue;
4327+
float PNotSel = 1;
4328+
for (const LSRUse &LU : Uses) {
4329+
if (!LU.Regs.count(Reg))
4330+
continue;
4331+
float P = LU.getNotSelectedProbability(Reg);
4332+
if (P != 0.0)
4333+
PNotSel *= P;
4334+
else
4335+
UniqRegs.insert(Reg);
4336+
}
4337+
RegNumMap.insert(std::make_pair(Reg, PNotSel));
4338+
}
4339+
4340+
DEBUG(dbgs() << "Narrowing the search space by deleting costly formulas\n");
4341+
4342+
// Delete formulas where registers number expectation is high.
4343+
for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
4344+
LSRUse &LU = Uses[LUIdx];
4345+
// If nothing to delete - continue.
4346+
if (LU.Formulae.size() < 2)
4347+
continue;
4348+
// This is temporary solution to test performance. Float should be
4349+
// replaced with round independent type (based on integers) to avoid
4350+
// different results for different target builds.
4351+
float FMinRegNum = LU.Formulae[0].getNumRegs();
4352+
float FMinARegNum = LU.Formulae[0].getNumRegs();
4353+
size_t MinIdx = 0;
4354+
for (size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
4355+
Formula &F = LU.Formulae[i];
4356+
float FRegNum = 0;
4357+
float FARegNum = 0;
4358+
for (const SCEV *BaseReg : F.BaseRegs) {
4359+
if (UniqRegs.count(BaseReg))
4360+
continue;
4361+
FRegNum += RegNumMap[BaseReg] / LU.getNotSelectedProbability(BaseReg);
4362+
if (isa<SCEVAddRecExpr>(BaseReg))
4363+
FARegNum +=
4364+
RegNumMap[BaseReg] / LU.getNotSelectedProbability(BaseReg);
4365+
}
4366+
if (const SCEV *ScaledReg = F.ScaledReg) {
4367+
if (!UniqRegs.count(ScaledReg)) {
4368+
FRegNum +=
4369+
RegNumMap[ScaledReg] / LU.getNotSelectedProbability(ScaledReg);
4370+
if (isa<SCEVAddRecExpr>(ScaledReg))
4371+
FARegNum +=
4372+
RegNumMap[ScaledReg] / LU.getNotSelectedProbability(ScaledReg);
4373+
}
4374+
}
4375+
if (FMinRegNum > FRegNum ||
4376+
(FMinRegNum == FRegNum && FMinARegNum > FARegNum)) {
4377+
FMinRegNum = FRegNum;
4378+
FMinARegNum = FARegNum;
4379+
MinIdx = i;
4380+
}
4381+
}
4382+
DEBUG(dbgs() << " The formula "; LU.Formulae[MinIdx].print(dbgs());
4383+
dbgs() << " with min reg num " << FMinRegNum << '\n');
4384+
if (MinIdx != 0)
4385+
std::swap(LU.Formulae[MinIdx], LU.Formulae[0]);
4386+
while (LU.Formulae.size() != 1) {
4387+
DEBUG(dbgs() << " Deleting "; LU.Formulae.back().print(dbgs());
4388+
dbgs() << '\n');
4389+
LU.Formulae.pop_back();
4390+
}
4391+
LU.RecomputeRegs(LUIdx, RegUses);
4392+
assert(LU.Formulae.size() == 1 && "Should be exactly 1 min regs formula");
4393+
Formula &F = LU.Formulae[0];
4394+
DEBUG(dbgs() << " Leaving only "; F.print(dbgs()); dbgs() << '\n');
4395+
// When we choose the formula, the regs become unique.
4396+
UniqRegs.insert(F.BaseRegs.begin(), F.BaseRegs.end());
4397+
if (F.ScaledReg)
4398+
UniqRegs.insert(F.ScaledReg);
4399+
}
4400+
DEBUG(dbgs() << "After pre-selection:\n";
4401+
print_uses(dbgs()));
4402+
}
4403+
4404+
42504405
/// Pick a register which seems likely to be profitable, and then in any use
42514406
/// which has any reference to that register, delete all formulae which do not
42524407
/// reference that register.
@@ -4319,7 +4474,10 @@ void LSRInstance::NarrowSearchSpaceUsingHeuristics() {
43194474
NarrowSearchSpaceByDetectingSupersets();
43204475
NarrowSearchSpaceByCollapsingUnrolledCode();
43214476
NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters();
4322-
NarrowSearchSpaceByPickingWinnerRegs();
4477+
if (LSRExpNarrow)
4478+
NarrowSearchSpaceByDeletingCostlyFormulas();
4479+
else
4480+
NarrowSearchSpaceByPickingWinnerRegs();
43234481
}
43244482

43254483
/// This is the recursive solver.

‎llvm/test/Transforms/LoopStrengthReduce/2013-01-14-ReuseCast.ll

+1-1
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
1414
; current LSR cost model.
1515
; CHECK-NOT: = ptrtoint i8* undef to i64
1616
; CHECK: .lr.ph
17-
; CHECK: [[TMP:%[^ ]+]] = add i64 %tmp5, 1
17+
; CHECK: [[TMP:%[^ ]+]] = add i64 %4, 1
1818
; CHECK: sub i64 [[TMP]], %tmp6
1919
; CHECK: ret void
2020
define void @VerifyDiagnosticConsumerTest() unnamed_addr nounwind uwtable align 2 {

‎llvm/test/Transforms/LoopStrengthReduce/ARM/ivchain-ARM.ll

+1-1
Original file line numberDiff line numberDiff line change
@@ -139,7 +139,7 @@ for.end: ; preds = %for.body, %entry
139139
; Consequently, we should *not* form any chains.
140140
;
141141
; A9: foldedidx:
142-
; A9: ldrb{{(.w)?}} {{r[0-9]|lr}}, [{{r[0-9]|lr}}, #3]
142+
; A9: ldrb{{(.w)?}} {{r[0-9]|lr}}, [{{r[0-9]|lr}}, #403]
143143
define void @foldedidx(i8* nocapture %a, i8* nocapture %b, i8* nocapture %c) nounwind ssp {
144144
entry:
145145
br label %for.body

0 commit comments

Comments
 (0)
Please sign in to comment.