This is an archive of the discontinued LLVM Phabricator instance.

[LV][NFC] Optimize out an extra call to isMoreProfitable
Needs ReviewPublic

Authored by ebrevnov on Sep 13 2021, 3:01 AM.

Download Raw Diff

Details

Reviewers

fhahn
Ayal
anna
lebedev.ri

Summary

The current implementation of selecting the best cost for epilogue vectorization is sub-optimal and can be improved by ~2x. The problem is that isMoreProfitable is called twice. First, we compare the cost of each candidate against the scalar cost and cache those that are more profitable than the scalar one. Second, we traverse the cached VFs and try to find the most beneficial among them. However, we don't really need to know which ones are better than the scalar one: it is enough to do the second step plus one comparison against the scalar cost.

Diff Detail

Repository: rG LLVM Github Monorepo

Event Timeline

ebrevnov created this revision.Sep 13 2021, 3:01 AM

Herald added a subscriber: hiraditya. · View Herald TranscriptSep 13 2021, 3:01 AM

ebrevnov requested review of this revision.Sep 13 2021, 3:01 AM

Herald added a project: Restricted Project. · View Herald TranscriptSep 13 2021, 3:01 AM

Herald added a subscriber: llvm-commits. · View Herald Transcript

ebrevnov added a parent revision: D109678: [LV][NFC] Cache scalar cost..Sep 13 2021, 3:08 AM

ebrevnov added a child revision: D109680: [LV][WIP] Generalize cost calculation.

Harbormaster completed remote builds in B123638: Diff 372197.Sep 13 2021, 4:22 AM

ebrevnov edited the summary of this revision. (Show Details)Sep 13 2021, 5:31 AM

ebrevnov added reviewers: fhahn, lebedev.ri, Ayal, anna.

Ping

Herald added a project: Restricted Project. · View Herald TranscriptJul 19 2022, 2:36 AM

This review may be stuck/dead, consider abandoning if no longer relevant.
Removing myself as a reviewer in an attempt to clean up my dashboard.

Herald added subscribers: • pcwang-thead, StephenFan. · View Herald TranscriptJan 12 2023, 5:31 PM

Revision Contents

Path

Size

llvm/

lib/

Transforms/

Vectorize/

LoopVectorize.cpp

18 lines

Diff 372197

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

//===- LoopVectorize.cpp - A Loop Vectorizer ------------------------------===//		//===- LoopVectorize.cpp - A Loop Vectorizer ------------------------------===//
		Lint: Lint Inline Actions clang-format suggested style edits found: Lint: Lint: clang-format suggested style edits found:
//		//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.		// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.		// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception		// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//		//
//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//
//		//
// This is the LLVM loop vectorizer. This pass modifies 'vectorizable' loops		// This is the LLVM loop vectorizer. This pass modifies 'vectorizable' loops
▲ Show 20 Lines • Show All 1,900 Lines • ▼ Show 20 Lines	public:

/// Values to ignore in the cost model when VF > 1.		/// Values to ignore in the cost model when VF > 1.
SmallPtrSet<const Value *, 16> VecValuesToIgnore;		SmallPtrSet<const Value *, 16> VecValuesToIgnore;

/// All element types found in the loop.		/// All element types found in the loop.
SmallPtrSet<Type *, 16> ElementTypesInLoop;		SmallPtrSet<Type *, 16> ElementTypesInLoop;

/// Profitable vector factors.		/// Profitable vector factors.
SmallVector<VectorizationFactor, 8> ProfitableVFs;		SmallVector<VectorizationFactor, 8> CachedVFs;

/// Cached cost of one scalar iteraton;		/// Cached cost of one scalar iteraton;
VectorizationFactor ScalarVF = {ElementCount::getFixed(1),		VectorizationFactor ScalarVF = {ElementCount::getFixed(1),
InstructionCost::getInvalid()};		InstructionCost::getInvalid()};
};		};
} // end namespace llvm		} // end namespace llvm

/// Helper struct to manage generating runtime checks for vectorization.		/// Helper struct to manage generating runtime checks for vectorization.
▲ Show 20 Lines • Show All 4,176 Lines • ▼ Show 20 Lines	for (const auto &i : VFCandidates) {

if (!C.second && !ForceVectorization) {		if (!C.second && !ForceVectorization) {
LLVM_DEBUG(		LLVM_DEBUG(
dbgs() << "LV: Not considering vector loop of width " << i		dbgs() << "LV: Not considering vector loop of width " << i
<< " because it will not generate any vector instructions.\n");		<< " because it will not generate any vector instructions.\n");
continue;		continue;
}		}

// If profitable add it to ProfitableVF list.		// Cache calculated cost of the candidate since it will be needed again
if (isMoreProfitable(Candidate, getScalarVF()))		// during epilogue vectorization cost modeling.
ProfitableVFs.push_back(Candidate);		CachedVFs.push_back(Candidate);

if (isMoreProfitable(Candidate, ChosenFactor))		if (isMoreProfitable(Candidate, ChosenFactor))
ChosenFactor = Candidate;		ChosenFactor = Candidate;
}		}

// Emit a report of VFs with invalid costs in the loop.		// Emit a report of VFs with invalid costs in the loop.
if (!InvalidCosts.empty()) {		if (!InvalidCosts.empty()) {
// Group the remarks per instruction, keeping the instruction order from		// Group the remarks per instruction, keeping the instruction order from
▲ Show 20 Lines • Show All 123 Lines • ▼ Show 20 Lines	bool LoopVectorizationCostModel::isEpilogueVectorizationProfitable(
if (VF.getFixedValue() >= EpilogueVectorizationMinVF)		if (VF.getFixedValue() >= EpilogueVectorizationMinVF)
return true;		return true;
return false;		return false;
}		}

VectorizationFactor		VectorizationFactor
LoopVectorizationCostModel::selectEpilogueVectorizationFactor(		LoopVectorizationCostModel::selectEpilogueVectorizationFactor(
const ElementCount MainLoopVF, const LoopVectorizationPlanner &LVP) {		const ElementCount MainLoopVF, const LoopVectorizationPlanner &LVP) {
VectorizationFactor Result = VectorizationFactor::Disabled();		VectorizationFactor Result = getScalarVF();
if (!EnableEpilogueVectorization) {		if (!EnableEpilogueVectorization) {
LLVM_DEBUG(dbgs() << "LEV: Epilogue vectorization is disabled.\n";);		LLVM_DEBUG(dbgs() << "LEV: Epilogue vectorization is disabled.\n";);
return Result;		return Result;
}		}

if (!isScalarEpilogueAllowed()) {		if (!isScalarEpilogueAllowed()) {
LLVM_DEBUG(		LLVM_DEBUG(
dbgs() << "LEV: Unable to vectorize epilogue because no epilogue is "		dbgs() << "LEV: Unable to vectorize epilogue because no epilogue is "
Show All 38 Lines	LLVM_DEBUG(
dbgs()		dbgs()
<< "LEV: Epilogue vectorization skipped due to opt for size.\n";);		<< "LEV: Epilogue vectorization skipped due to opt for size.\n";);
return Result;		return Result;
}		}

if (!isEpilogueVectorizationProfitable(MainLoopVF))		if (!isEpilogueVectorizationProfitable(MainLoopVF))
return Result;		return Result;

for (auto &NextVF : ProfitableVFs)		for (auto &NextVF : CachedVFs) {
		if (NextVF.Width.isScalar())
		continue;
if (ElementCount::isKnownLT(NextVF.Width, MainLoopVF) &&		if (ElementCount::isKnownLT(NextVF.Width, MainLoopVF) &&
(Result.Width.getFixedValue() == 1 \|\|		isMoreProfitable(NextVF, Result) &&
isMoreProfitable(NextVF, Result)) &&
LVP.hasPlanWithVFs({MainLoopVF, NextVF.Width}))		LVP.hasPlanWithVFs({MainLoopVF, NextVF.Width}))
Result = NextVF;		Result = NextVF;
		}

if (Result != VectorizationFactor::Disabled())		if (Result != VectorizationFactor::Disabled())
LLVM_DEBUG(dbgs() << "LEV: Vectorizing epilogue loop with VF = "		LLVM_DEBUG(dbgs() << "LEV: Vectorizing epilogue loop with VF = "
<< Result.Width.getFixedValue() << "\n";);		<< Result.Width.getFixedValue() << "\n";);
return Result;		return Result;
}		}

std::pair<unsigned, unsigned>		std::pair<unsigned, unsigned>
▲ Show 20 Lines • Show All 4,313 Lines • Show Last 20 Lines