This is an archive of the discontinued LLVM Phabricator instance.

Paths

Table of Contents

-
llvm/lib/
-
lib/
-
IR/
-
IRBuilder.cpp
-
Transforms/Vectorize/
-
Vectorize/
4/5
LoopVectorize.cpp

Differential D100763

[LoopVectorize] Don't create unnecessary vscale intrinsic calls
ClosedPublic

Authored by david-arm on Apr 19 2021, 7:21 AM.

Download Raw Diff

Details

Reviewers

sdesmalen
c-rhodes
frasercrmck
kmclaughlin

Commits

rG5a229a6702e2: [LoopVectorize] Don't create unnecessary vscale intrinsic calls

Summary

In quite a few cases in LoopVectorize.cpp we call createStepForVF
with a step value of 0, which leads to unnecessary generation of
llvm.vscale intrinsic calls. I've optimised IRBuilder::CreateVScale
and createStepForVF to return 0 when attempting to multiply
vscale by 0.

Diff Detail

Event Timeline

david-arm created this revision.Apr 19 2021, 7:21 AM

Herald added a subscriber: hiraditya. · View Herald TranscriptApr 19 2021, 7:21 AM

david-arm requested review of this revision.Apr 19 2021, 7:21 AM

Herald added a project: Restricted Project. · View Herald TranscriptApr 19 2021, 7:21 AM

Herald added a subscriber: llvm-commits. · View Herald Transcript

Harbormaster completed remote builds in B99474: Diff 338515.Apr 19 2021, 8:43 AM

frasercrmck added inline comments.Apr 20 2021, 1:22 AM

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
1116	Not that this is wrong, but would it be inappropriate to make `CreateVScale` detect a zero step and return zero?

frasercrmck added inline comments.Apr 20 2021, 1:23 AM

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
1116	Sorry I just realised that's what you've done. Do we need to make this change, then?

david-arm added inline comments.Apr 20 2021, 1:30 AM

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
1116	It's not really necessary as the end result is the same - I was just trying to avoid the compiler doing more work for the Step=0 case, i.e. the creation of a ConstantInt, etc. I noticed there are a few places in the vectoriser where we call this function with a Step of 0 - it happens when widening a PHI instruction or vectorising induction variables. If you prefer I can just revert this code?

frasercrmck added inline comments.Apr 20 2021, 1:33 AM

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
1116	I don't feel particularly strongly. I suppose my initial comment was trying to avoid repeating ourselves and duplicating logic. But I also realise that this is how compile-time regressions creep in. I don't know how impactful the creation of a zero ConstantInt is? Presumably it's cached more often than not?

sdesmalen added inline comments.Apr 20 2021, 1:34 AM

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
1116	The overhead of creating the ConstantInt is small, and this only happens on a small number of cases when the step value is actually zero. Personally I'd prefer this to be implemented only once in IRBuilder, and have this change removed to make the code simpler.

Reverted change to createStepForVF

david-arm marked 3 inline comments as done.Apr 20 2021, 3:54 AM

Is it worth adding a test to llvm/unittests/IR/IRBuilderTest.cpp for this change?

Harbormaster completed remote builds in B99675: Diff 338804.Apr 20 2021, 5:31 AM

Added unit test for vscale * 0 case.

LGTM, thanks for adding the test.

This revision is now accepted and ready to land.Apr 21 2021, 2:46 AM

LGTM too, thanks.

Harbormaster completed remote builds in B99925: Diff 339154.Apr 21 2021, 3:23 AM

Closed by commit rG5a229a6702e2: [LoopVectorize] Don't create unnecessary vscale intrinsic calls (authored by david-arm). · Explain Why · Apr 22 2021, 1:02 AM

This revision was automatically updated to reflect the committed changes.

david-arm added a commit: rG5a229a6702e2: [LoopVectorize] Don't create unnecessary vscale intrinsic calls.

Revision Contents

Path

Size

llvm/

lib/

IR/

IRBuilder.cpp

4 lines

Transforms/

Vectorize/

LoopVectorize.cpp

15 lines

Diff 338515

llvm/lib/IR/IRBuilder.cpp

Show First 20 Lines • Show All 75 Lines • ▼ Show 20 Lines	static CallInst createCallHelper(Function Callee, ArrayRef<Value *> Ops,
ArrayRef<OperandBundleDef> OpBundles = {}) {		ArrayRef<OperandBundleDef> OpBundles = {}) {
CallInst *CI = Builder->CreateCall(Callee, Ops, OpBundles, Name);		CallInst *CI = Builder->CreateCall(Callee, Ops, OpBundles, Name);
if (FMFSource)		if (FMFSource)
CI->copyFastMathFlags(FMFSource);		CI->copyFastMathFlags(FMFSource);
return CI;		return CI;
}		}

Value *IRBuilderBase::CreateVScale(Constant *Scaling, const Twine &Name) {		Value *IRBuilderBase::CreateVScale(Constant *Scaling, const Twine &Name) {
Module *M = GetInsertBlock()->getParent()->getParent();
assert(isa<ConstantInt>(Scaling) && "Expected constant integer");		assert(isa<ConstantInt>(Scaling) && "Expected constant integer");
		if (cast<ConstantInt>(Scaling)->isZero())
		return Scaling;
		Module *M = GetInsertBlock()->getParent()->getParent();
Function *TheFn =		Function *TheFn =
Intrinsic::getDeclaration(M, Intrinsic::vscale, {Scaling->getType()});		Intrinsic::getDeclaration(M, Intrinsic::vscale, {Scaling->getType()});
CallInst *CI = createCallHelper(TheFn, {}, this, Name);		CallInst *CI = createCallHelper(TheFn, {}, this, Name);
return cast<ConstantInt>(Scaling)->getSExtValue() == 1		return cast<ConstantInt>(Scaling)->getSExtValue() == 1
? CI		? CI
: CreateMul(CI, Scaling);		: CreateMul(CI, Scaling);
}		}

▲ Show 20 Lines • Show All 1,089 Lines • Show Last 20 Lines

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 1,102 Lines • ▼ Show 20 Lines	static OptimizationRemarkAnalysis createLVAnalysis(const char *PassName,
OptimizationRemarkAnalysis R(PassName, RemarkName, DL, CodeRegion);		OptimizationRemarkAnalysis R(PassName, RemarkName, DL, CodeRegion);
R << "loop not vectorized: ";		R << "loop not vectorized: ";
return R;		return R;
}		}

/// Return a value for Step multiplied by VF.		/// Return a value for Step multiplied by VF.
static Value *createStepForVF(IRBuilder<> &B, Constant *Step, ElementCount VF) {		static Value *createStepForVF(IRBuilder<> &B, Constant *Step, ElementCount VF) {
assert(isa<ConstantInt>(Step) && "Expected an integer step");		assert(isa<ConstantInt>(Step) && "Expected an integer step");
Constant *StepVal = ConstantInt::get(		uint64_t StepVal = cast<ConstantInt>(Step)->getSExtValue();
Step->getType(),		if (StepVal == 0)
cast<ConstantInt>(Step)->getSExtValue() * VF.getKnownMinValue());		return Step;
return VF.isScalable() ? B.CreateVScale(StepVal) : StepVal;		Constant *NewStep =
		ConstantInt::get(Step->getType(), StepVal * VF.getKnownMinValue());
		return VF.isScalable() ? B.CreateVScale(NewStep) : NewStep;
		frasercrmckUnsubmitted Done Reply Inline Actions Not that this is wrong, but would it be inappropriate to make `CreateVScale` detect a zero step and return zero? frasercrmck: Not that this is wrong, but would it be inappropriate to make `CreateVScale` detect a zero step…
		frasercrmckUnsubmitted Not Done Reply Inline Actions Sorry I just realised that's what you've done. Do we need to make this change, then? frasercrmck: Sorry I just realised that's what you've done. Do we need to make this change, then?
		david-armAuthorUnsubmitted Done Reply Inline Actions It's not really necessary as the end result is the same - I was just trying to avoid the compiler doing more work for the Step=0 case, i.e. the creation of a ConstantInt, etc. I noticed there are a few places in the vectoriser where we call this function with a Step of 0 - it happens when widening a PHI instruction or vectorising induction variables. If you prefer I can just revert this code? david-arm: It's not really necessary as the end result is the same - I was just trying to avoid the…
		frasercrmckUnsubmitted Done Reply Inline Actions I don't feel particularly strongly. I suppose my initial comment was trying to avoid repeating ourselves and duplicating logic. But I also realise that this is how compile-time regressions creep in. I don't know how impactful the creation of a zero ConstantInt is? Presumably it's cached more often than not? frasercrmck: I don't feel particularly strongly. I suppose my initial comment was trying to avoid repeating…
		sdesmalenUnsubmitted Done Reply Inline Actions The overhead of creating the ConstantInt is small, and this only happens on a small number of cases when the step value is actually zero. Personally I'd prefer this to be implemented only once in IRBuilder, and have this change removed to make the code simpler. sdesmalen: The overhead of creating the ConstantInt is small, and this only happens on a small number of…
}		}

namespace llvm {		namespace llvm {

/// Return the runtime value for VF.		/// Return the runtime value for VF.
Value *getRuntimeVF(IRBuilder<> &B, Type *Ty, ElementCount VF) {		Value *getRuntimeVF(IRBuilder<> &B, Type *Ty, ElementCount VF) {
Constant *EC = ConstantInt::get(Ty, VF.getKnownMinValue());		Constant *EC = ConstantInt::get(Ty, VF.getKnownMinValue());
return VF.isScalable() ? B.CreateVScale(EC) : EC;		return VF.isScalable() ? B.CreateVScale(EC) : EC;
▲ Show 20 Lines • Show All 3,640 Lines • ▼ Show 20 Lines	if (Cost->isScalarAfterVectorization(P, State.VF)) {
// Determine the number of scalars we need to generate for each unroll		// Determine the number of scalars we need to generate for each unroll
// iteration. If the instruction is uniform, we only need to generate the		// iteration. If the instruction is uniform, we only need to generate the
// first lane. Otherwise, we generate all VF values.		// first lane. Otherwise, we generate all VF values.
bool IsUniform = Cost->isUniformAfterVectorization(P, State.VF);		bool IsUniform = Cost->isUniformAfterVectorization(P, State.VF);
assert((IsUniform || !VF.isScalable()) &&		assert((IsUniform || !VF.isScalable()) &&
"Currently unsupported for scalable vectors");		"Currently unsupported for scalable vectors");
unsigned Lanes = IsUniform ? 1 : State.VF.getFixedValue();		unsigned Lanes = IsUniform ? 1 : State.VF.getFixedValue();

Value *RuntimeVF = getRuntimeVF(Builder, PtrInd->getType(), VF);
for (unsigned Part = 0; Part < UF; ++Part) {		for (unsigned Part = 0; Part < UF; ++Part) {
Value *PartStart = Builder.CreateMul(		Value *PartStart = createStepForVF(
RuntimeVF, ConstantInt::get(PtrInd->getType(), Part));		Builder, ConstantInt::get(PtrInd->getType(), Part), VF);
for (unsigned Lane = 0; Lane < Lanes; ++Lane) {		for (unsigned Lane = 0; Lane < Lanes; ++Lane) {
Value *Idx = Builder.CreateAdd(		Value *Idx = Builder.CreateAdd(
PartStart, ConstantInt::get(PtrInd->getType(), Lane));		PartStart, ConstantInt::get(PtrInd->getType(), Lane));
Value *GlobalIdx = Builder.CreateAdd(PtrInd, Idx);		Value *GlobalIdx = Builder.CreateAdd(PtrInd, Idx);
Value *SclrGep =		Value *SclrGep =
emitTransformedIndex(Builder, GlobalIdx, PSE.getSE(), DL, II);		emitTransformedIndex(Builder, GlobalIdx, PSE.getSE(), DL, II);
SclrGep->setName("next.gep");		SclrGep->setName("next.gep");
State.set(PhiR, SclrGep, VPIteration(Part, Lane));		State.set(PhiR, SclrGep, VPIteration(Part, Lane));
▲ Show 20 Lines • Show All 5,297 Lines • Show Last 20 Lines