This is an archive of the discontinued LLVM Phabricator instance.

[LoopVectorizer] Fix in getScalarizationOverhead()
ClosedPublic

Authored by jonpa on Sep 21 2018, 4:53 AM.

Download Raw Diff

Details

Reviewers

hfinkel
uweigand

Summary

If the target returns false from TTI.prefersVectorizedAddressing(), it means the address registers will not need to be extracted. Therefore, there should be no operands scalarization overhead for a load instruction.

Test for SystemZ.

Diff Detail

Event Timeline

jonpa created this revision. Sep 21 2018, 4:53 AM

LGTM

This revision is now accepted and ready to land. Sep 21 2018, 10:09 AM

jonpa mentioned this in D52417: [LoopVectorizer] Don't pass the instruction pointer from getMemInstScalarizationCost.. Sep 24 2018, 6:42 AM

Thanks for review.

Updated just the test to make it clearer that the address computation is in fact scalar (the multiply).

LGTM as well.

Thanks for review. I am holding off on committing this until https://reviews.llvm.org/D52417 is approved; that is a simple fix I found necessary after applying this patch.

Committed in r345603.

Revision Contents

Path

Size

lib/

Transforms/

Vectorize/

LoopVectorize.cpp

4 lines

test/

Transforms/

LoopVectorize/

SystemZ/

load-scalarization-cost-0.ll

27 lines

Diff 167303

lib/Transforms/Vectorize/LoopVectorize.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 2,881 Lines • ▼ Show 20 Lines	static unsigned getScalarizationOverhead(Instruction *I, unsigned VF,

unsigned Cost = 0;		unsigned Cost = 0;
Type *RetTy = ToVectorTy(I->getType(), VF);		Type *RetTy = ToVectorTy(I->getType(), VF);
if (!RetTy->isVoidTy() &&		if (!RetTy->isVoidTy() &&
(!isa<LoadInst>(I) \|\|		(!isa<LoadInst>(I) \|\|
!TTI.supportsEfficientVectorElementLoadStore()))		!TTI.supportsEfficientVectorElementLoadStore()))
Cost += TTI.getScalarizationOverhead(RetTy, true, false);		Cost += TTI.getScalarizationOverhead(RetTy, true, false);

		// Some targets keep addresses scalar.
		if (isa<LoadInst>(I) && !TTI.prefersVectorizedAddressing())
		return Cost;

if (CallInst *CI = dyn_cast<CallInst>(I)) {		if (CallInst *CI = dyn_cast<CallInst>(I)) {
SmallVector<const Value *, 4> Operands(CI->arg_operands());		SmallVector<const Value *, 4> Operands(CI->arg_operands());
Cost += TTI.getOperandsScalarizationOverhead(Operands, VF);		Cost += TTI.getOperandsScalarizationOverhead(Operands, VF);
}		}
else if (!isa<StoreInst>(I) \|\|		else if (!isa<StoreInst>(I) \|\|
!TTI.supportsEfficientVectorElementLoadStore()) {		!TTI.supportsEfficientVectorElementLoadStore()) {
SmallVector<const Value *, 4> Operands(I->operand_values());		SmallVector<const Value *, 4> Operands(I->operand_values());
Cost += TTI.getOperandsScalarizationOverhead(Operands, VF);		Cost += TTI.getOperandsScalarizationOverhead(Operands, VF);
▲ Show 20 Lines • Show All 4,388 Lines • Show Last 20 Lines

test/Transforms/LoopVectorize/SystemZ/load-scalarization-cost-0.ll

This file was added.

				; RUN: opt -mtriple=s390x-unknown-linux -mcpu=z13 -loop-vectorize \
				; RUN: -force-vector-width=2 -debug-only=loop-vectorize \
				; RUN: -disable-output < %s 2>&1 | FileCheck %s
				; REQUIRES: asserts
				;
				; Check that a scalarized load does not get operands scalarization costs added.

				; Single-block counted loop whose only memory access is one i64 load. The
				; load address is formed from %Src indexed by %iv * %s. Neither %data nor the
				; loaded value %ld is used anywhere else in this function.
				define void @fun(i64* %data, i64 %n, i64 %s, double* %Src) {
				entry:
				br label %for.body

				for.body:
				%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
				; Element index for the load: the induction variable times the argument %s.
				%mul = mul nsw i64 %iv, %s
				%gep = getelementptr inbounds double, double* %Src, i64 %mul
				; Reinterpret the double* element address as i64* so the load below is an i64 load.
				%bct = bitcast double* %gep to i64*
				%ld = load i64, i64* %bct
				%iv.next = add nuw nsw i64 %iv, 1
				; Stay in the loop while %iv.next is (signed) less than %n.
				%cmp110.us = icmp slt i64 %iv.next, %n
				br i1 %cmp110.us, label %for.body, label %for.end

				for.end:
				ret void

				; Expected cost-model debug lines at VF 2: an estimated cost of 2 for the
				; multiply and an estimated cost of 2 for the load.
				; CHECK: LV: Found an estimated cost of 2 for VF 2 For instruction: %mul = mul nsw i64 %iv, %s
				; CHECK: LV: Found an estimated cost of 2 for VF 2 For instruction: %ld = load i64, i64* %bct
				}