This is an archive of the discontinued LLVM Phabricator instance.

[SystemZ::TTI] Return zero cost for icmp in case of Load-And-Test
ClosedPublic

Authored by jonpa on Nov 30 2018, 2:55 AM.

Download Raw Diff

Details

Reviewers

Summary

A loaded value with multiple users compared with 0 will become a load and test single instruction. The load is not folded in this case (multiple users), but the compare instruction is eliminated.

This patch returns 0 cost for the icmp in these cases.

This changes just 33 instruction query results. One file changed on spec - two loops are now kept scalar (not vectorized), see:

report_LT44 KBDownload

I also tried handling the load i32 -> sext i64 case, but this doubled the LOCs of the patch while changing absolutely nothing (not a single LV query / file), so it did not seem useful enough to keep.

Diff Detail

Event Timeline

jonpa created this revision. Nov 30 2018, 2:55 AM

LGTM, thanks!

This revision is now accepted and ready to land. Dec 3 2018, 5:40 AM

r348141

Revision Contents

Path

Size

lib/

Target/

SystemZ/

SystemZTargetTransformInfo.cpp

10 lines

test/

Analysis/

CostModel/

SystemZ/

load-and-test.ll

25 lines

Diff 176068

lib/Target/SystemZ/SystemZTargetTransformInfo.cpp

Show First 20 Lines • Show All 833 Lines • ▼ Show 20 Lines	else { // Called with a select instruction.
getVectorBitmaskConversionCost(CmpOpTy, ValTy);		getVectorBitmaskConversionCost(CmpOpTy, ValTy);

return getNumVectorRegs(ValTy) /vsel/ + PackCost;		return getNumVectorRegs(ValTy) /vsel/ + PackCost;
}		}
}		}
else { // Scalar		else { // Scalar
switch (Opcode) {		switch (Opcode) {
case Instruction::ICmp: {		case Instruction::ICmp: {
		// A loaded value compared with 0 with multiple users becomes Load and
		// Test. The load is then not foldable, so return 0 cost for the ICmp.
		unsigned ScalarBits = ValTy->getScalarSizeInBits();
		if (I != nullptr && ScalarBits >= 32)
		if (LoadInst *Ld = dyn_cast<LoadInst>(I->getOperand(0)))
		if (const ConstantInt *C = dyn_cast<ConstantInt>(I->getOperand(1)))
		if (!Ld->hasOneUse() && Ld->getParent() == I->getParent() &&
		C->getZExtValue() == 0)
		return 0;

unsigned Cost = 1;		unsigned Cost = 1;
if (ValTy->isIntegerTy() && ValTy->getScalarSizeInBits() <= 16)		if (ValTy->isIntegerTy() && ValTy->getScalarSizeInBits() <= 16)
Cost += (I != nullptr ? getOperandsExtensionCost(I) : 2);		Cost += (I != nullptr ? getOperandsExtensionCost(I) : 2);
return Cost;		return Cost;
}		}
case Instruction::Select:		case Instruction::Select:
if (ValTy->isFloatingPointTy())		if (ValTy->isFloatingPointTy())
return 4; // No load on condition for FP - costs a conditional jump.		return 4; // No load on condition for FP - costs a conditional jump.
▲ Show 20 Lines • Show All 266 Lines • Show Last 20 Lines

test/Analysis/CostModel/SystemZ/load-and-test.ll

This file was added.

				; RUN: opt < %s -cost-model -analyze -mtriple=systemz-unknown -mcpu=z13 | FileCheck %s
				;
				; Test that load and test results in 0 cost for the compare.

				; fun0: 64-bit case. %Ld1 is loaded once and then used by both the icmp
				; and the select, so the load has more than one user. The compare of the
				; loaded value against 0 is expected to be reported with a cost of 0.
				define i64 @fun0(i64* %Src, i64 %Arg) {
				%Ld1 = load i64, i64* %Src
				%Cmp = icmp eq i64 %Ld1, 0
				%S = select i1 %Cmp, i64 %Arg, i64 %Ld1
				ret i64 %S
				; Expected per-instruction costs: load = 1, icmp = 0, select = 1.
				; CHECK: Printing analysis 'Cost Model Analysis' for function 'fun0':
				; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %Ld1 = load i64, i64* %Src
				; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %Cmp = icmp eq i64 %Ld1, 0
				; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %S = select
				}

				; fun1: 32-bit case. %Ld1 is loaded once and then used by both the icmp
				; and the select, so the load has more than one user. The compare of the
				; loaded value against 0 is expected to be reported with a cost of 0.
				define i32 @fun1(i32* %Src, i32 %Arg) {
				%Ld1 = load i32, i32* %Src
				%Cmp = icmp eq i32 %Ld1, 0
				%S = select i1 %Cmp, i32 %Arg, i32 %Ld1
				ret i32 %S
				; Expected per-instruction costs: load = 1, icmp = 0, select = 1.
				; CHECK: Printing analysis 'Cost Model Analysis' for function 'fun1':
				; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %Ld1 = load i32, i32* %Src
				; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %Cmp = icmp eq i32 %Ld1, 0
				; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %S = select
				}