This is an archive of the discontinued LLVM Phabricator instance.

Paths

Table of Contents

-
llvm/
-
lib/Transforms/AggressiveInstCombine/
-
Transforms/
-
AggressiveInstCombine/
-
TruncInstCombine.cpp
-
test/Transforms/AggressiveInstCombine/
-
Transforms/
-
AggressiveInstCombine/
-
trunc_vector_instrs.ll

Differential D122233

[AggressiveInstCombine] Add `shufflevector` instr support to `TruncInstCombine`
Abandoned · Public

Authored by anton-afanasyev on Mar 22 2022, 8:05 AM.

Download Raw Diff

Details

Reviewers

lebedev.ri
RKSimon

Summary

Add shufflevector instruction to the expression graph post-dominated by trunc,
allowing TruncInstCombine to reduce bitwidth of expressions containing these
instructions.

Fixes #54149

Diff Detail

Repository: rG LLVM Github Monorepo

Unit Tests: Failed

	Time	Test
	60,030 ms	x64 debian > libFuzzer.libFuzzer::large.test

Event Timeline

anton-afanasyev created this revision.Mar 22 2022, 8:05 AM

Herald added a project: Restricted Project. · View Herald TranscriptMar 22 2022, 8:05 AM

Herald added a subscriber: hiraditya. · View Herald Transcript

anton-afanasyev requested review of this revision.Mar 22 2022, 8:05 AM

Herald added a project: Restricted Project. · View Herald TranscriptMar 22 2022, 8:05 AM

Herald added a subscriber: llvm-commits. · View Herald Transcript

anton-afanasyev edited the summary of this revision. (Show Details)Mar 22 2022, 8:06 AM

Do we expect that unused inputs of the shuffle have already been replaced with undef?

In D122233#3399664, @lebedev.ri wrote:

Do we expect that unused inputs of the shuffle have already been replaced with undef?

I don't see how this could be an issue. For instance, the @unary_shuffle() test case contains a shuffle with undef (line 25). Do you mean this case?

Harbormaster completed remote builds in B155629: Diff 417301.Mar 22 2022, 8:53 AM

In D122233#3399767, @anton-afanasyev wrote:

In D122233#3399664, @lebedev.ri wrote:

Do we expect that unused inputs of the shuffle have already been replaced with undef?

I don't see how this could be an issue. For instance, the @unary_shuffle() test case contains a shuffle with undef (line 25). Do you mean this case?

I mean, what if we have a two-input shuffle, and one of the operands is unused as per the shuffle mask?
Then said operand can be replaced with undef, which doesn't affect the narrowing, whereas the original operand might.
I guess it's a theoretical question, mainly.

This might introduce regressions as the shuffle costs for the same mask but different element types can vary considerably (SSE v4i32/v4i16 unary shuffles are really cheap but v4i8 or v4i64 can be a lot more expensive).

anton-afanasyev abandoned this revision.May 14 2022, 7:47 AM

FWIW we might be able to perform something similar inside VectorCombine

In D122233#3513581, @RKSimon wrote:

FWIW we might be able to perform something similar inside VectorCombine

Do you mean using TTI.getShuffleCost()? There's an issue here: we don't know the exact shuffle type at the moment we need to get its cost. We infer this type (given by MinBitWidth) only after the expression graph has already been built. Handling this case would require refactoring the whole pass, which looks excessive.

Revision Contents

Path

Size

llvm/

lib/

Transforms/

AggressiveInstCombine/

TruncInstCombine.cpp

14 lines

test/

Transforms/

AggressiveInstCombine/

trunc_vector_instrs.ll

20 lines

Diff 417301

llvm/lib/Transforms/AggressiveInstCombine/TruncInstCombine.cpp

Show First 20 Lines • Show All 60 Lines • ▼ Show 20 Lines	static void getRelevantOperands(Instruction *I, SmallVectorImpl<Value *> &Ops) {
case Instruction::Or:		case Instruction::Or:
case Instruction::Xor:		case Instruction::Xor:
case Instruction::Shl:		case Instruction::Shl:
case Instruction::LShr:		case Instruction::LShr:
case Instruction::AShr:		case Instruction::AShr:
case Instruction::UDiv:		case Instruction::UDiv:
case Instruction::URem:		case Instruction::URem:
case Instruction::InsertElement:		case Instruction::InsertElement:
		case Instruction::ShuffleVector:
Ops.push_back(I->getOperand(0));		Ops.push_back(I->getOperand(0));
Ops.push_back(I->getOperand(1));		Ops.push_back(I->getOperand(1));
break;		break;
case Instruction::ExtractElement:		case Instruction::ExtractElement:
Ops.push_back(I->getOperand(0));		Ops.push_back(I->getOperand(0));
break;		break;
case Instruction::Select:		case Instruction::Select:
Ops.push_back(I->getOperand(1));		Ops.push_back(I->getOperand(1));
▲ Show 20 Lines • Show All 63 Lines • ▼ Show 20 Lines	while (!Worklist.empty()) {
case Instruction::Or:		case Instruction::Or:
case Instruction::Xor:		case Instruction::Xor:
case Instruction::Shl:		case Instruction::Shl:
case Instruction::LShr:		case Instruction::LShr:
case Instruction::AShr:		case Instruction::AShr:
case Instruction::UDiv:		case Instruction::UDiv:
case Instruction::URem:		case Instruction::URem:
case Instruction::InsertElement:		case Instruction::InsertElement:
		case Instruction::ShuffleVector:
case Instruction::ExtractElement:		case Instruction::ExtractElement:
case Instruction::Select: {		case Instruction::Select: {
SmallVector<Value *, 2> Operands;		SmallVector<Value *, 2> Operands;
getRelevantOperands(I, Operands);		getRelevantOperands(I, Operands);
append_range(Worklist, Operands);		append_range(Worklist, Operands);
break;		break;
}		}
case Instruction::PHI: {		case Instruction::PHI: {
SmallVector<Value *, 2> Operands;		SmallVector<Value *, 2> Operands;
getRelevantOperands(I, Operands);		getRelevantOperands(I, Operands);
// Add only operands not in Stack to prevent cycle		// Add only operands not in Stack to prevent cycle
for (auto *Op : Operands)		for (auto *Op : Operands)
if (all_of(Stack, [Op](Value *V) { return Op != V; }))		if (all_of(Stack, [Op](Value *V) { return Op != V; }))
Worklist.push_back(Op);		Worklist.push_back(Op);
break;		break;
}		}
default:		default:
// TODO: Can handle more cases here:		// TODO: Can handle more cases here: sdiv, srem, ...
// 1. shufflevector
// 2. sdiv, srem
// ...
return false;		return false;
}		}
}		}
return true;		return true;
}		}

unsigned TruncInstCombine::getMinBitWidth() {		unsigned TruncInstCombine::getMinBitWidth() {
SmallVector<Value *, 8> Worklist;		SmallVector<Value *, 8> Worklist;
▲ Show 20 Lines • Show All 274 Lines • ▼ Show 20 Lines	for (auto &Itr : InstInfoMap) { // Forward
}		}
case Instruction::InsertElement: {		case Instruction::InsertElement: {
Value *Vec = getReducedOperand(I->getOperand(0), SclTy);		Value *Vec = getReducedOperand(I->getOperand(0), SclTy);
Value *NewElt = getReducedOperand(I->getOperand(1), SclTy);		Value *NewElt = getReducedOperand(I->getOperand(1), SclTy);
Value *Idx = I->getOperand(2);		Value *Idx = I->getOperand(2);
Res = Builder.CreateInsertElement(Vec, NewElt, Idx);		Res = Builder.CreateInsertElement(Vec, NewElt, Idx);
break;		break;
}		}
		case Instruction::ShuffleVector: {
		Value *Vec1 = getReducedOperand(I->getOperand(0), SclTy);
		Value *Vec2 = getReducedOperand(I->getOperand(1), SclTy);
		ArrayRef<int> Mask = cast<ShuffleVectorInst>(I)->getShuffleMask();
		Res = Builder.CreateShuffleVector(Vec1, Vec2, Mask);
		break;
		}
case Instruction::Select: {		case Instruction::Select: {
Value *Op0 = I->getOperand(0);		Value *Op0 = I->getOperand(0);
Value *LHS = getReducedOperand(I->getOperand(1), SclTy);		Value *LHS = getReducedOperand(I->getOperand(1), SclTy);
Value *RHS = getReducedOperand(I->getOperand(2), SclTy);		Value *RHS = getReducedOperand(I->getOperand(2), SclTy);
Res = Builder.CreateSelect(Op0, LHS, RHS);		Res = Builder.CreateSelect(Op0, LHS, RHS);
break;		break;
}		}
case Instruction::PHI: {		case Instruction::PHI: {
▲ Show 20 Lines • Show All 90 Lines • Show Last 20 Lines

llvm/test/Transforms/AggressiveInstCombine/trunc_vector_instrs.ll

	; NOTE: Assertions have been autogenerated by utils/update_test_checks.py			; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
	; RUN: opt < %s -aggressive-instcombine -S | FileCheck %s			; RUN: opt < %s -aggressive-instcombine -S | FileCheck %s

	define <4 x i16> @shuffle(<2 x i8> %a, <2 x i8> %b) {			define <4 x i16> @shuffle(<2 x i8> %a, <2 x i8> %b) {
	; CHECK-LABEL: @shuffle(			; CHECK-LABEL: @shuffle(
	; CHECK-NEXT: [[ZEXTA:%.*]] = zext <2 x i8> [[A:%.*]] to <2 x i32>			; CHECK-NEXT: [[ZEXTA:%.*]] = zext <2 x i8> [[A:%.*]] to <2 x i16>
	; CHECK-NEXT: [[ZEXTB:%.*]] = zext <2 x i8> [[B:%.*]] to <2 x i32>			; CHECK-NEXT: [[ZEXTB:%.*]] = zext <2 x i8> [[B:%.*]] to <2 x i16>
	; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <2 x i32> [[ZEXTA]], <2 x i32> [[ZEXTB]], <4 x i32> <i32 3, i32 2, i32 1, i32 0>			; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <2 x i16> [[ZEXTA]], <2 x i16> [[ZEXTB]], <4 x i32> <i32 3, i32 2, i32 1, i32 0>
	; CHECK-NEXT: [[TRUNC:%.*]] = trunc <4 x i32> [[SHUF]] to <4 x i16>			; CHECK-NEXT: ret <4 x i16> [[SHUF]]
	; CHECK-NEXT: ret <4 x i16> [[TRUNC]]
	;			;
	%zexta = zext <2 x i8> %a to <2 x i32>			%zexta = zext <2 x i8> %a to <2 x i32>
	%zextb = zext <2 x i8> %b to <2 x i32>			%zextb = zext <2 x i8> %b to <2 x i32>
	%shuf = shufflevector <2 x i32> %zexta, <2 x i32> %zextb, <4 x i32> <i32 3, i32 2, i32 1, i32 0>			%shuf = shufflevector <2 x i32> %zexta, <2 x i32> %zextb, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
	%trunc = trunc <4 x i32> %shuf to <4 x i16>			%trunc = trunc <4 x i32> %shuf to <4 x i16>
	ret <4 x i16> %trunc			ret <4 x i16> %trunc
	}			}

	define <2 x i16> @unary_shuffle(<2 x i8> %a) {			define <2 x i16> @unary_shuffle(<2 x i8> %a) {
	; CHECK-LABEL: @unary_shuffle(			; CHECK-LABEL: @unary_shuffle(
	; CHECK-NEXT: [[ZEXTA:%.*]] = zext <2 x i8> [[A:%.*]] to <2 x i32>			; CHECK-NEXT: [[ZEXTA:%.*]] = zext <2 x i8> [[A:%.*]] to <2 x i16>
	; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <2 x i32> [[ZEXTA]], <2 x i32> undef, <2 x i32> <i32 1, i32 0>			; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <2 x i16> [[ZEXTA]], <2 x i16> undef, <2 x i32> <i32 1, i32 0>
	; CHECK-NEXT: [[TRUNC:%.*]] = trunc <2 x i32> [[SHUF]] to <2 x i16>			; CHECK-NEXT: ret <2 x i16> [[SHUF]]
	; CHECK-NEXT: ret <2 x i16> [[TRUNC]]
	;			;
	%zexta = zext <2 x i8> %a to <2 x i32>			%zexta = zext <2 x i8> %a to <2 x i32>
	%shuf = shufflevector <2 x i32> %zexta, <2 x i32> undef, <2 x i32> <i32 1, i32 0>			%shuf = shufflevector <2 x i32> %zexta, <2 x i32> undef, <2 x i32> <i32 1, i32 0>
	%trunc = trunc <2 x i32> %shuf to <2 x i16>			%trunc = trunc <2 x i32> %shuf to <2 x i16>
	ret <2 x i16> %trunc			ret <2 x i16> %trunc
	}			}

	define <4 x i16> @const_shuffle() {			define <4 x i16> @const_shuffle() {
	; CHECK-LABEL: @const_shuffle(			; CHECK-LABEL: @const_shuffle(
	; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <2 x i32> <i32 1, i32 2>, <2 x i32> <i32 3, i32 7>, <4 x i32> <i32 3, i32 2, i32 1, i32 0>			; CHECK-NEXT: ret <4 x i16> <i16 7, i16 3, i16 2, i16 1>
	; CHECK-NEXT: [[TRUNC:%.*]] = trunc <4 x i32> [[SHUF]] to <4 x i16>
	; CHECK-NEXT: ret <4 x i16> [[TRUNC]]
	;			;
	%shuf = shufflevector <2 x i32> <i32 1, i32 2>, <2 x i32> <i32 3, i32 7>, <4 x i32> <i32 3, i32 2, i32 1, i32 0>			%shuf = shufflevector <2 x i32> <i32 1, i32 2>, <2 x i32> <i32 3, i32 7>, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
	%trunc = trunc <4 x i32> %shuf to <4 x i16>			%trunc = trunc <4 x i32> %shuf to <4 x i16>
	ret <4 x i16> %trunc			ret <4 x i16> %trunc
	}			}


	define <2 x i16> @extract_insert(<2 x i8> %a, <2 x i8> %b) {			define <2 x i16> @extract_insert(<2 x i8> %a, <2 x i8> %b) {
	▲ Show 20 Lines • Show All 45 Lines • Show Last 20 Lines