This is an archive of the discontinued LLVM Phabricator instance.

[InstCombine] Fold insert sequence if first ins has multiple users.
ClosedPublic

Authored by fhahn on Aug 23 2017, 7:30 AM.

Download Raw Diff

Details

Reviewers

grosser
mkuper
fpetrogalli
efriedma

Commits

rGb992feee1327: [InstCombine] Fold insert sequence if first ins has multiple users.
rL312110: [InstCombine] Fold insert sequence if first ins has multiple users.

Summary

If the first insertelement instruction has multiple users and inserts at
position 0, we can re-use this instruction when folding a chain of
insertelement instructions. As we need to generate the first
insertelement instruction anyway, this should be a strict improvement.

We could get rid of the restriction of inserting at position 0 by
creating a different shuffle mask, but it is probably worth keeping the
first insertelement instruction at position 0, as I think inserting there
is easier to do efficiently than inserting at other positions.

Diff Detail

Event Timeline

fhahn created this revision.Aug 23 2017, 7:30 AM

Very nice. This looks good from my perspective!

fpetrogalli added inline comments.Aug 24 2017, 2:25 AM

test/Transforms/InstCombine/broadcast.ll
63	Hi Florian, this looks good to me too, but shouldn't you also check that the transformation does not happen if any lane of %ins1, other than the first one, is not undef? Or is this case covered somewhere else? Francesco

Thanks Francesco, I've added a test case for both cases and fixed the condition.

fhahn marked an inline comment as done.Aug 25 2017, 10:13 AM

grosser added a subscriber: gareevroman.Aug 26 2017, 12:52 AM

This LGTM now.

Thanks,

Francesco

This revision is now accepted and ready to land.Aug 29 2017, 3:39 AM

fhahn closed this revision.Aug 30 2017, 3:55 AM

Revision Contents

Path

Size

lib/

Transforms/

InstCombine/

InstCombineVectorOps.cpp

24 lines

test/

Transforms/

InstCombine/

broadcast.ll

28 lines

Diff 112708

lib/Transforms/InstCombine/InstCombineVectorOps.cpp

Show First 20 Lines • Show All 609 Lines • ▼ Show 20 Lines	static Instruction *foldInsSequenceIntoBroadcast(InsertElementInst &InsElt) {
// Do not try to do this for a one-element vector, since that's a nop,		// Do not try to do this for a one-element vector, since that's a nop,
// and will cause an inf-loop.		// and will cause an inf-loop.
if (NumElements == 1)		if (NumElements == 1)
return nullptr;		return nullptr;

Value *SplatVal = InsElt.getOperand(1);		Value *SplatVal = InsElt.getOperand(1);
InsertElementInst *CurrIE = &InsElt;		InsertElementInst *CurrIE = &InsElt;
SmallVector<bool, 16> ElementPresent(NumElements, false);		SmallVector<bool, 16> ElementPresent(NumElements, false);
		InsertElementInst *FirstIE = nullptr;

// Walk the chain backwards, keeping track of which indices we inserted into,		// Walk the chain backwards, keeping track of which indices we inserted into,
// until we hit something that isn't an insert of the splatted value.		// until we hit something that isn't an insert of the splatted value.
while (CurrIE) {		while (CurrIE) {
ConstantInt *Idx = dyn_cast<ConstantInt>(CurrIE->getOperand(2));		ConstantInt *Idx = dyn_cast<ConstantInt>(CurrIE->getOperand(2));
if (!Idx \|\| CurrIE->getOperand(1) != SplatVal)		if (!Idx \|\| CurrIE->getOperand(1) != SplatVal)
return nullptr;		return nullptr;

// Check none of the intermediate steps have any additional uses.		InsertElementInst *NextIE =
if ((CurrIE != &InsElt) && !CurrIE->hasOneUse())		dyn_cast<InsertElementInst>(CurrIE->getOperand(0));
		// Check none of the intermediate steps have any additional uses, except
		// for the root insertelement instruction, which can be re-used, if it
		// inserts at position 0.
		if (CurrIE != &InsElt &&
		(!CurrIE->hasOneUse() && (NextIE != nullptr \|\| !Idx->isZero())))
return nullptr;		return nullptr;

ElementPresent[Idx->getZExtValue()] = true;		ElementPresent[Idx->getZExtValue()] = true;
CurrIE = dyn_cast<InsertElementInst>(CurrIE->getOperand(0));		FirstIE = CurrIE;
		CurrIE = NextIE;
}		}

// Make sure we've seen an insert into every element.		// Make sure we've seen an insert into every element.
if (llvm::any_of(ElementPresent, [](bool Present) { return !Present; }))		if (llvm::any_of(ElementPresent, [](bool Present) { return !Present; }))
return nullptr;		return nullptr;

// All right, create the insert + shuffle.		// All right, create the insert + shuffle.
Instruction *InsertFirst = InsertElementInst::Create(		Instruction *InsertFirst;
		if (cast<ConstantInt>(FirstIE->getOperand(2))->isZero())
		InsertFirst = FirstIE;
		else
		InsertFirst = InsertElementInst::Create(
UndefValue::get(VT), SplatVal,		UndefValue::get(VT), SplatVal,
ConstantInt::get(Type::getInt32Ty(InsElt.getContext()), 0), "", &InsElt);		ConstantInt::get(Type::getInt32Ty(InsElt.getContext()), 0),
		"", &InsElt);

Constant *ZeroMask = ConstantAggregateZero::get(		Constant *ZeroMask = ConstantAggregateZero::get(
VectorType::get(Type::getInt32Ty(InsElt.getContext()), NumElements));		VectorType::get(Type::getInt32Ty(InsElt.getContext()), NumElements));

return new ShuffleVectorInst(InsertFirst, UndefValue::get(VT), ZeroMask);		return new ShuffleVectorInst(InsertFirst, UndefValue::get(VT), ZeroMask);
}		}

/// If we have an insertelement instruction feeding into another insertelement		/// If we have an insertelement instruction feeding into another insertelement
▲ Show 20 Lines • Show All 843 Lines • Show Last 20 Lines

test/Transforms/InstCombine/broadcast.ll

Show First 20 Lines • Show All 45 Lines • ▼ Show 20 Lines	define <4 x float> @good4(float %arg) {
%tmp = insertelement <4 x float> zeroinitializer, float %arg, i32 0		%tmp = insertelement <4 x float> zeroinitializer, float %arg, i32 0
%tmp4 = insertelement <4 x float> %tmp, float %arg, i32 1		%tmp4 = insertelement <4 x float> %tmp, float %arg, i32 1
%tmp5 = insertelement <4 x float> %tmp4, float %arg, i32 2		%tmp5 = insertelement <4 x float> %tmp4, float %arg, i32 2
%tmp6 = insertelement <4 x float> %tmp5, float %arg, i32 3		%tmp6 = insertelement <4 x float> %tmp5, float %arg, i32 3
%tmp7 = fadd <4 x float> %tmp6, %tmp6		%tmp7 = fadd <4 x float> %tmp6, %tmp6
ret <4 x float> %tmp7		ret <4 x float> %tmp7
}		}

		; CHECK-LABEL: @good5(
		; CHECK-NEXT: %ins1 = insertelement <4 x float> undef, float %v, i32 0
		; CHECK-NEXT: %a1 = fadd <4 x float> %ins1, %ins1
		; CHECK-NEXT: %ins4 = shufflevector <4 x float> %ins1, <4 x float> undef, <4 x i32> zeroinitializer
		; CHECK-NEXT: %res = fadd <4 x float> %a1, %ins4
		; CHECK-NEXT: ret <4 x float> %res
		define <4 x float> @good5(float %v) {
		%ins1 = insertelement <4 x float> undef, float %v, i32 0
		%a1 = fadd <4 x float> %ins1, %ins1
		%ins2 = insertelement<4 x float> %ins1, float %v, i32 1
		[Inline comment by fpetrogalli, marked Done] Hi Florian, this looks good to me too, but shouldn't you also check that the transformation does not happen if any lane of %ins1, other than the first one, is not undef? Or is this case covered somewhere else? Francesco
		%ins3 = insertelement<4 x float> %ins2, float %v, i32 2
		%ins4 = insertelement<4 x float> %ins3, float %v, i32 3
		%res = fadd <4 x float> %a1, %ins4
		ret <4 x float> %res
		}

; CHECK-LABEL: bad1		; CHECK-LABEL: bad1
; CHECK-NOT: shufflevector		; CHECK-NOT: shufflevector
define <4 x float> @bad1(float %arg) {		define <4 x float> @bad1(float %arg) {
%tmp = insertelement <4 x float> undef, float %arg, i32 1		%tmp = insertelement <4 x float> undef, float %arg, i32 1
%tmp4 = insertelement <4 x float> %tmp, float %arg, i32 1		%tmp4 = insertelement <4 x float> %tmp, float %arg, i32 1
%tmp5 = insertelement <4 x float> %tmp4, float %arg, i32 2		%tmp5 = insertelement <4 x float> %tmp4, float %arg, i32 2
%tmp6 = insertelement <4 x float> %tmp5, float %arg, i32 3		%tmp6 = insertelement <4 x float> %tmp5, float %arg, i32 3
ret <4 x float> %tmp6		ret <4 x float> %tmp6
Show All 40 Lines
; CHECK-NOT: shufflevector		; CHECK-NOT: shufflevector
define <4 x float> @bad6(float %arg, i32 %k) {		define <4 x float> @bad6(float %arg, i32 %k) {
%tmp = insertelement <4 x float> undef, float %arg, i32 0		%tmp = insertelement <4 x float> undef, float %arg, i32 0
%tmp4 = insertelement <4 x float> %tmp, float %arg, i32 1		%tmp4 = insertelement <4 x float> %tmp, float %arg, i32 1
%tmp5 = insertelement <4 x float> %tmp4, float %arg, i32 %k		%tmp5 = insertelement <4 x float> %tmp4, float %arg, i32 %k
%tmp6 = insertelement <4 x float> %tmp5, float %arg, i32 3		%tmp6 = insertelement <4 x float> %tmp5, float %arg, i32 3
ret <4 x float> %tmp6		ret <4 x float> %tmp6
}		}

		; CHECK-LABEL: @bad7(
		; CHECK-NOT: shufflevector
		define <4 x float> @bad7(float %v) {
		%ins1 = insertelement <4 x float> undef, float %v, i32 1
		%a1 = fadd <4 x float> %ins1, %ins1
		%ins2 = insertelement<4 x float> %ins1, float %v, i32 2
		%ins3 = insertelement<4 x float> %ins2, float %v, i32 3
		%ins4 = insertelement<4 x float> %ins3, float %v, i32 0
		%res = fadd <4 x float> %a1, %ins4
		ret <4 x float> %res
		}