Diff 108498

lib/Transforms/Vectorize/SLPVectorizer.cpp

Show First 20 Lines • Show All 150 Lines • ▼ Show 20 Lines
/// \returns True if all of the values in \p VL are identical.		/// \returns True if all of the values in \p VL are identical.
static bool isSplat(ArrayRef<Value *> VL) {		static bool isSplat(ArrayRef<Value *> VL) {
for (unsigned i = 1, e = VL.size(); i < e; ++i)		for (unsigned i = 1, e = VL.size(); i < e; ++i)
if (VL[i] != VL[0])		if (VL[i] != VL[0])
return false;		return false;
return true;		return true;
}		}

		/// Checks if the vector of instructions can be represented as a shuffle, like:
		/// %x0 = extractelement <4 x i8> %x, i32 0
		/// %x3 = extractelement <4 x i8> %x, i32 3
		/// %y1 = extractelement <4 x i8> %y, i32 1
		/// %y2 = extractelement <4 x i8> %y, i32 2
		/// %x0x0 = mul i8 %x0, %x0
		/// %x3x3 = mul i8 %x3, %x3
		/// %y1y1 = mul i8 %y1, %y1
		/// %y2y2 = mul i8 %y2, %y2
		/// %ins1 = insertelement <4 x i8> undef, i8 %x0x0, i32 0
		/// %ins2 = insertelement <4 x i8> %ins1, i8 %x3x3, i32 1
		/// %ins3 = insertelement <4 x i8> %ins2, i8 %y1y1, i32 2
		/// %ins4 = insertelement <4 x i8> %ins3, i8 %y2y2, i32 3
		/// ret <4 x i8> %ins4
		/// can be transformed into:
		/// %1 = shufflevector <4 x i8> %x, <4 x i8> %y, <4 x i32> <i32 0, i32 3, i32 5,
		/// i32 6>
		/// %2 = mul <4 x i8> %1, %1
		/// ret <4 x i8> %2
		/// We convert this initially to something like:
		/// %x0 = extractelement <4 x i8> %x, i32 0
		/// %x3 = extractelement <4 x i8> %x, i32 3
		/// %y1 = extractelement <4 x i8> %y, i32 1
		/// %y2 = extractelement <4 x i8> %y, i32 2
		/// %1 = insertelement <4 x i8> undef, i8 %x0, i32 0
		/// %2 = insertelement <4 x i8> %1, i8 %x3, i32 1
		/// %3 = insertelement <4 x i8> %2, i8 %y1, i32 2
		/// %4 = insertelement <4 x i8> %3, i8 %y2, i32 3
		/// %5 = mul <4 x i8> %4, %4
		/// %6 = extractelement <4 x i8> %5, i32 0
		/// %ins1 = insertelement <4 x i8> undef, i8 %6, i32 0
		/// %7 = extractelement <4 x i8> %5, i32 1
		/// %ins2 = insertelement <4 x i8> %ins1, i8 %7, i32 1
		/// %8 = extractelement <4 x i8> %5, i32 2
		/// %ins3 = insertelement <4 x i8> %ins2, i8 %8, i32 2
		/// %9 = extractelement <4 x i8> %5, i32 3
		/// %ins4 = insertelement <4 x i8> %ins3, i8 %9, i32 3
		/// ret <4 x i8> %ins4
		/// InstCombiner transforms this into a shuffle and vector mul
		static Optional<TargetTransformInfo::ShuffleKind>
		isShuffle(ArrayRef<Value *> VL) {
		auto *EI0 = cast<ExtractElementInst>(VL[0]);
		mkuperUnsubmitted Not Done Reply Inline Actions I think the name is a bit weird. If it's a blend, it's definitely a shuffle. What this really does is determine whether we're extracting from at most 2 sources, right? Maybe give a name that indicates this? mkuper: I think the name is a bit weird. If it's a blend, it's definitely a shuffle. What this really…
		ABataevAuthorUnsubmitted Not Done Reply Inline Actions Renamed it to `isShuffle`. Is this ok? ABataev: Renamed it to `isShuffle`. Is this ok?
		mkuperUnsubmitted Not Done Reply Inline Actions I think that makes sense. It's not ideal, but I don't have any good ideas either. "IsUpToTwoVectorShuffle" is... meh. mkuper: I think that makes sense. It's not ideal, but I don't have any good ideas either.
		unsigned Size = EI0->getVectorOperandType()->getVectorNumElements();
		Value *Vec1 = nullptr;
		Value *Vec2 = nullptr;
		mkuperUnsubmitted Not Done Reply Inline Actions I'm not a fan of using SK_Alternate here. Can you use Optional<>? mkuper: I'm not a fan of using SK_Alternate here. Can you use Optional<>?
		ABataevAuthorUnsubmitted Not Done Reply Inline Actions Oh, yes, thanks for the hint. ABataev: Oh, yes, thanks for the hint.
		enum ShuffleMode {Unknown, FirstAlternate, SecondAlternate, Permute};
		ShuffleMode CommonShuffleMode = Unknown;
		for (unsigned I = 0, E = VL.size(); I < E; ++I) {
		auto *EI = cast<ExtractElementInst>(VL[I]);
		mkuperUnsubmitted Not Done Reply Inline Actions Maybe define all 3 on one line? (If you think this is better, feel free to leave as is). mkuper: Maybe define all 3 on one line? (If you think this is better, feel free to leave as is).
		ABataevAuthorUnsubmitted Not Done Reply Inline Actions Ok, fine. ABataev: Ok, fine.
		auto *Vec = EI->getVectorOperand();
		// All vector operands must have the same number of vector elements.
		if (Vec->getType()->getVectorNumElements() != Size)
		return None;
		mkuperUnsubmitted Not Done Reply Inline Actions I think you're guaranteed that all members of a VL are the same instruction type, so you can just cast<> here, and avoid the check below. (Please verify I'm right, though. :-) ) mkuper: I think you're guaranteed that all members of a VL are the same instruction type, so you can…
		ABataevAuthorUnsubmitted Not Done Reply Inline Actions No, we're not guaranteed. This function works when we found out that this is gathering of elements. In this case, we have no information that all instructions are of the same type. ABataev: No, we're not guaranteed. This function works when we found out that this is gathering of…
		mkuperUnsubmitted Not Done Reply Inline Actions Ah, I see, ok. mkuper: Ah, I see, ok.
		auto *Idx = dyn_cast<ConstantInt>(EI->getIndexOperand());
		if (!Idx)
		return None;
		// Undefined behaviour if Idx is negative or >= Size.
		if (Idx->getValue().isNegative())
		continue;
		unsigned IntIdx = Idx->getValue().getZExtValue();
		if (IntIdx >= Size)
		RKSimonUnsubmitted Not Done Reply Inline Actions You could merge this and the isNegative test above into a single bounds test by using: if (Idx->getValue().uge(Size)) continue; unsigned IntIdx = Idx->getValue().getZExtValue(); RKSimon: You could merge this and the isNegative test above into a single bounds test by using: ``` if…
		continue;
		// We can extractelement from undef vector.
		if (isa<UndefValue>(Vec))
		continue;
		// For correct shuffling we have to have at most 2 different vector operands
		// in all extractelement instructions.
		if (Vec1 && Vec2 && Vec != Vec1 && Vec != Vec2)
		return None;
		if (CommonShuffleMode == Permute)
		continue;
		// If the extract index is not the same as the operation number, it is a
		// permutation.
		if (IntIdx != I) {
		CommonShuffleMode = Permute;
		continue;
		}
		// Check the shuffle mode for the current operation.
		if (!Vec1)
		Vec1 = Vec;
		else if (!Vec2)
		mkuperUnsubmitted Done Reply Inline Actions Shouldn't this be something like else if (Vec1 != Vec) ? mkuper: Shouldn't this be something like ``` else if (Vec1 != Vec) ``` ?
		ABataevAuthorUnsubmitted Not Done Reply Inline Actions Yes, you're correct, thanks. ABataev: Yes, you're correct, thanks.
		Vec2 = Vec;
		// Example: shufflevector A, B, <0,5,2,7>
		// I is even and IntIdx for A == I - FirstAlternate shuffle.
		// I is odd and IntIdx for B == I - FirstAlternate shuffle.
		// Example: shufflevector A, B, <4,1,6,3>
		// I is odd and IntIdx for A == I - SecondAlternate shuffle.
		// I is even and IntIdx for B == I - SecondAlternate shuffle.
		ShuffleMode CurrentShuffleMode = ((Vec == Vec1) != static_cast<bool>(I & 1))
		mkuperUnsubmitted Not Done Reply Inline Actions I think if the order of extracts and inserts just happens to be the same, we'll overestimate the cost. But that's fine for now. Could you check, and if we indeed overestimate, add a FIXME? mkuper: I think if the order of extracts and inserts just happens to be the same, we'll overestimate…
		ABataevAuthorUnsubmitted Not Done Reply Inline Actions Yes, we do overestimate the cost. Reworked it a bit. If `UsedElements1.any()` is `false`, it is `SK_Alternate` (i.e. blending, because we're not crossing lanes in `extractelement` instructions for different vectors), otherwise it is still estimated as `SK_PermuteTwoSrc` or `SK_PermuteSingleSrc`. ABataev: Yes, we do overestimate the cost. Reworked it a bit. If `UsedElements1.any()` is `false`, it is…
		mkuperUnsubmitted Not Done Reply Inline Actions Oh, I'm sorry, I misread &= as |=. This still looks wrong, though. The way I understand it, right now SK_Alternate (which usually, for x86, maps to a blend - but the two are not equivalent) has very specific requirements for the mask. See isAlternateVectorMask() in CostModel.cpp. I don't think your code matches that. Adding Elena who should probably know better than I do. mkuper: Oh, I'm sorry, I misread &= as |=. This still looks wrong, though. The way I understand it…
		mkuperUnsubmitted Not Done Reply Inline Actions When can !UsedElements1.any() be true? mkuper: When can !UsedElements1.any() be true?
		RKSimonUnsubmitted Not Done Reply Inline Actions @mkuper is right - SK_Alternate is not a simple blend, it should only match shuffles which alternate between sequential elements from the 2 vectors (e.g. 0, 5, 2, 7). I've no idea why we went for that instead of a general SK_Blend but that's where we are... RKSimon: @mkuper is right - SK_Alternate is not a simple blend, it should only match shuffles which…
		ABataevAuthorUnsubmitted Not Done Reply Inline Actions Reworked this part ABataev: Reworked this part
		mkuperUnsubmitted Done Reply Inline Actions The condition of the ternary expression is kind of confusing. Maybe unpack it a bit? mkuper: The condition of the ternary expression is kind of confusing. Maybe unpack it a bit?
		? FirstAlternate
		: SecondAlternate;
		// Common mode is not set or the same as the shuffle mode of the current
		// operation - alternate.
		if (CommonShuffleMode == Unknown)
		CommonShuffleMode = CurrentShuffleMode;
		// Common shuffle mode is not the same as the shuffle mode of the current
		// operation - permutation.
		if (CommonShuffleMode != CurrentShuffleMode)
		CommonShuffleMode = Permute;
		}
		// If we're not crossing lanes in different vectors, consider it as blending.
		if ((CommonShuffleMode == FirstAlternate ||
		CommonShuffleMode == SecondAlternate) &&
		Vec2)
		return TargetTransformInfo::SK_Alternate;
		// If Vec2 was never used, we have a permutation of a single vector, otherwise
		// we have permutation of 2 vectors.
		return Vec2 ? TargetTransformInfo::SK_PermuteTwoSrc
		: TargetTransformInfo::SK_PermuteSingleSrc;
		}

///\returns Opcode that can be clubbed with \p Op to create an alternate		///\returns Opcode that can be clubbed with \p Op to create an alternate
/// sequence which can later be merged as a ShuffleVector instruction.		/// sequence which can later be merged as a ShuffleVector instruction.
static unsigned getAltOpcode(unsigned Op) {		static unsigned getAltOpcode(unsigned Op) {
switch (Op) {		switch (Op) {
case Instruction::FAdd:		case Instruction::FAdd:
return Instruction::FSub;		return Instruction::FSub;
case Instruction::FSub:		case Instruction::FSub:
return Instruction::FAdd;		return Instruction::FAdd;
▲ Show 20 Lines • Show All 1,562 Lines • ▼ Show 20 Lines	VecTy = VectorType::get(
IntegerType::get(F->getContext(), MinBWs[VL[0]].first), VL.size());		IntegerType::get(F->getContext(), MinBWs[VL[0]].first), VL.size());

if (E->NeedToGather) {		if (E->NeedToGather) {
if (allConstant(VL))		if (allConstant(VL))
return 0;		return 0;
if (isSplat(VL)) {		if (isSplat(VL)) {
return TTI->getShuffleCost(TargetTransformInfo::SK_Broadcast, VecTy, 0);		return TTI->getShuffleCost(TargetTransformInfo::SK_Broadcast, VecTy, 0);
}		}
		if (getSameOpcode(VL) == Instruction::ExtractElement) {
		mkuperUnsubmitted Not Done Reply Inline Actions Maybe check whether VL[0] is an extract here? It would make it clearer that the code below is relevant only for extracts, as opposed to checking internally. mkuper: Maybe check whether VL[0] is an extract here? It would make it clearer that the code below is…
		ABataevAuthorUnsubmitted Not Done Reply Inline Actions Ok, will do. ABataev: Ok, will do.
		Optional<TargetTransformInfo::ShuffleKind> ShuffleKind = isShuffle(VL);
		if (ShuffleKind.hasValue()) {
		int Cost = TTI->getShuffleCost(ShuffleKind.getValue(), VecTy);
		for (auto *V : VL) {
		RKSimonUnsubmitted Done Reply Inline Actions use for range loop? RKSimon: use for range loop?
		// If all users of instruction are going to be vectorized and this
		// instruction itself is not going to be vectorized, consider this
		// instruction as dead and remove its cost from the final cost of the
		// vectorized tree.
		if (areAllUsersVectorized(cast<Instruction>(V)) &&
		!ScalarToTreeEntry.count(V)) {
		auto *IO = cast<ConstantInt>(
		cast<ExtractElementInst>(V)->getIndexOperand());
		Cost -= TTI->getVectorInstrCost(Instruction::ExtractElement, VecTy,
		IO->getZExtValue());
		}
		RKSimonUnsubmitted Done Reply Inline Actions Any way that this can be tied up? Maybe pull out cast<ExtractElementInst>(VL[I])->getIndexOperand()) I hate seeing -> on new lines.... RKSimon: Any way that this can be tied up? Maybe pull out cast<ExtractElementInst>(VL[I])…
		}
		return Cost;
		}
		}
return getGatherCost(E->Scalars);		return getGatherCost(E->Scalars);
}		}
unsigned Opcode = getSameOpcode(VL);		unsigned Opcode = getSameOpcode(VL);
assert(Opcode && allSameType(VL) && allSameBlock(VL) && "Invalid VL");		assert(Opcode && allSameType(VL) && allSameBlock(VL) && "Invalid VL");
Instruction *VL0 = cast<Instruction>(VL[0]);		Instruction *VL0 = cast<Instruction>(VL[0]);
switch (Opcode) {		switch (Opcode) {
case Instruction::PHI: {		case Instruction::PHI: {
return 0;		return 0;
}		}
case Instruction::ExtractValue:		case Instruction::ExtractValue:
case Instruction::ExtractElement: {		case Instruction::ExtractElement: {
if (canReuseExtract(VL, VL0)) {		if (canReuseExtract(VL, VL0)) {
int DeadCost = 0;		int DeadCost = 0;
for (unsigned i = 0, e = VL.size(); i < e; ++i) {		for (unsigned i = 0, e = VL.size(); i < e; ++i) {
Instruction *E = cast<Instruction>(VL[i]);		Instruction *E = cast<Instruction>(VL[i]);
// If all users are going to be vectorized, instruction can be		// If all users are going to be vectorized, instruction can be
// considered as dead.		// considered as dead.
// The same, if have only one user, it will be vectorized for sure.		// The same, if have only one user, it will be vectorized for sure.
if (areAllUsersVectorized(E))		if (areAllUsersVectorized(E))
		RKSimonUnsubmitted Not Done Reply Inline Actions I think this move to BoUpSLP::areAllUsersVectorized can be done as a NFC pre-commit. RKSimon: I think this move to BoUpSLP::areAllUsersVectorized can be done as a NFC pre-commit.
		ABataevAuthorUnsubmitted Not Done Reply Inline Actions Ok, will do ABataev: Ok, will do
// Take credit for instruction that will become dead.		// Take credit for instruction that will become dead.
DeadCost +=		DeadCost +=
TTI->getVectorInstrCost(Instruction::ExtractElement, VecTy, i);		TTI->getVectorInstrCost(Instruction::ExtractElement, VecTy, i);
}		}
return -DeadCost;		return -DeadCost;
}		}
return getGatherCost(VecTy);		return getGatherCost(VecTy);
}		}
▲ Show 20 Lines • Show All 3,396 Lines • Show Last 20 Lines

test/Transforms/SLPVectorizer/X86/blending-shuffle.ll

	; NOTE: Assertions have been autogenerated by utils/update_test_checks.py			; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
	; RUN: opt < %s -slp-vectorizer -S -o - -mtriple=x86-64-unknown-linux -mcpu=bdver2 -instcombine | FileCheck %s			; RUN: opt < %s -slp-vectorizer -S -o - -mtriple=x86-64-unknown-linux -mcpu=bdver2 -instcombine | FileCheck %s

	define <2 x i8> @g(<2 x i8> %x, <2 x i8> %y) {			define <2 x i8> @g(<2 x i8> %x, <2 x i8> %y) {
	; CHECK-LABEL: @g(			; CHECK-LABEL: @g(
	; CHECK-NEXT: [[X0:%.*]] = extractelement <2 x i8> [[X:%.*]], i32 0			; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i8> [[X:%.*]], <2 x i8> [[Y:%.*]], <2 x i32> <i32 0, i32 3>
	; CHECK-NEXT: [[Y1:%.*]] = extractelement <2 x i8> [[Y:%.*]], i32 1			; CHECK-NEXT: [[TMP2:%.*]] = mul <2 x i8> [[TMP1]], [[TMP1]]
	; CHECK-NEXT: [[X0X0:%.*]] = mul i8 [[X0]], [[X0]]			; CHECK-NEXT: ret <2 x i8> [[TMP2]]
	; CHECK-NEXT: [[Y1Y1:%.*]] = mul i8 [[Y1]], [[Y1]]
	; CHECK-NEXT: [[INS1:%.*]] = insertelement <2 x i8> undef, i8 [[X0X0]], i32 0
	; CHECK-NEXT: [[INS2:%.*]] = insertelement <2 x i8> [[INS1]], i8 [[Y1Y1]], i32 1
	; CHECK-NEXT: ret <2 x i8> [[INS2]]
	;			;
	%x0 = extractelement <2 x i8> %x, i32 0			%x0 = extractelement <2 x i8> %x, i32 0
	%y1 = extractelement <2 x i8> %y, i32 1			%y1 = extractelement <2 x i8> %y, i32 1
	%x0x0 = mul i8 %x0, %x0			%x0x0 = mul i8 %x0, %x0
	%y1y1 = mul i8 %y1, %y1			%y1y1 = mul i8 %y1, %y1
	%ins1 = insertelement <2 x i8> undef, i8 %x0x0, i32 0			%ins1 = insertelement <2 x i8> undef, i8 %x0x0, i32 0
	%ins2 = insertelement <2 x i8> %ins1, i8 %y1y1, i32 1			%ins2 = insertelement <2 x i8> %ins1, i8 %y1y1, i32 1
	ret <2 x i8> %ins2			ret <2 x i8> %ins2
	}			}

	define <4 x i8> @h(<4 x i8> %x, <4 x i8> %y) {			define <4 x i8> @h(<4 x i8> %x, <4 x i8> %y) {
	; CHECK-LABEL: @h(			; CHECK-LABEL: @h(
	; CHECK-NEXT: [[X0:%.*]] = extractelement <4 x i8> [[X:%.*]], i32 0			; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i8> [[X:%.*]], <4 x i8> [[Y:%.*]], <4 x i32> <i32 0, i32 3, i32 5, i32 6>
	; CHECK-NEXT: [[X3:%.*]] = extractelement <4 x i8> [[X]], i32 3			; CHECK-NEXT: [[TMP2:%.*]] = mul <4 x i8> [[TMP1]], [[TMP1]]
	; CHECK-NEXT: [[Y1:%.*]] = extractelement <4 x i8> [[Y:%.*]], i32 1			; CHECK-NEXT: ret <4 x i8> [[TMP2]]
	; CHECK-NEXT: [[Y2:%.*]] = extractelement <4 x i8> [[Y]], i32 2
	; CHECK-NEXT: [[X0X0:%.*]] = mul i8 [[X0]], [[X0]]
	; CHECK-NEXT: [[X3X3:%.*]] = mul i8 [[X3]], [[X3]]
	; CHECK-NEXT: [[Y1Y1:%.*]] = mul i8 [[Y1]], [[Y1]]
	; CHECK-NEXT: [[Y2Y2:%.*]] = mul i8 [[Y2]], [[Y2]]
	; CHECK-NEXT: [[INS1:%.*]] = insertelement <4 x i8> undef, i8 [[X0X0]], i32 0
	; CHECK-NEXT: [[INS2:%.*]] = insertelement <4 x i8> [[INS1]], i8 [[X3X3]], i32 1
	; CHECK-NEXT: [[INS3:%.*]] = insertelement <4 x i8> [[INS2]], i8 [[Y1Y1]], i32 2
	; CHECK-NEXT: [[INS4:%.*]] = insertelement <4 x i8> [[INS3]], i8 [[Y2Y2]], i32 3
	; CHECK-NEXT: ret <4 x i8> [[INS4]]
	;			;
	%x0 = extractelement <4 x i8> %x, i32 0			%x0 = extractelement <4 x i8> %x, i32 0
	%x3 = extractelement <4 x i8> %x, i32 3			%x3 = extractelement <4 x i8> %x, i32 3
	%y1 = extractelement <4 x i8> %y, i32 1			%y1 = extractelement <4 x i8> %y, i32 1
	%y2 = extractelement <4 x i8> %y, i32 2			%y2 = extractelement <4 x i8> %y, i32 2
	%x0x0 = mul i8 %x0, %x0			%x0x0 = mul i8 %x0, %x0
	%x3x3 = mul i8 %x3, %x3			%x3x3 = mul i8 %x3, %x3
	%y1y1 = mul i8 %y1, %y1			%y1y1 = mul i8 %y1, %y1
	%y2y2 = mul i8 %y2, %y2			%y2y2 = mul i8 %y2, %y2
	%ins1 = insertelement <4 x i8> undef, i8 %x0x0, i32 0			%ins1 = insertelement <4 x i8> undef, i8 %x0x0, i32 0
	%ins2 = insertelement <4 x i8> %ins1, i8 %x3x3, i32 1			%ins2 = insertelement <4 x i8> %ins1, i8 %x3x3, i32 1
	%ins3 = insertelement <4 x i8> %ins2, i8 %y1y1, i32 2			%ins3 = insertelement <4 x i8> %ins2, i8 %y1y1, i32 2
	%ins4 = insertelement <4 x i8> %ins3, i8 %y2y2, i32 3			%ins4 = insertelement <4 x i8> %ins3, i8 %y2y2, i32 3
	ret <4 x i8> %ins4			ret <4 x i8> %ins4
	}			}

	define <4 x i8> @h_undef(<4 x i8> %x, <4 x i8> %y) {			define <4 x i8> @h_undef(<4 x i8> %x, <4 x i8> %y) {
	; CHECK-LABEL: @h_undef(			; CHECK-LABEL: @h_undef(
	; CHECK-NEXT: [[X3:%.*]] = extractelement <4 x i8> [[X:%.*]], i32 3			; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i8> [[X:%.*]], <4 x i8> [[Y:%.*]], <4 x i32> <i32 undef, i32 3, i32 5, i32 6>
	; CHECK-NEXT: [[Y1:%.*]] = extractelement <4 x i8> [[Y:%.*]], i32 1			; CHECK-NEXT: [[TMP2:%.*]] = mul <4 x i8> [[TMP1]], [[TMP1]]
	; CHECK-NEXT: [[Y2:%.*]] = extractelement <4 x i8> [[Y]], i32 2			; CHECK-NEXT: ret <4 x i8> [[TMP2]]
	; CHECK-NEXT: [[X3X3:%.*]] = mul i8 [[X3]], [[X3]]
	; CHECK-NEXT: [[Y1Y1:%.*]] = mul i8 [[Y1]], [[Y1]]
	; CHECK-NEXT: [[Y2Y2:%.*]] = mul i8 [[Y2]], [[Y2]]
	; CHECK-NEXT: [[INS2:%.*]] = insertelement <4 x i8> undef, i8 [[X3X3]], i32 1
	; CHECK-NEXT: [[INS3:%.*]] = insertelement <4 x i8> [[INS2]], i8 [[Y1Y1]], i32 2
	; CHECK-NEXT: [[INS4:%.*]] = insertelement <4 x i8> [[INS3]], i8 [[Y2Y2]], i32 3
	; CHECK-NEXT: ret <4 x i8> [[INS4]]
	;			;
	%x0 = extractelement <4 x i8> undef, i32 0			%x0 = extractelement <4 x i8> undef, i32 0
	%x3 = extractelement <4 x i8> %x, i32 3			%x3 = extractelement <4 x i8> %x, i32 3
	%y1 = extractelement <4 x i8> %y, i32 1			%y1 = extractelement <4 x i8> %y, i32 1
	%y2 = extractelement <4 x i8> %y, i32 2			%y2 = extractelement <4 x i8> %y, i32 2
	%x0x0 = mul i8 %x0, %x0			%x0x0 = mul i8 %x0, %x0
	%x3x3 = mul i8 %x3, %x3			%x3x3 = mul i8 %x3, %x3
	%y1y1 = mul i8 %y1, %y1			%y1y1 = mul i8 %y1, %y1
	%y2y2 = mul i8 %y2, %y2			%y2y2 = mul i8 %y2, %y2
	%ins1 = insertelement <4 x i8> undef, i8 %x0x0, i32 0			%ins1 = insertelement <4 x i8> undef, i8 %x0x0, i32 0
	%ins2 = insertelement <4 x i8> %ins1, i8 %x3x3, i32 1			%ins2 = insertelement <4 x i8> %ins1, i8 %x3x3, i32 1
	%ins3 = insertelement <4 x i8> %ins2, i8 %y1y1, i32 2			%ins3 = insertelement <4 x i8> %ins2, i8 %y1y1, i32 2
	%ins4 = insertelement <4 x i8> %ins3, i8 %y2y2, i32 3			%ins4 = insertelement <4 x i8> %ins3, i8 %y2y2, i32 3
	ret <4 x i8> %ins4			ret <4 x i8> %ins4
	}			}

	define i8 @i(<4 x i8> %x, <4 x i8> %y) {			define i8 @i(<4 x i8> %x, <4 x i8> %y) {
	; CHECK-LABEL: @i(			; CHECK-LABEL: @i(
	; CHECK-NEXT: [[X0:%.*]] = extractelement <4 x i8> [[X:%.*]], i32 0			; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i8> [[X:%.*]], <4 x i8> [[Y:%.*]], <4 x i32> <i32 0, i32 3, i32 5, i32 6>
	; CHECK-NEXT: [[X3:%.*]] = extractelement <4 x i8> [[X]], i32 3			; CHECK-NEXT: [[TMP2:%.*]] = mul <4 x i8> [[TMP1]], [[TMP1]]
	; CHECK-NEXT: [[Y1:%.*]] = extractelement <4 x i8> [[Y:%.*]], i32 1			; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i8> [[TMP2]], <4 x i8> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
	; CHECK-NEXT: [[Y2:%.*]] = extractelement <4 x i8> [[Y]], i32 2			; CHECK-NEXT: [[BIN_RDX:%.*]] = add <4 x i8> [[TMP2]], [[RDX_SHUF]]
	; CHECK-NEXT: [[X0X0:%.*]] = mul i8 [[X0]], [[X0]]			; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x i8> [[BIN_RDX]], <4 x i8> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
	; CHECK-NEXT: [[X3X3:%.*]] = mul i8 [[X3]], [[X3]]			; CHECK-NEXT: [[BIN_RDX2:%.*]] = add <4 x i8> [[BIN_RDX]], [[RDX_SHUF1]]
	; CHECK-NEXT: [[Y1Y1:%.*]] = mul i8 [[Y1]], [[Y1]]			; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i8> [[BIN_RDX2]], i32 0
	; CHECK-NEXT: [[Y2Y2:%.*]] = mul i8 [[Y2]], [[Y2]]
	; CHECK-NEXT: [[TMP1:%.*]] = add i8 [[X0X0]], [[X3X3]]
	; CHECK-NEXT: [[TMP2:%.*]] = add i8 [[Y1Y1]], [[Y2Y2]]
	; CHECK-NEXT: [[TMP3:%.*]] = add i8 [[TMP1]], [[TMP2]]
	; CHECK-NEXT: ret i8 [[TMP3]]			; CHECK-NEXT: ret i8 [[TMP3]]
	;			;
	%x0 = extractelement <4 x i8> %x, i32 0			%x0 = extractelement <4 x i8> %x, i32 0
	%x3 = extractelement <4 x i8> %x, i32 3			%x3 = extractelement <4 x i8> %x, i32 3
	%y1 = extractelement <4 x i8> %y, i32 1			%y1 = extractelement <4 x i8> %y, i32 1
	%y2 = extractelement <4 x i8> %y, i32 2			%y2 = extractelement <4 x i8> %y, i32 2
	%x0x0 = mul i8 %x0, %x0			%x0x0 = mul i8 %x0, %x0
	%x3x3 = mul i8 %x3, %x3			%x3x3 = mul i8 %x3, %x3
	%y1y1 = mul i8 %y1, %y1			%y1y1 = mul i8 %y1, %y1
	%y2y2 = mul i8 %y2, %y2			%y2y2 = mul i8 %y2, %y2
	%1 = add i8 %x0x0, %x3x3			%1 = add i8 %x0x0, %x3x3
	%2 = add i8 %y1y1, %y2y2			%2 = add i8 %y1y1, %y2y2
	%3 = add i8 %1, %2			%3 = add i8 %1, %2
	ret i8 %3			ret i8 %3
	}			}

	define i8 @j(<4 x i8> %x, <4 x i8> %y) {			define i8 @j(<4 x i8> %x, <4 x i8> %y) {
	; CHECK-LABEL: @j(			; CHECK-LABEL: @j(
	; CHECK-NEXT: [[X0:%.*]] = extractelement <4 x i8> [[X:%.*]], i32 0			; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i8> [[X:%.*]], <4 x i8> [[Y:%.*]], <2 x i32> <i32 0, i32 5>
	; CHECK-NEXT: [[X3:%.*]] = extractelement <4 x i8> [[X]], i32 3			; CHECK-NEXT: [[TMP2:%.*]] = mul <2 x i8> [[TMP1]], [[TMP1]]
	; CHECK-NEXT: [[Y1:%.*]] = extractelement <4 x i8> [[Y:%.*]], i32 1			; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i8> [[X]], <4 x i8> [[Y]], <2 x i32> <i32 3, i32 6>
	; CHECK-NEXT: [[Y2:%.*]] = extractelement <4 x i8> [[Y]], i32 2			; CHECK-NEXT: [[TMP4:%.*]] = mul <2 x i8> [[TMP3]], [[TMP3]]
	; CHECK-NEXT: [[X0X0:%.*]] = mul i8 [[X0]], [[X0]]			; CHECK-NEXT: [[TMP5:%.*]] = add <2 x i8> [[TMP2]], [[TMP4]]
	; CHECK-NEXT: [[X3X3:%.*]] = mul i8 [[X3]], [[X3]]			; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i8> [[TMP5]], i32 0
	; CHECK-NEXT: [[Y1Y1:%.*]] = mul i8 [[Y1]], [[Y1]]			; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i8> [[TMP5]], i32 1
	; CHECK-NEXT: [[Y2Y2:%.*]] = mul i8 [[Y2]], [[Y2]]			; CHECK-NEXT: [[TMP8:%.*]] = sdiv i8 [[TMP6]], [[TMP7]]
	; CHECK-NEXT: [[TMP1:%.*]] = add i8 [[X0X0]], [[X3X3]]			; CHECK-NEXT: ret i8 [[TMP8]]
	; CHECK-NEXT: [[TMP2:%.*]] = add i8 [[Y1Y1]], [[Y2Y2]]
	; CHECK-NEXT: [[TMP3:%.*]] = sdiv i8 [[TMP1]], [[TMP2]]
	; CHECK-NEXT: ret i8 [[TMP3]]
	;			;
	%x0 = extractelement <4 x i8> %x, i32 0			%x0 = extractelement <4 x i8> %x, i32 0
	%x3 = extractelement <4 x i8> %x, i32 3			%x3 = extractelement <4 x i8> %x, i32 3
	%y1 = extractelement <4 x i8> %y, i32 1			%y1 = extractelement <4 x i8> %y, i32 1
	%y2 = extractelement <4 x i8> %y, i32 2			%y2 = extractelement <4 x i8> %y, i32 2
	%x0x0 = mul i8 %x0, %x0			%x0x0 = mul i8 %x0, %x0
	%x3x3 = mul i8 %x3, %x3			%x3x3 = mul i8 %x3, %x3
	%y1y1 = mul i8 %y1, %y1			%y1y1 = mul i8 %y1, %y1
	%y2y2 = mul i8 %y2, %y2			%y2y2 = mul i8 %y2, %y2
	%1 = add i8 %x0x0, %x3x3			%1 = add i8 %x0x0, %x3x3
	%2 = add i8 %y1y1, %y2y2			%2 = add i8 %y1y1, %y2y2
	%3 = sdiv i8 %1, %2			%3 = sdiv i8 %1, %2
	ret i8 %3			ret i8 %3
	}			}

	define i8 @k(<4 x i8> %x) {			define i8 @k(<4 x i8> %x) {
	; CHECK-LABEL: @k(			; CHECK-LABEL: @k(
	; CHECK-NEXT: [[X0:%.*]] = extractelement <4 x i8> [[X:%.*]], i32 0			; CHECK-NEXT: [[TMP1:%.*]] = mul <4 x i8> [[X:%.*]], [[X]]
	; CHECK-NEXT: [[X3:%.*]] = extractelement <4 x i8> [[X]], i32 3			; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i8> [[TMP1]], <4 x i8> undef, <2 x i32> <i32 0, i32 1>
	; CHECK-NEXT: [[X1:%.*]] = extractelement <4 x i8> [[X]], i32 1			; CHECK-NEXT: [[TMP3:%.*]] = mul <4 x i8> [[X]], [[X]]
	; CHECK-NEXT: [[X2:%.*]] = extractelement <4 x i8> [[X]], i32 2			; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i8> [[TMP3]], <4 x i8> undef, <2 x i32> <i32 3, i32 2>
	; CHECK-NEXT: [[X0X0:%.*]] = mul i8 [[X0]], [[X0]]			; CHECK-NEXT: [[TMP5:%.*]] = add <2 x i8> [[TMP2]], [[TMP4]]
	; CHECK-NEXT: [[X3X3:%.*]] = mul i8 [[X3]], [[X3]]			; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i8> [[TMP5]], i32 0
	; CHECK-NEXT: [[X1X1:%.*]] = mul i8 [[X1]], [[X1]]			; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i8> [[TMP5]], i32 1
	; CHECK-NEXT: [[X2X2:%.*]] = mul i8 [[X2]], [[X2]]			; CHECK-NEXT: [[TMP8:%.*]] = sdiv i8 [[TMP6]], [[TMP7]]
	; CHECK-NEXT: [[TMP1:%.*]] = add i8 [[X0X0]], [[X3X3]]			; CHECK-NEXT: ret i8 [[TMP8]]
	; CHECK-NEXT: [[TMP2:%.*]] = add i8 [[X1X1]], [[X2X2]]
	; CHECK-NEXT: [[TMP3:%.*]] = sdiv i8 [[TMP1]], [[TMP2]]
	; CHECK-NEXT: ret i8 [[TMP3]]
	;			;
	%x0 = extractelement <4 x i8> %x, i32 0			%x0 = extractelement <4 x i8> %x, i32 0
	%x3 = extractelement <4 x i8> %x, i32 3			%x3 = extractelement <4 x i8> %x, i32 3
	%x1 = extractelement <4 x i8> %x, i32 1			%x1 = extractelement <4 x i8> %x, i32 1
	%x2 = extractelement <4 x i8> %x, i32 2			%x2 = extractelement <4 x i8> %x, i32 2
	%x0x0 = mul i8 %x0, %x0			%x0x0 = mul i8 %x0, %x0
	%x3x3 = mul i8 %x3, %x3			%x3x3 = mul i8 %x3, %x3
	%x1x1 = mul i8 %x1, %x1			%x1x1 = mul i8 %x1, %x1
	%x2x2 = mul i8 %x2, %x2			%x2x2 = mul i8 %x2, %x2
	%1 = add i8 %x0x0, %x3x3			%1 = add i8 %x0x0, %x3x3
	%2 = add i8 %x1x1, %x2x2			%2 = add i8 %x1x1, %x2x2
	%3 = sdiv i8 %1, %2			%3 = sdiv i8 %1, %2
	ret i8 %3			ret i8 %3
	}			}

This is an archive of the discontinued LLVM Phabricator instance.

[SLP] Fix for PR31880: shuffle and vectorize repeated scalar ops on extracted elements
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 108498

lib/Transforms/Vectorize/SLPVectorizer.cpp

test/Transforms/SLPVectorizer/X86/blending-shuffle.ll

This is an archive of the discontinued LLVM Phabricator instance.

[SLP] Fix for PR31880: shuffle and vectorize repeated scalar ops on extracted elementsClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 108498

lib/Transforms/Vectorize/SLPVectorizer.cpp

test/Transforms/SLPVectorizer/X86/blending-shuffle.ll

[SLP] Fix for PR31880: shuffle and vectorize repeated scalar ops on extracted elements
ClosedPublic