Diff 46289

lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp

Show First 20 Lines • Show All 873 Lines • ▼ Show 20 Lines	if (SelectInst *SI = dyn_cast<SelectInst>(Op)) {
LI.setOperand(0, SI->getOperand(1));		LI.setOperand(0, SI->getOperand(1));
return &LI;		return &LI;
}		}
}		}
}		}
return nullptr;		return nullptr;
}		}

		/// \brief Look for extractelement/insertvalue sequence that acts like a bitcast.
		///
		/// \returns underlying value that was "cast", or nullptr otherwise.
		///
		/// For example, if we have:
		///
		/// %E0 = extractelement <2 x double> %U, i32 0
		hfinkelUnsubmitted Done Reply Inline Actions Please use %U<n> and %W<n> in this comment because you use the variables named U and W in the code (and that should make things clearer without necessitating longer variable names). hfinkel: Please use %U<n> and %W<n> in this comment because you use the variables named U and W in the…
		ArchDRobisonAuthorUnsubmitted Done Reply Inline Actions [I'm now back from sabbatical.] Nice idea. U and W actually refer to the same instruction. I'll try using %U, %V, and %E since those correspond to U, V, and EI in the code. I'll rename EI to E to make the correspondence cleaner. ArchDRobison: [I'm now back from sabbatical.] Nice idea. U and W actually refer to the same instruction.
		/// %V0 = insertvalue [2 x double] undef, double %E0, 0
		/// %E1 = extractelement <2 x double> %U, i32 1
		/// %V1 = insertvalue [2 x double] %V0, double %E1, 1
		///
		/// and the layout of a <2 x double> is isomorphic to a [2 x double],
		/// then %V1 can be safely approximated by a conceptual "bitcast" of %U.
		/// Note that %U may contain non-undef values where %V1 has undef.
		static Value* likeBitCastFromVector(InstCombiner &IC, Value* V) {
		majnemerUnsubmitted Done Reply Inline Actions Pointers should lean right: `Value *V`. majnemer: Pointers should lean right: `Value *V`.
		Value *U = nullptr;
		while (auto *IV = dyn_cast<InsertValueInst>(V)) {
		auto *E = dyn_cast<ExtractElementInst>(IV->getInsertedValueOperand());
		if (!E)
		return nullptr;
		auto *W = E->getVectorOperand();
		if (!U)
		U = W;
		else if (U != W)
		return nullptr;
		auto *CI = dyn_cast<ConstantInt>(E->getIndexOperand());
		if (!CI \|\| IV->getNumIndices() != 1 \|\| CI->getZExtValue() != *IV->idx_begin())
		hfinkelUnsubmitted Not Done Reply Inline Actions Do we need to check here that IV has only one index? hfinkel: Do we need to check here that IV has only one index?
		ArchDRobisonAuthorUnsubmitted Not Done Reply Inline Actions The check is necessary, since if IV has more than one index the code would have to do much trickier checking for equivalence to a vector. Checking that the aggregate element type is a scalar type would have the same effect, though checking the number of indices seems like a more direct approach. ArchDRobison: The check is necessary, since if IV has more than one index the code would have to do much…
		return nullptr;
		V = IV->getAggregateOperand();
		}
		if (!isa<UndefValue>(V) \|\|!U)
		return nullptr;

		VectorType *UT = cast<VectorType>(U->getType());
		majnemerUnsubmitted Done Reply Inline Actions I'd use `auto *UT` here and do something similar below. majnemer: I'd use `auto *UT` here and do something similar below.
		Type *VT = V->getType();
		// Check that types UT and VT are bitwise isomorphic.
		const DataLayout &DL = IC.getDataLayout();
		if (DL.getTypeSizeInBits(UT) != DL.getTypeSizeInBits(VT)) {
		return nullptr;
		}
		if (ArrayType *AT = dyn_cast<ArrayType>(VT)) {
		if (AT->getNumElements() != UT->getNumElements())
		return nullptr;
		} else {
		StructType *ST = cast<StructType>(VT);
		if (ST->getNumElements() != UT->getNumElements())
		return nullptr;
		for (const Type *EltT : ST->elements()) {
		if (EltT != UT->getElementType())
		hfinkelUnsubmitted Not Done Reply Inline Actions Do the types here need to agree, or just the type sizes? hfinkel: Do the types here need to agree, or just the type sizes?
		ArchDRobisonAuthorUnsubmitted Not Done Reply Inline Actions At a minimum, sizes and alignments have to agree (otherwise inter-element padding might not match up exactly). But I doubt the generalization is worth the extra complexity unless there are clear use cases that would benefit from it. ArchDRobison: At a minimum, sizes and alignments have to agree (otherwise inter-element padding might not…
		return nullptr;
		}
		}
		return U;
		}

/// \brief Combine stores to match the type of value being stored.		/// \brief Combine stores to match the type of value being stored.
///		///
/// The core idea here is that the memory does not have any intrinsic type and		/// The core idea here is that the memory does not have any intrinsic type and
/// where we can we should match the type of a store to the type of value being		/// where we can we should match the type of a store to the type of value being
/// stored.		/// stored.
///		///
/// However, this routine must never change the width of a store or the number of		/// However, this routine must never change the width of a store or the number of
/// stores as that would introduce a semantic change. This combine is expected to		/// stores as that would introduce a semantic change. This combine is expected to
Show All 19 Lines	static bool combineStoreToValueType(InstCombiner &IC, StoreInst &SI) {

// Fold away bit casts of the stored value by storing the original type.		// Fold away bit casts of the stored value by storing the original type.
if (auto *BC = dyn_cast<BitCastInst>(V)) {		if (auto *BC = dyn_cast<BitCastInst>(V)) {
V = BC->getOperand(0);		V = BC->getOperand(0);
combineStoreToNewValue(IC, SI, V);		combineStoreToNewValue(IC, SI, V);
return true;		return true;
}		}

		if (Value *U = likeBitCastFromVector(IC, V)) {
		combineStoreToNewValue(IC, SI, U);
		return true;
		}

// FIXME: We should also canonicalize loads of vectors when their elements are		// FIXME: We should also canonicalize loads of vectors when their elements are
// cast to other types.		// cast to other types.
return false;		return false;
}		}

static bool unpackStoreToAggregate(InstCombiner &IC, StoreInst &SI) {		static bool unpackStoreToAggregate(InstCombiner &IC, StoreInst &SI) {
// FIXME: We could probably with some care handle both volatile and atomic		// FIXME: We could probably with some care handle both volatile and atomic
// stores here but it isn't clear that this is important.		// stores here but it isn't clear that this is important.
▲ Show 20 Lines • Show All 347 Lines • Show Last 20 Lines

test/Transforms/InstCombine/insert-val-extract-elem.ll

				; RUN: opt -S -instcombine %s | FileCheck %s

				; CHECK-NOT: insertvalue
				; CHECK-NOT: extractelement
				; CHECK: store <2 x double>
				; Test input: rebuilds a [2 x double] array element-by-element from the lanes
				; of a <2 x double> vector and stores the array. The FileCheck directives
				; placed before this function expect the optimized output to contain a
				; `store <2 x double>` with no remaining insertvalue or extractelement.
				; The two arguments are unnamed, so they are referenced as %0 (the sret
				; array pointer) and %1 (the vector pointer).
				define void @julia_2xdouble([2 x double]* sret, <2 x double>*) {
				top:
				; Load the whole source vector through the second argument.
				%x = load <2 x double>, <2 x double>* %1
				; Lane 0 goes to array index 0, starting from an undef aggregate.
				%x0 = extractelement <2 x double> %x, i32 0
				%i0 = insertvalue [2 x double] undef, double %x0, 0
				; Lane 1 goes to array index 1, chained onto the previous insertvalue.
				%x1 = extractelement <2 x double> %x, i32 1
				%i1 = insertvalue [2 x double] %i0, double %x1, 1
				; Store the fully populated array through the sret pointer.
				store [2 x double] %i1, [2 x double]* %0, align 4
				ret void
				}

				; CHECK-NOT: insertvalue
				; CHECK-NOT: extractelement
				; CHECK: store <4 x float>
				; Test input: same shape as the two-element case, but with four float lanes.
				; Each lane of a <4 x float> is extracted and inserted at the matching index
				; of a [4 x float] array, which is then stored. The FileCheck directives
				; placed before this function expect a `store <4 x float>` in the output
				; and no remaining insertvalue or extractelement.
				; Arguments are unnamed: %0 is the sret array pointer, %1 the vector pointer.
				define void @julia_4xfloat([4 x float]* sret, <4 x float>*) {
				top:
				; Load the whole source vector through the second argument.
				%x = load <4 x float>, <4 x float>* %1
				; Build the array one lane at a time; extract index N always feeds
				; insert index N, and the chain starts from an undef aggregate.
				%x0 = extractelement <4 x float> %x, i32 0
				%i0 = insertvalue [4 x float] undef, float %x0, 0
				%x1 = extractelement <4 x float> %x, i32 1
				%i1 = insertvalue [4 x float] %i0, float %x1, 1
				%x2 = extractelement <4 x float> %x, i32 2
				%i2 = insertvalue [4 x float] %i1, float %x2, 2
				%x3 = extractelement <4 x float> %x, i32 3
				%i3 = insertvalue [4 x float] %i2, float %x3, 3
				; Store the fully populated array through the sret pointer.
				store [4 x float] %i3, [4 x float]* %0, align 4
				ret void
				}

				%pseudovec = type { float, float, float, float }

				; CHECK-NOT: insertvalue
				; CHECK-NOT: extractelement
				; CHECK: store <4 x float>
				; Test input: struct variant of the array cases. The destination aggregate is
				; %pseudovec, declared in this file as { float, float, float, float }, so it
				; has as many fields as the source <4 x float> has lanes, all of type float.
				; The FileCheck directives placed before this function expect a
				; `store <4 x float>` in the output and no remaining insertvalue or
				; extractelement.
				; Arguments are unnamed: %0 is the sret struct pointer, %1 the vector pointer.
				define void @julia_pseudovec(%pseudovec* sret, <4 x float>*) {
				top:
				; Load the whole source vector through the second argument.
				%x = load <4 x float>, <4 x float>* %1
				; Fill the struct field-by-field; extract index N always feeds field N,
				; and the chain starts from an undef aggregate.
				%x0 = extractelement <4 x float> %x, i32 0
				%i0 = insertvalue %pseudovec undef, float %x0, 0
				%x1 = extractelement <4 x float> %x, i32 1
				%i1 = insertvalue %pseudovec %i0, float %x1, 1
				%x2 = extractelement <4 x float> %x, i32 2
				%i2 = insertvalue %pseudovec %i1, float %x2, 2
				%x3 = extractelement <4 x float> %x, i32 3
				%i3 = insertvalue %pseudovec %i2, float %x3, 3
				; Store the fully populated struct through the sret pointer.
				store %pseudovec %i3, %pseudovec* %0, align 4
				ret void
				}

This is an archive of the discontinued LLVM Phabricator instance.

Optimize store of "bitcast" from vector to aggregate.
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 46289

lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp

test/Transforms/InstCombine/insert-val-extract-elem.ll

This is an archive of the discontinued LLVM Phabricator instance.

Optimize store of "bitcast" from vector to aggregate.
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 46289

lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp

test/Transforms/InstCombine/insert-val-extract-elem.ll

Optimize store of "bitcast" from vector to aggregate.
ClosedPublic