Diff 352635

llvm/include/llvm/Analysis/TargetTransformInfo.h

Show First 20 Lines • Show All 654 Lines • ▼ Show 20 Lines	public:
/// Return true if the target supports masked gather.		/// Return true if the target supports masked gather.
bool isLegalMaskedGather(Type *DataType, Align Alignment) const;		bool isLegalMaskedGather(Type *DataType, Align Alignment) const;

/// Return true if the target supports masked compress store.		/// Return true if the target supports masked compress store.
bool isLegalMaskedCompressStore(Type *DataType) const;		bool isLegalMaskedCompressStore(Type *DataType) const;
/// Return true if the target supports masked expand load.		/// Return true if the target supports masked expand load.
bool isLegalMaskedExpandLoad(Type *DataType) const;		bool isLegalMaskedExpandLoad(Type *DataType) const;

		/// Return true if the target reports the intrinsic \p IID as legal to use
		/// with scalable vectors. The query is keyed on the intrinsic ID only; no
		/// vectorization factor is passed in.
		bool isLegalIntrinsicForScalableVectors(Intrinsic::ID IID) const;
		sdesmalenUnsubmitted Done Reply Inline Actions nit: I read `isLegalScalableVectorIntrinsic` as asking if the "scalable vector intrinsic" is legal as if the behaviour of the intrinsic is specific to scalable vectors. Rather, what you're asking is if the intrinsic (not specific to scalable vectors) is legal to use with scalable vector arguments, so from that reasoning I prefer `isIntrinsicLegalForScalableVectors`. sdesmalen: nit: I read `isLegalScalableVectorIntrinsic` as asking if the "scalable vector intrinsic" is…

/// Return true if the target has a unified operation to calculate division		/// Return true if the target has a unified operation to calculate division
/// and remainder. If so, the additional implicit multiplication and		/// and remainder. If so, the additional implicit multiplication and
/// subtraction required to calculate a remainder from division are free. This		/// subtraction required to calculate a remainder from division are free. This
/// can enable more aggressive transformations for division and remainder than		/// can enable more aggressive transformations for division and remainder than
/// would typically be allowed using throughput or size cost models.		/// would typically be allowed using throughput or size cost models.
bool hasDivRemOp(Type *DataType, bool IsSigned) const;		bool hasDivRemOp(Type *DataType, bool IsSigned) const;

/// Return true if the given instruction (assumed to be a memory access		/// Return true if the given instruction (assumed to be a memory access
▲ Show 20 Lines • Show All 840 Lines • ▼ Show 20 Lines	public:
virtual bool isLegalMaskedStore(Type *DataType, Align Alignment) = 0;		virtual bool isLegalMaskedStore(Type *DataType, Align Alignment) = 0;
virtual bool isLegalMaskedLoad(Type *DataType, Align Alignment) = 0;		virtual bool isLegalMaskedLoad(Type *DataType, Align Alignment) = 0;
virtual bool isLegalNTStore(Type *DataType, Align Alignment) = 0;		virtual bool isLegalNTStore(Type *DataType, Align Alignment) = 0;
virtual bool isLegalNTLoad(Type *DataType, Align Alignment) = 0;		virtual bool isLegalNTLoad(Type *DataType, Align Alignment) = 0;
virtual bool isLegalMaskedScatter(Type *DataType, Align Alignment) = 0;		virtual bool isLegalMaskedScatter(Type *DataType, Align Alignment) = 0;
virtual bool isLegalMaskedGather(Type *DataType, Align Alignment) = 0;		virtual bool isLegalMaskedGather(Type *DataType, Align Alignment) = 0;
virtual bool isLegalMaskedCompressStore(Type *DataType) = 0;		virtual bool isLegalMaskedCompressStore(Type *DataType) = 0;
virtual bool isLegalMaskedExpandLoad(Type *DataType) = 0;		virtual bool isLegalMaskedExpandLoad(Type *DataType) = 0;
		virtual bool isLegalIntrinsicForScalableVectors(Intrinsic::ID IID) = 0;
		sdesmalenUnsubmitted Done Reply Inline Actions nit: Ah I only now see it falls a bit out of style, so perhaps `isLegalIntrinsicForScalableVectors` is a better name. Sorry I didn't spot that before. sdesmalen: nit: Ah I only now see it falls a bit out of style, so perhaps…
virtual bool hasDivRemOp(Type *DataType, bool IsSigned) = 0;		virtual bool hasDivRemOp(Type *DataType, bool IsSigned) = 0;
virtual bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) = 0;		virtual bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) = 0;
virtual bool prefersVectorizedAddressing() = 0;		virtual bool prefersVectorizedAddressing() = 0;
virtual InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,		virtual InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
int64_t BaseOffset,		int64_t BaseOffset,
bool HasBaseReg, int64_t Scale,		bool HasBaseReg, int64_t Scale,
unsigned AddrSpace) = 0;		unsigned AddrSpace) = 0;
virtual bool LSRWithInstrQueries() = 0;		virtual bool LSRWithInstrQueries() = 0;
▲ Show 20 Lines • Show All 365 Lines • ▼ Show 20 Lines	bool isLegalMaskedGather(Type *DataType, Align Alignment) override {
return Impl.isLegalMaskedGather(DataType, Alignment);		return Impl.isLegalMaskedGather(DataType, Alignment);
}		}
bool isLegalMaskedCompressStore(Type *DataType) override {		bool isLegalMaskedCompressStore(Type *DataType) override {
return Impl.isLegalMaskedCompressStore(DataType);		return Impl.isLegalMaskedCompressStore(DataType);
}		}
bool isLegalMaskedExpandLoad(Type *DataType) override {		bool isLegalMaskedExpandLoad(Type *DataType) override {
return Impl.isLegalMaskedExpandLoad(DataType);		return Impl.isLegalMaskedExpandLoad(DataType);
}		}
		bool isLegalIntrinsicForScalableVectors(Intrinsic::ID IID) override {
		return Impl.isLegalIntrinsicForScalableVectors(IID);
		}

bool hasDivRemOp(Type *DataType, bool IsSigned) override {		bool hasDivRemOp(Type *DataType, bool IsSigned) override {
return Impl.hasDivRemOp(DataType, IsSigned);		return Impl.hasDivRemOp(DataType, IsSigned);
}		}
bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) override {		bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) override {
return Impl.hasVolatileVariant(I, AddrSpace);		return Impl.hasVolatileVariant(I, AddrSpace);
}		}
bool prefersVectorizedAddressing() override {		bool prefersVectorizedAddressing() override {
return Impl.prefersVectorizedAddressing();		return Impl.prefersVectorizedAddressing();
▲ Show 20 Lines • Show All 497 Lines • Show Last 20 Lines

llvm/include/llvm/Analysis/TargetTransformInfoImpl.h

Show First 20 Lines • Show All 252 Lines • ▼ Show 20 Lines	public:
bool isLegalMaskedScatter(Type *DataType, Align Alignment) const {		bool isLegalMaskedScatter(Type *DataType, Align Alignment) const {
return false;		return false;
}		}

bool isLegalMaskedGather(Type *DataType, Align Alignment) const {		bool isLegalMaskedGather(Type *DataType, Align Alignment) const {
return false;		return false;
}		}

		/// Implementation provided in this header: always returns false, i.e. no
		/// intrinsic is reported as legal for scalable vectors. \p IID is unused.
		bool isLegalIntrinsicForScalableVectors(Intrinsic::ID IID) const {
		return false;
		}

bool isLegalMaskedCompressStore(Type *DataType) const { return false; }		bool isLegalMaskedCompressStore(Type *DataType) const { return false; }

bool isLegalMaskedExpandLoad(Type *DataType) const { return false; }		bool isLegalMaskedExpandLoad(Type *DataType) const { return false; }

bool hasDivRemOp(Type *DataType, bool IsSigned) const { return false; }		bool hasDivRemOp(Type *DataType, bool IsSigned) const { return false; }

bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const {		bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const {
return false;		return false;
▲ Show 20 Lines • Show All 906 Lines • Show Last 20 Lines

llvm/lib/Analysis/TargetTransformInfo.cpp

	Show First 20 Lines • Show All 403 Lines • ▼ Show 20 Lines
	bool TargetTransformInfo::isLegalMaskedCompressStore(Type *DataType) const {			bool TargetTransformInfo::isLegalMaskedCompressStore(Type *DataType) const {
	return TTIImpl->isLegalMaskedCompressStore(DataType);			return TTIImpl->isLegalMaskedCompressStore(DataType);
	}			}

	bool TargetTransformInfo::isLegalMaskedExpandLoad(Type *DataType) const {			bool TargetTransformInfo::isLegalMaskedExpandLoad(Type *DataType) const {
	return TTIImpl->isLegalMaskedExpandLoad(DataType);			return TTIImpl->isLegalMaskedExpandLoad(DataType);
	}			}

				bool TargetTransformInfo::isLegalIntrinsicForScalableVectors(
				Intrinsic::ID IID) const {
				return TTIImpl->isLegalIntrinsicForScalableVectors(IID);
				}

	bool TargetTransformInfo::hasDivRemOp(Type *DataType, bool IsSigned) const {			bool TargetTransformInfo::hasDivRemOp(Type *DataType, bool IsSigned) const {
	return TTIImpl->hasDivRemOp(DataType, IsSigned);			return TTIImpl->hasDivRemOp(DataType, IsSigned);
	}			}

	bool TargetTransformInfo::hasVolatileVariant(Instruction *I,			bool TargetTransformInfo::hasVolatileVariant(Instruction *I,
	unsigned AddrSpace) const {			unsigned AddrSpace) const {
	return TTIImpl->hasVolatileVariant(I, AddrSpace);			return TTIImpl->hasVolatileVariant(I, AddrSpace);
	}			}
	▲ Show 20 Lines • Show All 1,029 Lines • Show Last 20 Lines

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h

Show First 20 Lines • Show All 250 Lines • ▼ Show 20 Lines	public:

bool isLegalMaskedGather(Type *DataType, Align Alignment) const {		bool isLegalMaskedGather(Type *DataType, Align Alignment) const {
return isLegalMaskedGatherScatter(DataType);		return isLegalMaskedGatherScatter(DataType);
}		}
bool isLegalMaskedScatter(Type *DataType, Align Alignment) const {		bool isLegalMaskedScatter(Type *DataType, Align Alignment) const {
return isLegalMaskedGatherScatter(DataType);		return isLegalMaskedGatherScatter(DataType);
}		}

		bool isLegalIntrinsicForScalableVectors(Intrinsic::ID IID) const;

bool isLegalNTStore(Type *DataType, Align Alignment) {		bool isLegalNTStore(Type *DataType, Align Alignment) {
// NOTE: The logic below is mostly geared towards LV, which calls it with		// NOTE: The logic below is mostly geared towards LV, which calls it with
// vectors with 2 elements. We might want to improve that, if other		// vectors with 2 elements. We might want to improve that, if other
// users show up.		// users show up.
// Nontemporal vector stores can be directly lowered to STNP, if the vector		// Nontemporal vector stores can be directly lowered to STNP, if the vector
// can be halved so that each half fits into a register. That's the case if		// can be halved so that each half fits into a register. That's the case if
// the element type fits into a register and the number of elements is a		// the element type fits into a register and the number of elements is a
// power of 2 > 1.		// power of 2 > 1.
▲ Show 20 Lines • Show All 44 Lines • Show Last 20 Lines

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

Show First 20 Lines • Show All 1,899 Lines • ▼ Show 20 Lines	if (Kind == TTI::SK_Broadcast \|\| Kind == TTI::SK_Transpose \|\|
};		};
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);		std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
if (const auto *Entry = CostTableLookup(ShuffleTbl, Kind, LT.second))		if (const auto *Entry = CostTableLookup(ShuffleTbl, Kind, LT.second))
return LT.first * Entry->Cost;		return LT.first * Entry->Cost;
}		}

return BaseT::getShuffleCost(Kind, Tp, Mask, Index, SubTp);		return BaseT::getShuffleCost(Kind, Tp, Mask, Index, SubTp);
}		}

		bool AArch64TTIImpl::isLegalIntrinsicForScalableVectors(
		Intrinsic::ID IID) const {
		sdesmalenUnsubmitted Done Reply Inline Actions I'd rather write this the other way around, have a list here of intrinsics we do support and can safely return 'true' for, and fall back on BaseT's implementation for the 'default' case (which seems to be missing here), which is more pessimistic and only returns true if it knows the intrinsics can always be vectorized, i.e. the trivially vectorizable cases. sdesmalen: I'd rather write this the other way around, have a list here of intrinsics we //do// support…
		david-armAuthorUnsubmitted Done Reply Inline Actions So we do fall back on the BaseT version, it's just I didn't do it explicitly in the `default:` case. For some reason I thought this coding structure was the preferred way - I'm never really too sure about the coding style of switch statements in LLVM! It's also worth pointing out that the BaseT version calls isTriviallyVectorizable where even `sin` and `cos` will return true. Therefore, returning the BaseT version for those cases is not what we want. I could either: (1) fill in the complete set of entries here with both the false and true cases, or (2) change the BaseT version to always return false. Any preferences? david-arm: So we do fall back on the BaseT version, it's just I didn't do it explicitly in the `default:`…
		sdesmalenUnsubmitted Done Reply Inline Actions It's also worth pointing out that that the BaseT version calls isTriviallyVectorizable where even sin and cos will return true. Therefore, returning the BaseT version for those cases is not what we want. That suggests to me that the BaseT implementation for isLegalScalableVectorIntrinsic is too lenient and shouldn't rely on isTriviallyVectorizable. Returning 'false' seems like a better starting point. sdesmalen: > It's also worth pointing out that that the BaseT version calls isTriviallyVectorizable where…
		switch (IID) {
		case Intrinsic::abs:
		sdesmalenUnsubmitted Done Reply Inline Actions nit: maybe drop the comments, because it's not complete (for example, smax is not bit-manipulation) sdesmalen: nit: maybe drop the comments, because it's not complete (for example, smax is not bit…
		case Intrinsic::bswap:
		case Intrinsic::bitreverse:
		case Intrinsic::ctpop:
		case Intrinsic::ctlz:
		case Intrinsic::cttz:
		case Intrinsic::fshl:
		case Intrinsic::fshr:
		case Intrinsic::smax:
		case Intrinsic::smin:
		case Intrinsic::umax:
		case Intrinsic::umin:
		case Intrinsic::sadd_sat:
		case Intrinsic::ssub_sat:
		case Intrinsic::uadd_sat:
		case Intrinsic::usub_sat:
		case Intrinsic::smul_fix:
		case Intrinsic::smul_fix_sat:
		case Intrinsic::umul_fix:
		case Intrinsic::umul_fix_sat:
		case Intrinsic::sqrt:
		case Intrinsic::fabs:
		case Intrinsic::minnum:
		case Intrinsic::maxnum:
		case Intrinsic::minimum:
		case Intrinsic::maximum:
		case Intrinsic::floor:
		case Intrinsic::ceil:
		case Intrinsic::trunc:
		case Intrinsic::rint:
		case Intrinsic::nearbyint:
		case Intrinsic::round:
		case Intrinsic::roundeven:
		case Intrinsic::fma:
		case Intrinsic::fmuladd:
		return true;
		default:
		// We can fall back on scalarization for fixed width vectors, but not for
		// scalable vectors.
		return BaseT::isLegalIntrinsicForScalableVectors(IID);
		}
		}

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 1,507 Lines • ▼ Show 20 Lines	bool isLegalGatherOrScatter(Value *V) {
auto *Ty = getLoadStoreType(V);		auto *Ty = getLoadStoreType(V);
Align Align = getLoadStoreAlignment(V);		Align Align = getLoadStoreAlignment(V);
return (LI && TTI.isLegalMaskedGather(Ty, Align)) \|\|		return (LI && TTI.isLegalMaskedGather(Ty, Align)) \|\|
(SI && TTI.isLegalMaskedScatter(Ty, Align));		(SI && TTI.isLegalMaskedScatter(Ty, Align));
}		}

/// Returns true if the target machine supports all of the reduction		/// Returns true if the target machine supports all of the reduction
/// variables found for the given VF.		/// variables found for the given VF.
bool canVectorizeReductions(ElementCount VF) {		bool canVectorizeReductions(ElementCount VF) const {
		sdesmalenUnsubmitted Done Reply Inline Actions nit: is this an unrelated change? sdesmalen: nit: is this an unrelated change?
		david-armAuthorUnsubmitted Done Reply Inline Actions No, it's needed because the caller is also marked `const` david-arm: No, it's needed because the caller is also marked `const`
return (all_of(Legal->getReductionVars(), [&](auto &Reduction) -> bool {		return (all_of(Legal->getReductionVars(), [&](auto &Reduction) -> bool {
const RecurrenceDescriptor &RdxDesc = Reduction.second;		const RecurrenceDescriptor &RdxDesc = Reduction.second;
return TTI.isLegalToVectorizeReduction(RdxDesc, VF);		return TTI.isLegalToVectorizeReduction(RdxDesc, VF);
}));		}));
}		}

		/// Returns true if we can widen all instructions in the loop using a maximum
		/// scalable vectorization factor \p MaxVF. If the loop cannot be widened, a
		/// vectorization remark describing the reason is emitted via
		/// reportVectorizationInfo and false is returned.
		bool canWidenLoopWithScalableVectors(ElementCount MaxVF) const;

/// Returns true if \p I is an instruction that will be scalarized with		/// Returns true if \p I is an instruction that will be scalarized with
/// predication. Such instructions include conditional stores and		/// predication. Such instructions include conditional stores and
/// instructions that may divide by zero.		/// instructions that may divide by zero.
/// If a non-zero VF has been calculated, we check if I will be scalarized		/// If a non-zero VF has been calculated, we check if I will be scalarized
/// predication for that VF.		/// predication for that VF.
bool isScalarWithPredication(Instruction *I) const;		bool isScalarWithPredication(Instruction *I) const;

// Returns true if \p I is an instruction that will be predicated either		// Returns true if \p I is an instruction that will be predicated either
▲ Show 20 Lines • Show All 4,125 Lines • ▼ Show 20 Lines	reportVectorizationFailure("Runtime stride check for small trip count",
"this loop without such check by compiling with -Os/-Oz",		"this loop without such check by compiling with -Os/-Oz",
"CantVersionLoopWithOptForSize", ORE, TheLoop);		"CantVersionLoopWithOptForSize", ORE, TheLoop);
return true;		return true;
}		}

return false;		return false;
}		}

		bool LoopVectorizationCostModel::canWidenLoopWithScalableVectors(
		sdesmalenUnsubmitted Done Reply Inline Actions MaxVF ? sdesmalen: MaxVF ?
		sdesmalenUnsubmitted Done Reply Inline Actions nit: s/isSclableLoopLegal/loopCanBeWidenedWithScalableVectors/ ? sdesmalen: nit: s/isSclableLoopLegal/loopCanBeWidenedWithScalableVectors/ ?
		ElementCount MaxVF) const {
		// Test that the loop-vectorizer can legalize all operations for eligible
		kmclaughlinUnsubmitted Done Reply Inline Actions nit: could this be moved below the comments beneath it so that it's above `if !(canVectorizeReductions(VF))`? kmclaughlin: nit: could this be moved below the comments beneath it so that it's above `if !
		sdesmalenUnsubmitted Done Reply Inline Actions for eligible vectorization factors up to MaxVF sdesmalen: for eligible vectorization factors up to MaxVF
		// vectorization factors up to MaxVF.
		sdesmalenUnsubmitted Done Reply Inline Actions Can you move this comment down to the CallInst case? sdesmalen: Can you move this comment down to the CallInst case?
		david-armAuthorUnsubmitted Done Reply Inline Actions Yeah sure, although it's worth pointing out this is an existing comment that was referring to canVectorizeReductions, i.e. that we can also filter out the VFs not valid for the reductions too. I'm happy to move it down to the CallInst case though if you're not worried about the reduction case atm? david-arm: Yeah sure, although it's worth pointing out this is an existing comment that was referring to…
		sdesmalenUnsubmitted Done Reply Inline Actions Ah I see. Looking at the implementation for canVectorizeReductions, and correspondingly to isLegalToVectorizeReduction, the VF itself isn't really used, so perhaps we may just as well remove it at this point. It's more the reduction-operation that is holding it back from vectorizing rather than the VF. Initially I imagined us having a function like you added here, which would filter the VFs based on capabilities, but it turned out that for the reductions that just wasn't really needed, but we kept the individual VF operand. sdesmalen: Ah I see. Looking at the implementation for canVectorizeReductions, and correspondingly to…

		// Disable scalable vectorization if the loop contains unsupported reductions.
		if (!canVectorizeReductions(MaxVF)) {
		reportVectorizationInfo("Scalable vectorization not supported for the "
		"reduction operations found in this loop.",
		"ScalableVFUnfeasible", ORE, TheLoop);
		return false;
		}

		// Iterate through all instructions in the loop, ensuring that it is legal to
		// vectorize with a scalable VF.
		for (BasicBlock *BB : TheLoop->blocks()) {
		sdesmalenUnsubmitted Done Reply Inline Actions When debug-info is enabled, that will insert llvm.dbg intrinsic calls which aren't handled in your switch, but maybe it's better to avoid getting into that to begin with by discarding instructions where `I.isDebugOrPseudoInst() == true`. sdesmalen: When debug-info is enabled, that will insert llvm.dbg intrinsic calls which aren't handled in…
		for (Instruction &I : *BB) {
		if (I.isDebugOrPseudoInst())
		continue;

		if (auto *CI = dyn_cast<CallInst>(&I)) {
		kmclaughlinUnsubmitted Done Reply Inline Actions Hi @david-arm, is it possible for there to be a VecID but getMappings(*CI) is not empty? Is it worth adding a comment about what should happen in this case? kmclaughlin: Hi @david-arm, is it possible for there to be a VecID but getMappings(*CI) is not empty? Is it…
		david-armAuthorUnsubmitted Done Reply Inline Actions Yeah, so in the majority of cases I expect we don't have mappings for things like `sqrt` and `sin` intrinsics, at least not until there is some scalable vector library support. I've tried to restructure the code here to make the logic a bit clearer. david-arm: Yeah, so in the majority of cases I expect we don't have mappings for things like `sqrt` and…
		Intrinsic::ID VecID = getVectorIntrinsicIDForCall(CI, TLI);

		// First check if it's always legal to widen this intrinsic regardless
		// of the scalable VF, i.e. we don't have to worry about scalarizing
		// the intrinsic.
		if (VecID && TTI.isLegalIntrinsicForScalableVectors(VecID))
		continue;

		sdesmalenUnsubmitted Done Reply Inline Actions Instead of passing in Msg, can you just call `reportVectorizationInfo` here? Also, can you make the message more specific by telling which function is not vectorizable with scalable vectors? sdesmalen: Instead of passing in Msg, can you just call `reportVectorizationInfo` here? Also, can you make…
		// At this point we have no guarantee that we can widen this call
		// unless we have mappings in the vector function database.
		sdesmalenUnsubmitted Done Reply Inline Actions Shouldn't the code traverse the mappings in the VFdatabase to see if there is a scalable form available? sdesmalen: Shouldn't the code traverse the mappings in the VFdatabase to see if there is a scalable form…
		bool HasScalableMapping =
		llvm::any_of(VFDatabase::getMappings(*CI),
		[&](const VFInfo &V) { return V.Shape.IsScalable; });

		// FIXME: This is still optimistic because we assume that since we have
		// at least one supported VF we can vectorize the loop. However, in
		// future we should really filter out all unsupported VFs when deciding
		// which one to choose.
		if (!HasScalableMapping) {
		sdesmalenUnsubmitted Done Reply Inline Actions nit: Can you use `llvm::any_of()` instead of the above loop? sdesmalen: nit: Can you use `llvm::any_of()` instead of the above loop?
		david-armAuthorUnsubmitted Done Reply Inline Actions Sure. To be honest, in general I find the above loop more readable than using lambdas, which is why I tend to find it more natural to write code this way. However, I know lots of the codebase uses them. :) david-arm: Sure. To be honest, in general I find the above loop more readable than using lambdas, which is…
		StringRef FnName = CI->getCalledFunction()->getName();
		reportVectorizationInfo("Scalable vectorization not supported for "
		"the call instruction (" +
		FnName.str() + ") found in this loop",
		"ScalableVFUnfeasible", ORE, TheLoop);
		return false;
		}
		sdesmalenUnsubmitted Done Reply Inline Actions Can you add a FIXME that says this code is still too optimistic? It depends on the chosen VF whether the loop can vectorize (vis-a-vis whether a mapping is available for that specific VF). I expect we'll want to build up a set of suitable VFs (up to and including MaxVF) that are legal, so that we can later filter out VFs that should not be considered as candidates for vectorization. sdesmalen: Can you add a FIXME that says this code is still too optimistic? It depends on the chosen VF…
		}
		}
		}

		return true;
		}

ElementCount		ElementCount
LoopVectorizationCostModel::getMaxLegalScalableVF(unsigned MaxSafeElements) {		LoopVectorizationCostModel::getMaxLegalScalableVF(unsigned MaxSafeElements) {
if (!TTI.supportsScalableVectors() && !ForceTargetSupportsScalableVectors) {		if (!TTI.supportsScalableVectors() && !ForceTargetSupportsScalableVectors) {
reportVectorizationInfo(		reportVectorizationInfo(
"Disabling scalable vectorization, because target does not "		"Disabling scalable vectorization, because target does not "
"support scalable vectors.",		"support scalable vectors.",
"ScalableVectorsUnsupported", ORE, TheLoop);		"ScalableVectorsUnsupported", ORE, TheLoop);
return ElementCount::getScalable(0);		return ElementCount::getScalable(0);
}		}

if (Hints->isScalableVectorizationDisabled()) {		if (Hints->isScalableVectorizationDisabled()) {
reportVectorizationInfo("Scalable vectorization is explicitly disabled",		reportVectorizationInfo("Scalable vectorization is explicitly disabled",
"ScalableVectorizationDisabled", ORE, TheLoop);		"ScalableVectorizationDisabled", ORE, TheLoop);
return ElementCount::getScalable(0);		return ElementCount::getScalable(0);
}		}

auto MaxScalableVF = ElementCount::getScalable(		auto MaxScalableVF = ElementCount::getScalable(
std::numeric_limits<ElementCount::ScalarTy>::max());		std::numeric_limits<ElementCount::ScalarTy>::max());

// Disable scalable vectorization if the loop contains unsupported reductions.		if (!canWidenLoopWithScalableVectors(MaxScalableVF))
// Test that the loop-vectorizer can legalize all operations for this MaxVF.
// FIXME: While for scalable vectors this is currently sufficient, this should
// be replaced by a more detailed mechanism that filters out specific VFs,
// instead of invalidating vectorization for a whole set of VFs based on the
// MaxVF.
if (!canVectorizeReductions(MaxScalableVF)) {
reportVectorizationInfo(
"Scalable vectorization not supported for the reduction "
"operations found in this loop.",
"ScalableVFUnfeasible", ORE, TheLoop);
return ElementCount::getScalable(0);		return ElementCount::getScalable(0);
}

if (Legal->isSafeForAnyVectorWidth())		if (Legal->isSafeForAnyVectorWidth())
return MaxScalableVF;		return MaxScalableVF;

// Limit MaxScalableVF by the maximum safe dependence distance.		// Limit MaxScalableVF by the maximum safe dependence distance.
Optional<unsigned> MaxVScale = TTI.getMaxVScale();		Optional<unsigned> MaxVScale = TTI.getMaxVScale();
MaxScalableVF = ElementCount::getScalable(		MaxScalableVF = ElementCount::getScalable(
MaxVScale ? (MaxSafeElements / MaxVScale.getValue()) : 0);		MaxVScale ? (MaxSafeElements / MaxVScale.getValue()) : 0);
▲ Show 20 Lines • Show All 4,632 Lines • Show Last 20 Lines

llvm/test/Transforms/LoopVectorize/AArch64/scalable-call.ll

	; RUN: opt -S -loop-vectorize -force-vector-interleave=1 -instcombine -mattr=+sve -mtriple aarch64-unknown-linux-gnu -scalable-vectorization=on < %s \| FileCheck %s			; RUN: opt -S -loop-vectorize -force-vector-interleave=1 -instcombine -mattr=+sve -mtriple aarch64-unknown-linux-gnu -scalable-vectorization=on \
				; RUN: -pass-remarks-missed=loop-vectorize < %s 2>%t \| FileCheck %s
				; RUN: cat %t \| FileCheck %s --check-prefix=CHECK-REMARKS

	define void @vec_load(i64 %N, double* nocapture %a, double* nocapture readonly %b) {			define void @vec_load(i64 %N, double* nocapture %a, double* nocapture readonly %b) {
	; CHECK-LABEL: @vec_load			; CHECK-LABEL: @vec_load
	; CHECK: vector.body:			; CHECK: vector.body:
	; CHECK: %[[LOAD:.]] = load <vscale x 2 x double>, <vscale x 2 x double>			; CHECK: %[[LOAD:.]] = load <vscale x 2 x double>, <vscale x 2 x double>
	; CHECK: call <vscale x 2 x double> @foo_vec(<vscale x 2 x double> %[[LOAD]])			; CHECK: call <vscale x 2 x double> @foo_vec(<vscale x 2 x double> %[[LOAD]])
	entry:			entry:
	%cmp7 = icmp sgt i64 %N, 0			%cmp7 = icmp sgt i64 %N, 0
	▲ Show 20 Lines • Show All 60 Lines • ▼ Show 20 Lines
	for.end:			for.end:
	ret void			ret void
	}			}

	define void @vec_intrinsic(i64 %N, double* nocapture readonly %a) {			define void @vec_intrinsic(i64 %N, double* nocapture readonly %a) {
	; CHECK-LABEL: @vec_intrinsic			; CHECK-LABEL: @vec_intrinsic
	; CHECK: vector.body:			; CHECK: vector.body:
	; CHECK: %[[LOAD:.]] = load <vscale x 2 x double>, <vscale x 2 x double>			; CHECK: %[[LOAD:.]] = load <vscale x 2 x double>, <vscale x 2 x double>
	; CHECK: call fast <vscale x 2 x double> @sin_vec(<vscale x 2 x double> %[[LOAD]])			; CHECK: call fast <vscale x 2 x double> @sin_vec_nxv2f64(<vscale x 2 x double> %[[LOAD]])
	entry:			entry:
	%cmp7 = icmp sgt i64 %N, 0			%cmp7 = icmp sgt i64 %N, 0
	br i1 %cmp7, label %for.body, label %for.end			br i1 %cmp7, label %for.body, label %for.end

	for.body:			for.body:
	%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]			%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
	%arrayidx = getelementptr inbounds double, double* %a, i64 %iv			%arrayidx = getelementptr inbounds double, double* %a, i64 %iv
	%0 = load double, double* %arrayidx, align 8			%0 = load double, double* %arrayidx, align 8
	%1 = call fast double @llvm.sin.f64(double %0) #2			%1 = call fast double @llvm.sin.f64(double %0) #2
	%add = fadd fast double %1, 1.000000e+00			%add = fadd fast double %1, 1.000000e+00
	store double %add, double* %arrayidx, align 8			store double %add, double* %arrayidx, align 8
	%iv.next = add nuw nsw i64 %iv, 1			%iv.next = add nuw nsw i64 %iv, 1
	%exitcond = icmp eq i64 %iv.next, %N			%exitcond = icmp eq i64 %iv.next, %N
	br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !1			br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !1

	for.end:			for.end:
	ret void			ret void
	}			}

				; The llvm.sin.f32 call in this loop carries no attribute group, so no
				; vector-function-abi-variant mapping is attached to it. The checks below
				; expect a missed-vectorization remark about scalable vectors, a
				; fixed-width <2 x float> call to @llvm.sin.v2f32, and no scalable vector
				; type after that call.
				; CHECK-REMARKS: Scalable vectorization not supported for the call instruction (llvm.sin.f32) found in this loop
				define void @vec_sin_no_mapping(float* noalias nocapture %dst, float* noalias nocapture readonly %src, i64 %n) {
				; CHECK: @vec_sin_no_mapping
				; CHECK: call fast <2 x float> @llvm.sin.v2f32
				; CHECK-NOT: <vscale x
				entry:
				br label %for.body

				for.body: ; preds = %entry, %for.body
				%i.07 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
				%arrayidx = getelementptr inbounds float, float* %src, i64 %i.07
				%0 = load float, float* %arrayidx, align 4
				; Plain intrinsic call: no attribute group, hence no vector variant.
				%1 = tail call fast float @llvm.sin.f32(float %0)
				%arrayidx1 = getelementptr inbounds float, float* %dst, i64 %i.07
				store float %1, float* %arrayidx1, align 4
				%inc = add nuw nsw i64 %i.07, 1
				%exitcond.not = icmp eq i64 %inc, %n
				; Loop metadata !1 requests a vectorization width of 2 with scalable
				; vectorization enabled (see the metadata nodes at the end of the file).
				br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !llvm.loop !1

				for.cond.cleanup: ; preds = %for.body
				ret void
				}

				; The llvm.sin.f32 call in this loop uses attribute group #3. The checks
				; below expect a missed-vectorization remark about scalable vectors, a
				; fixed-width <2 x float> call to @llvm.sin.v2f32, and no scalable vector
				; type after that call.
				; NOTE(review): attribute group #3 names llvm.sin.f64 and sin_vec_v2f64
				; (declared on <2 x double>), while the call here is llvm.sin.f32 --
				; confirm whether this mismatch is intentional.
				; CHECK-REMARKS: Scalable vectorization not supported for the call instruction (llvm.sin.f32) found in this loop
				define void @vec_sin_fixed_mapping(float* noalias nocapture %dst, float* noalias nocapture readonly %src, i64 %n) {
				kmclaughlinUnsubmitted Done Reply Inline Actions nit: should this test be also be called `no_mapping` if it is legal to vectorize? kmclaughlin: nit: should this test be also be called `no_mapping` if it is legal to vectorize?
				david-armAuthorUnsubmitted Done Reply Inline Actions Hi @kmclaughlin, so in this case there are no mappings attached to the call instruction, however it is legal to vectorise because there is hardware support for it. I've added comments that hopefully explains it better! david-arm: Hi @kmclaughlin, so in this case there are no mappings attached to the call instruction…
				; CHECK: @vec_sin_fixed_mapping
				; CHECK: call fast <2 x float> @llvm.sin.v2f32
				; CHECK-NOT: <vscale x
				entry:
				br label %for.body

				for.body: ; preds = %entry, %for.body
				%i.07 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
				%arrayidx = getelementptr inbounds float, float* %src, i64 %i.07
				%0 = load float, float* %arrayidx, align 4
				; Attribute group #3 supplies a fixed-width (N2v) variant mapping only.
				%1 = tail call fast float @llvm.sin.f32(float %0) #3
				%arrayidx1 = getelementptr inbounds float, float* %dst, i64 %i.07
				store float %1, float* %arrayidx1, align 4
				%inc = add nuw nsw i64 %i.07, 1
				%exitcond.not = icmp eq i64 %inc, %n
				; Loop metadata !1 requests a vectorization width of 2 with scalable
				; vectorization enabled (see the metadata nodes at the end of the file).
				br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !llvm.loop !1

				for.cond.cleanup: ; preds = %for.body
				ret void
				}

				; Even though there are no function mappings attached to the call
				; in the loop below we can still vectorize the loop because SVE has
				; hardware support in the form of the 'fsqrt' instruction.
				; The check below expects a scalable <vscale x 2 x float> call to
				; @llvm.sqrt.nxv2f32.
				; NOTE(review): the function definition itself is tagged with attribute
				; group #0, which holds the foo -> foo_vec variant mapping; presumably
				; this has no effect on the sqrt call -- confirm it is intended.
				define void @vec_sqrt_no_mapping(float* noalias nocapture %dst, float* noalias nocapture readonly %src, i64 %n) #0 {
				; CHECK: @vec_sqrt_no_mapping
				; CHECK: call fast <vscale x 2 x float> @llvm.sqrt.nxv2f32
				entry:
				br label %for.body

				for.body: ; preds = %entry, %for.body
				%i.07 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
				%arrayidx = getelementptr inbounds float, float* %src, i64 %i.07
				%0 = load float, float* %arrayidx, align 4
				; Plain intrinsic call: no attribute group, hence no vector variant.
				%1 = tail call fast float @llvm.sqrt.f32(float %0)
				%arrayidx1 = getelementptr inbounds float, float* %dst, i64 %i.07
				store float %1, float* %arrayidx1, align 4
				%inc = add nuw nsw i64 %i.07, 1
				%exitcond.not = icmp eq i64 %inc, %n
				; Loop metadata !1 requests a vectorization width of 2 with scalable
				; vectorization enabled (see the metadata nodes at the end of the file).
				br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !llvm.loop !1

				for.cond.cleanup: ; preds = %for.body
				ret void
				}


	declare double @foo(double)			declare double @foo(double)
	declare i64 @bar(i64*)			declare i64 @bar(i64*)
	declare double @llvm.sin.f64(double)			declare double @llvm.sin.f64(double)
				declare float @llvm.sin.f32(float)
				declare float @llvm.sqrt.f32(float)

	declare <vscale x 2 x double> @foo_vec(<vscale x 2 x double>)			declare <vscale x 2 x double> @foo_vec(<vscale x 2 x double>)
	declare <vscale x 2 x i64> @bar_vec(<vscale x 2 x i64*>)			declare <vscale x 2 x i64> @bar_vec(<vscale x 2 x i64*>)
	declare <vscale x 2 x double> @sin_vec(<vscale x 2 x double>)			declare <vscale x 2 x double> @sin_vec_nxv2f64(<vscale x 2 x double>)
				declare <2 x double> @sin_vec_v2f64(<2 x double>)

	attributes #0 = { "vector-function-abi-variant"="_ZGV_LLVM_Nxv_foo(foo_vec)" }			attributes #0 = { "vector-function-abi-variant"="_ZGV_LLVM_Nxv_foo(foo_vec)" }
	attributes #1 = { "vector-function-abi-variant"="_ZGV_LLVM_Nxv_bar(bar_vec)" }			attributes #1 = { "vector-function-abi-variant"="_ZGV_LLVM_Nxv_bar(bar_vec)" }
	attributes #2 = { "vector-function-abi-variant"="_ZGV_LLVM_Nxv_llvm.sin.f64(sin_vec)" }			attributes #2 = { "vector-function-abi-variant"="_ZGV_LLVM_Nxv_llvm.sin.f64(sin_vec_nxv2f64)" }
				attributes #3 = { "vector-function-abi-variant"="_ZGV_LLVM_N2v_llvm.sin.f64(sin_vec_v2f64)" }

	!1 = distinct !{!1, !2, !3}			!1 = distinct !{!1, !2, !3}
	!2 = !{!"llvm.loop.vectorize.width", i32 2}			!2 = !{!"llvm.loop.vectorize.width", i32 2}
	!3 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}			!3 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}

This is an archive of the discontinued LLVM Phabricator instance.

[LoopVectorize] Don't attempt to widen certain calls for scalable vectors
AbandonedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 352635

llvm/include/llvm/Analysis/TargetTransformInfo.h

llvm/include/llvm/Analysis/TargetTransformInfoImpl.h

llvm/lib/Analysis/TargetTransformInfo.cpp

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

llvm/test/Transforms/LoopVectorize/AArch64/scalable-call.ll

This is an archive of the discontinued LLVM Phabricator instance.

[LoopVectorize] Don't attempt to widen certain calls for scalable vectorsAbandonedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 352635

llvm/include/llvm/Analysis/TargetTransformInfo.h

llvm/include/llvm/Analysis/TargetTransformInfoImpl.h

llvm/lib/Analysis/TargetTransformInfo.cpp

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

llvm/test/Transforms/LoopVectorize/AArch64/scalable-call.ll

[LoopVectorize] Don't attempt to widen certain calls for scalable vectors
AbandonedPublic