Diff 189272

include/llvm/Analysis/IVDescriptors.h

Show First 20 Lines • Show All 83 Lines • ▼ Show 20 Lines	enum MinMaxRecurrenceKind {
MRK_SIntMax,		MRK_SIntMax,
MRK_FloatMin,		MRK_FloatMin,
MRK_FloatMax		MRK_FloatMax
};		};

RecurrenceDescriptor() = default;		RecurrenceDescriptor() = default;

RecurrenceDescriptor(Value Start, Instruction Exit, RecurrenceKind K,		RecurrenceDescriptor(Value Start, Instruction Exit, RecurrenceKind K,
MinMaxRecurrenceKind MK, Instruction UAI, Type RT,		FastMathFlags FMF, MinMaxRecurrenceKind MK,
bool Signed, SmallPtrSetImpl<Instruction *> &CI)		Instruction UAI, Type RT, bool Signed,
: StartValue(Start), LoopExitInstr(Exit), Kind(K), MinMaxKind(MK),		SmallPtrSetImpl<Instruction *> &CI)
UnsafeAlgebraInst(UAI), RecurrenceType(RT), IsSigned(Signed) {		: StartValue(Start), LoopExitInstr(Exit), Kind(K), FMF(FMF),
		MinMaxKind(MK), UnsafeAlgebraInst(UAI), RecurrenceType(RT),
		IsSigned(Signed) {
CastInsts.insert(CI.begin(), CI.end());		CastInsts.insert(CI.begin(), CI.end());
}		}

/// This POD struct holds information about a potential recurrence operation.		/// This POD struct holds information about a potential recurrence operation.
class InstDesc {		class InstDesc {
public:		public:
InstDesc(bool IsRecur, Instruction I, Instruction UAI = nullptr)		InstDesc(bool IsRecur, Instruction I, Instruction UAI = nullptr)
: IsRecurrence(IsRecur), PatternLastInst(I), MinMaxKind(MRK_Invalid),		: IsRecurrence(IsRecur), PatternLastInst(I), MinMaxKind(MRK_Invalid),
▲ Show 20 Lines • Show All 89 Lines • ▼ Show 20 Lines	public:
isFirstOrderRecurrence(PHINode Phi, Loop TheLoop,		isFirstOrderRecurrence(PHINode Phi, Loop TheLoop,
DenseMap<Instruction , Instruction > &SinkAfter,		DenseMap<Instruction , Instruction > &SinkAfter,
DominatorTree *DT);		DominatorTree *DT);

RecurrenceKind getRecurrenceKind() { return Kind; }		RecurrenceKind getRecurrenceKind() { return Kind; }

MinMaxRecurrenceKind getMinMaxRecurrenceKind() { return MinMaxKind; }		MinMaxRecurrenceKind getMinMaxRecurrenceKind() { return MinMaxKind; }

		FastMathFlags getFastMathFlags() { return FMF; }

TrackingVH<Value> getRecurrenceStartValue() { return StartValue; }		TrackingVH<Value> getRecurrenceStartValue() { return StartValue; }

Instruction *getLoopExitInstr() { return LoopExitInstr; }		Instruction *getLoopExitInstr() { return LoopExitInstr; }

/// Returns true if the recurrence has unsafe algebra which requires a relaxed		/// Returns true if the recurrence has unsafe algebra which requires a relaxed
/// floating-point model.		/// floating-point model.
bool hasUnsafeAlgebra() { return UnsafeAlgebraInst != nullptr; }		bool hasUnsafeAlgebra() { return UnsafeAlgebraInst != nullptr; }

Show All 23 Lines
private:		private:
// The starting value of the recurrence.		// The starting value of the recurrence.
// It does not have to be zero!		// It does not have to be zero!
TrackingVH<Value> StartValue;		TrackingVH<Value> StartValue;
// The instruction whose value is used outside the loop.		// The instruction whose value is used outside the loop.
Instruction *LoopExitInstr = nullptr;		Instruction *LoopExitInstr = nullptr;
// The kind of the recurrence.		// The kind of the recurrence.
RecurrenceKind Kind = RK_NoRecurrence;		RecurrenceKind Kind = RK_NoRecurrence;
		// The fast-math flags on the recurrent instructions. We propagate these
		sdesmalenUnsubmitted Done Reply Inline Actions nit: recurrenct -> recurrent sdesmalen: nit: recurrenct -> recurrent
		// fast-math flags into the vectorized FP instructions we generate.
		FastMathFlags FMF;
// If this is a min/max recurrence, the kind of recurrence.		// If this is a min/max recurrence, the kind of recurrence.
MinMaxRecurrenceKind MinMaxKind = MRK_Invalid;		MinMaxRecurrenceKind MinMaxKind = MRK_Invalid;
// First occurrence of unsafe algebra in the PHI's use-chain.		// First occurrence of unsafe algebra in the PHI's use-chain.
Instruction *UnsafeAlgebraInst = nullptr;		Instruction *UnsafeAlgebraInst = nullptr;
// The type of the recurrence.		// The type of the recurrence.
Type *RecurrenceType = nullptr;		Type *RecurrenceType = nullptr;
// True if all source operands of the recurrence are SExtInsts.		// True if all source operands of the recurrence are SExtInsts.
bool IsSigned = false;		bool IsSigned = false;
▲ Show 20 Lines • Show All 109 Lines • Show Last 20 Lines

include/llvm/IR/Operator.h

Show First 20 Lines • Show All 181 Lines • ▼ Show 20 Lines	enum {
NoSignedZeros = (1 << 3),		NoSignedZeros = (1 << 3),
AllowReciprocal = (1 << 4),		AllowReciprocal = (1 << 4),
AllowContract = (1 << 5),		AllowContract = (1 << 5),
ApproxFunc = (1 << 6)		ApproxFunc = (1 << 6)
};		};

FastMathFlags() = default;		FastMathFlags() = default;

		static FastMathFlags getFast() {
		FastMathFlags FMF;
		FMF.setFast();
		return FMF;
		}

bool any() const { return Flags != 0; }		bool any() const { return Flags != 0; }
bool none() const { return Flags == 0; }		bool none() const { return Flags == 0; }
bool all() const { return Flags == ~0U; }		bool all() const { return Flags == ~0U; }

void clear() { Flags = 0; }		void clear() { Flags = 0; }
void set() { Flags = ~0U; }		void set() { Flags = ~0U; }

/// Flag queries		/// Flag queries
▲ Show 20 Lines • Show All 387 Lines • Show Last 20 Lines

include/llvm/Transforms/Utils/LoopUtils.h

Show First 20 Lines • Show All 290 Lines • ▼ Show 20 Lines	getOrderedReduction(IRBuilder<> &Builder, Value Acc, Value Src, unsigned Op,
RecurrenceDescriptor::MinMaxRecurrenceKind MinMaxKind =		RecurrenceDescriptor::MinMaxRecurrenceKind MinMaxKind =
RecurrenceDescriptor::MRK_Invalid,		RecurrenceDescriptor::MRK_Invalid,
ArrayRef<Value *> RedOps = None);		ArrayRef<Value *> RedOps = None);

/// Generates a vector reduction using shufflevectors to reduce the value.		/// Generates a vector reduction using shufflevectors to reduce the value.
Value getShuffleReduction(IRBuilder<> &Builder, Value Src, unsigned Op,		Value getShuffleReduction(IRBuilder<> &Builder, Value Src, unsigned Op,
RecurrenceDescriptor::MinMaxRecurrenceKind		RecurrenceDescriptor::MinMaxRecurrenceKind
MinMaxKind = RecurrenceDescriptor::MRK_Invalid,		MinMaxKind = RecurrenceDescriptor::MRK_Invalid,
		FastMathFlags FMF = FastMathFlags(),
ArrayRef<Value *> RedOps = None);		ArrayRef<Value *> RedOps = None);

/// Create a target reduction of the given vector. The reduction operation		/// Create a target reduction of the given vector. The reduction operation
/// is described by the \p Opcode parameter. min/max reductions require		/// is described by the \p Opcode parameter. min/max reductions require
/// additional information supplied in \p Flags.		/// additional information supplied in \p Flags.
/// The target is queried to determine if intrinsics or shuffle sequences are		/// The target is queried to determine if intrinsics or shuffle sequences are
/// required to implement the reduction.		/// required to implement the reduction.
Value *createSimpleTargetReduction(IRBuilder<> &B,		Value *createSimpleTargetReduction(IRBuilder<> &B,
const TargetTransformInfo *TTI,		const TargetTransformInfo *TTI,
unsigned Opcode, Value *Src,		unsigned Opcode, Value *Src,
TargetTransformInfo::ReductionFlags Flags =		TargetTransformInfo::ReductionFlags Flags =
TargetTransformInfo::ReductionFlags(),		TargetTransformInfo::ReductionFlags(),
		FastMathFlags FMF = FastMathFlags(),
ArrayRef<Value *> RedOps = None);		ArrayRef<Value *> RedOps = None);

/// Create a generic target reduction using a recurrence descriptor \p Desc		/// Create a generic target reduction using a recurrence descriptor \p Desc
/// The target is queried to determine if intrinsics or shuffle sequences are		/// The target is queried to determine if intrinsics or shuffle sequences are
/// required to implement the reduction.		/// required to implement the reduction.
Value createTargetReduction(IRBuilder<> &B, const TargetTransformInfo TTI,		Value createTargetReduction(IRBuilder<> &B, const TargetTransformInfo TTI,
RecurrenceDescriptor &Desc, Value *Src,		RecurrenceDescriptor &Desc, Value *Src,
bool NoNaN = false);		bool NoNaN = false);
Show All 28 Lines

lib/Analysis/IVDescriptors.cpp

Show First 20 Lines • Show All 245 Lines • ▼ Show 20 Lines	if (!isIntegerRecurrenceKind(Kind))
return false;		return false;
if (isArithmeticRecurrenceKind(Kind))		if (isArithmeticRecurrenceKind(Kind))
Start = lookThroughAnd(Phi, RecurrenceType, VisitedInsts, CastInsts);		Start = lookThroughAnd(Phi, RecurrenceType, VisitedInsts, CastInsts);
}		}

Worklist.push_back(Start);		Worklist.push_back(Start);
VisitedInsts.insert(Start);		VisitedInsts.insert(Start);

		FastMathFlags FMF;
		sdesmalenUnsubmitted Done Reply Inline Actions Is it worth adding a comment describing that FMF will be an intersection of the FastMathFlags from all the reduction operations (and thus needs to start with the full set of flags)? sdesmalen: Is it worth adding a comment describing that FMF will be an intersection of the FastMathFlags…
		FMF.setFast();

// A value in the reduction can be used:		// A value in the reduction can be used:
// - By the reduction:		// - By the reduction:
// - Reduction operation:		// - Reduction operation:
// - One use of reduction value (safe).		// - One use of reduction value (safe).
// - Multiple use of reduction value (not safe).		// - Multiple use of reduction value (not safe).
// - PHI:		// - PHI:
// - All uses of the PHI must be the reduction (safe).		// - All uses of the PHI must be the reduction (safe).
// - Otherwise, not safe.		// - Otherwise, not safe.
Show All 29 Lines	while (!Worklist.empty()) {

// Any reduction instruction must be of one of the allowed kinds. We ignore		// Any reduction instruction must be of one of the allowed kinds. We ignore
// the starting value (the Phi or an AND instruction if the Phi has been		// the starting value (the Phi or an AND instruction if the Phi has been
// type-promoted).		// type-promoted).
if (Cur != Start) {		if (Cur != Start) {
ReduxDesc = isRecurrenceInstr(Cur, Kind, ReduxDesc, HasFunNoNaNAttr);		ReduxDesc = isRecurrenceInstr(Cur, Kind, ReduxDesc, HasFunNoNaNAttr);
if (!ReduxDesc.isRecurrence())		if (!ReduxDesc.isRecurrence())
return false;		return false;
		if (isa<FPMathOperator>(ReduxDesc.getPatternInst())) {
		sdesmalenUnsubmitted Done Reply Inline Actions nit: unnecessary curly braces sdesmalen: nit: unnecessary curly braces
		FMF &= ReduxDesc.getPatternInst()->getFastMathFlags();
		}
}		}

bool IsASelect = isa<SelectInst>(Cur);		bool IsASelect = isa<SelectInst>(Cur);

// A conditional reduction operation must only have 2 or fewer uses in		// A conditional reduction operation must only have 2 or fewer uses in
// VisitedInsts.		// VisitedInsts.
if (IsASelect && (Kind == RK_FloatAdd \|\| Kind == RK_FloatMult) &&		if (IsASelect && (Kind == RK_FloatAdd \|\| Kind == RK_FloatMult) &&
hasMultipleUsesOf(Cur, VisitedInsts, 2))		hasMultipleUsesOf(Cur, VisitedInsts, 2))
▲ Show 20 Lines • Show All 129 Lines • ▼ Show 20 Lines	bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurrenceKind Kind,
// We found a reduction var if we have reached the original phi node and we		// We found a reduction var if we have reached the original phi node and we
// only have a single instruction with out-of-loop users.		// only have a single instruction with out-of-loop users.

// The ExitInstruction(Instruction which is allowed to have out-of-loop users)		// The ExitInstruction(Instruction which is allowed to have out-of-loop users)
// is saved as part of the RecurrenceDescriptor.		// is saved as part of the RecurrenceDescriptor.

// Save the description of this reduction variable.		// Save the description of this reduction variable.
RecurrenceDescriptor RD(		RecurrenceDescriptor RD(
RdxStart, ExitInstruction, Kind, ReduxDesc.getMinMaxKind(),		RdxStart, ExitInstruction, Kind, FMF, ReduxDesc.getMinMaxKind(),
ReduxDesc.getUnsafeAlgebraInst(), RecurrenceType, IsSigned, CastInsts);		ReduxDesc.getUnsafeAlgebraInst(), RecurrenceType, IsSigned, CastInsts);
RedDes = RD;		RedDes = RD;

return true;		return true;
}		}

/// Returns true if the instruction is a Select(ICmp(X, Y), X, Y) instruction		/// Returns true if the instruction is a Select(ICmp(X, Y), X, Y) instruction
/// pattern corresponding to a min(X, Y) or max(X, Y).		/// pattern corresponding to a min(X, Y) or max(X, Y).
▲ Show 20 Lines • Show All 88 Lines • ▼ Show 20 Lines	if ((m_FAdd(m_Value(Op1), m_Value(Op2)).match(I1) \|\|
return InstDesc(Kind == RK_FloatAdd, SI);		return InstDesc(Kind == RK_FloatAdd, SI);

if (m_FMul(m_Value(Op1), m_Value(Op2)).match(I1) && (I1->isFast()))		if (m_FMul(m_Value(Op1), m_Value(Op2)).match(I1) && (I1->isFast()))
return InstDesc(Kind == RK_FloatMult, SI);		return InstDesc(Kind == RK_FloatMult, SI);

return InstDesc(false, I);		return InstDesc(false, I);
}		}

		static bool CanVectorizeReduction(Instruction *I) {
		return !isa<FPMathOperator>(I) \|\| I->hasAllowReassoc();
		sdesmalenUnsubmitted Done Reply Inline Actions Is there a reason for the condition `I->hasAllowContract()`? As far as I can tell only `hasAllowReassoc()` seems required and thus `hasAllowContract()` would be overly restrictive. As long as `hasAllowReassoc()` is true, I think the resulting reduction operations should have the same properties as the original instruction. For example, if an instruction has `hasNoNaNs() == hasNoInfs() == false`, the vectorised reduction retains those properties under reassociation. When trying out this patch, the reduction block seems to assume `fast` instead of just `reassoc`, e.g. middle.block: %bin.rdx = fadd fast <4 x float> %9, %8 You'll probably want to fix that and extend the test to ensure the flags are retained. sdesmalen: Is there a reason for the condition `I->hasAllowContract()`? As far as I can tell only…
		}

RecurrenceDescriptor::InstDesc		RecurrenceDescriptor::InstDesc
RecurrenceDescriptor::isRecurrenceInstr(Instruction *I, RecurrenceKind Kind,		RecurrenceDescriptor::isRecurrenceInstr(Instruction *I, RecurrenceKind Kind,
InstDesc &Prev, bool HasFunNoNaNAttr) {		InstDesc &Prev, bool HasFunNoNaNAttr) {
Instruction *UAI = Prev.getUnsafeAlgebraInst();		Instruction *UAI = Prev.getUnsafeAlgebraInst();
if (!UAI && isa<FPMathOperator>(I) && !I->isFast())		if (!UAI && !CanVectorizeReduction(I))
		sdesmalenUnsubmitted Done Reply Inline Actions nit: since 'CanVectorizeReduction()' is only used once, it probably makes more sense to just expand it here and remove the function. sdesmalen: nit: since 'CanVectorizeReduction()' is only used once, it probably makes more sense to just…
UAI = I; // Found an unsafe (unvectorizable) algebra instruction.		UAI = I; // Found an unsafe (unvectorizable) algebra instruction.

switch (I->getOpcode()) {		switch (I->getOpcode()) {
default:		default:
return InstDesc(false, I);		return InstDesc(false, I);
case Instruction::PHI:		case Instruction::PHI:
return InstDesc(I, Prev.getMinMaxKind(), Prev.getUnsafeAlgebraInst());		return InstDesc(I, Prev.getMinMaxKind(), Prev.getUnsafeAlgebraInst());
case Instruction::Sub:		case Instruction::Sub:
▲ Show 20 Lines • Show All 526 Lines • Show Last 20 Lines

lib/CodeGen/ExpandReductions.cpp

Show First 20 Lines • Show All 114 Lines • ▼ Show 20 Lines	case Intrinsic::experimental_vector_reduce_fmin:
break;		break;
default:		default:
continue;		continue;
}		}
if (!TTI->shouldExpandReduction(II))		if (!TTI->shouldExpandReduction(II))
continue;		continue;
Value *Rdx =		Value *Rdx =
IsOrdered ? getOrderedReduction(Builder, Acc, Vec, getOpcode(ID), MRK)		IsOrdered ? getOrderedReduction(Builder, Acc, Vec, getOpcode(ID), MRK)
: getShuffleReduction(Builder, Vec, getOpcode(ID), MRK);		: getShuffleReduction(Builder, Vec, getOpcode(ID), MRK,
		FastMathFlags::getFast());
		sdesmalenUnsubmitted Not Done Reply Inline Actions Rather than choosing 'getFast() and going through 'IsOrdered' (which is set when the full 'fast' property is set), do we want to take the FastMathFlags from the IntrinsicInst directly? sdesmalen: Rather than choosing 'getFast() and going through 'IsOrdered' (which is set when the full…
		sanjoyAuthorUnsubmitted Done Reply Inline Actions Rather than choosing 'getFast() and going through 'IsOrdered' (which is set when the full 'fast' property is set), do we want to take the FastMathFlags from the IntrinsicInst directly? My rationale was that I wanted to keep this as obviously NFC as possible -- previously we would implicitly tag the instructions as `fast`, and this change just makes it explicit. Note that there is a test missing for this change. This change is not supposed to change any behavior (assuming no bugs :) ). Do you want me to add a test case to check existing behavior? `CodeGen/Generic/expand-experimental-reductions.ll` already tests for `fast` flags in the expansion. sanjoy: > Rather than choosing 'getFast() and going through 'IsOrdered' (which is set when the full…
		sdesmalenUnsubmitted Done Reply Inline Actions My rationale was that I wanted to keep this as obviously NFC as possible -- previously we would implicitly tag the instructions as fast, and this change just makes it explicit. In a way that feels a bit artificial since the patch is already not NFC and the purpose of this patch is to be more specific about passing the exact flags. However, if you think it makes more sense to do this change in a separate patch, that's fine with me. sdesmalen: > My rationale was that I wanted to keep this as obviously NFC as possible -- previously we…
		sdesmalenUnsubmitted Not Done Reply Inline Actions Note that there is a test missing for this change. sdesmalen: Note that there is a test missing for this change.
		sanjoyAuthorUnsubmitted Done Reply Inline Actions As I said above, we already have tests for this behavior (that we propagate fast-math flags into `getShuffleReduction`). sanjoy: As I said above, we already have tests for this behavior (that we propagate fast-math flags…
II->replaceAllUsesWith(Rdx);		II->replaceAllUsesWith(Rdx);
II->eraseFromParent();		II->eraseFromParent();
Changed = true;		Changed = true;
}		}
return Changed;		return Changed;
}		}

class ExpandReductions : public FunctionPass {		class ExpandReductions : public FunctionPass {
Show All 38 Lines

lib/Transforms/Utils/LoopUtils.cpp

Show First 20 Lines • Show All 665 Lines • ▼ Show 20 Lines	bool llvm::hasIterationCountInvariantInParent(Loop *InnerLoop,
ScalarEvolution::LoopDisposition LD =		ScalarEvolution::LoopDisposition LD =
SE.getLoopDisposition(InnerLoopBECountSC, OuterL);		SE.getLoopDisposition(InnerLoopBECountSC, OuterL);
if (LD != ScalarEvolution::LoopInvariant)		if (LD != ScalarEvolution::LoopInvariant)
return false;		return false;

return true;		return true;
}		}

/// Adds a 'fast' flag to floating point operations.		static Value addFastMathFlag(Value V, FastMathFlags FMF) {
static Value addFastMathFlag(Value V) {
if (isa<FPMathOperator>(V)) {		if (isa<FPMathOperator>(V)) {
		sdesmalenUnsubmitted Done Reply Inline Actions nit: unnecessary curly braces. sdesmalen: nit: unnecessary curly braces.
FastMathFlags Flags;		cast<Instruction>(V)->setFastMathFlags(FMF);
Flags.setFast();
cast<Instruction>(V)->setFastMathFlags(Flags);
}		}
return V;		return V;
}		}

Value *llvm::createMinMaxOp(IRBuilder<> &Builder,		Value *llvm::createMinMaxOp(IRBuilder<> &Builder,
RecurrenceDescriptor::MinMaxRecurrenceKind RK,		RecurrenceDescriptor::MinMaxRecurrenceKind RK,
Value Left, Value Right) {		Value Left, Value Right) {
CmpInst::Predicate P = CmpInst::ICMP_NE;		CmpInst::Predicate P = CmpInst::ICMP_NE;
▲ Show 20 Lines • Show All 68 Lines • ▼ Show 20 Lines	llvm::getOrderedReduction(IRBuilder<> &Builder, Value Acc, Value Src,

return Result;		return Result;
}		}

// Helper to generate a log2 shuffle reduction.		// Helper to generate a log2 shuffle reduction.
Value *		Value *
llvm::getShuffleReduction(IRBuilder<> &Builder, Value *Src, unsigned Op,		llvm::getShuffleReduction(IRBuilder<> &Builder, Value *Src, unsigned Op,
RecurrenceDescriptor::MinMaxRecurrenceKind MinMaxKind,		RecurrenceDescriptor::MinMaxRecurrenceKind MinMaxKind,
ArrayRef<Value *> RedOps) {		FastMathFlags FMF, ArrayRef<Value *> RedOps) {
unsigned VF = Src->getType()->getVectorNumElements();		unsigned VF = Src->getType()->getVectorNumElements();
// VF is a power of 2 so we can emit the reduction using log2(VF) shuffles		// VF is a power of 2 so we can emit the reduction using log2(VF) shuffles
// and vector ops, reducing the set of values being computed by half each		// and vector ops, reducing the set of values being computed by half each
// round.		// round.
assert(isPowerOf2_32(VF) &&		assert(isPowerOf2_32(VF) &&
"Reduction emission only supported for pow2 vectors!");		"Reduction emission only supported for pow2 vectors!");
Value *TmpVec = Src;		Value *TmpVec = Src;
SmallVector<Constant *, 32> ShuffleMask(VF, nullptr);		SmallVector<Constant *, 32> ShuffleMask(VF, nullptr);
for (unsigned i = VF; i != 1; i >>= 1) {		for (unsigned i = VF; i != 1; i >>= 1) {
// Move the upper half of the vector to the lower half.		// Move the upper half of the vector to the lower half.
for (unsigned j = 0; j != i / 2; ++j)		for (unsigned j = 0; j != i / 2; ++j)
ShuffleMask[j] = Builder.getInt32(i / 2 + j);		ShuffleMask[j] = Builder.getInt32(i / 2 + j);

// Fill the rest of the mask with undef.		// Fill the rest of the mask with undef.
std::fill(&ShuffleMask[i / 2], ShuffleMask.end(),		std::fill(&ShuffleMask[i / 2], ShuffleMask.end(),
UndefValue::get(Builder.getInt32Ty()));		UndefValue::get(Builder.getInt32Ty()));

Value *Shuf = Builder.CreateShuffleVector(		Value *Shuf = Builder.CreateShuffleVector(
TmpVec, UndefValue::get(TmpVec->getType()),		TmpVec, UndefValue::get(TmpVec->getType()),
ConstantVector::get(ShuffleMask), "rdx.shuf");		ConstantVector::get(ShuffleMask), "rdx.shuf");

if (Op != Instruction::ICmp && Op != Instruction::FCmp) {		if (Op != Instruction::ICmp && Op != Instruction::FCmp) {
// Floating point operations had to be 'fast' to enable the reduction.		// Floating point operations had to be 'fast' to enable the reduction.
TmpVec = addFastMathFlag(Builder.CreateBinOp((Instruction::BinaryOps)Op,		TmpVec = addFastMathFlag(Builder.CreateBinOp((Instruction::BinaryOps)Op,
TmpVec, Shuf, "bin.rdx"));		TmpVec, Shuf, "bin.rdx"),
		FMF);
} else {		} else {
assert(MinMaxKind != RecurrenceDescriptor::MRK_Invalid &&		assert(MinMaxKind != RecurrenceDescriptor::MRK_Invalid &&
"Invalid min/max");		"Invalid min/max");
TmpVec = createMinMaxOp(Builder, MinMaxKind, TmpVec, Shuf);		TmpVec = createMinMaxOp(Builder, MinMaxKind, TmpVec, Shuf);
}		}
if (!RedOps.empty())		if (!RedOps.empty())
propagateIRFlags(TmpVec, RedOps);		propagateIRFlags(TmpVec, RedOps);
}		}
// The result is in the first element of the vector.		// The result is in the first element of the vector.
return Builder.CreateExtractElement(TmpVec, Builder.getInt32(0));		return Builder.CreateExtractElement(TmpVec, Builder.getInt32(0));
}		}

/// Create a simple vector reduction specified by an opcode and some		/// Create a simple vector reduction specified by an opcode and some
/// flags (if generating min/max reductions).		/// flags (if generating min/max reductions).
Value *llvm::createSimpleTargetReduction(		Value *llvm::createSimpleTargetReduction(
IRBuilder<> &Builder, const TargetTransformInfo *TTI, unsigned Opcode,		IRBuilder<> &Builder, const TargetTransformInfo *TTI, unsigned Opcode,
Value *Src, TargetTransformInfo::ReductionFlags Flags,		Value *Src, TargetTransformInfo::ReductionFlags Flags, FastMathFlags FMF,
ArrayRef<Value *> RedOps) {		ArrayRef<Value *> RedOps) {
assert(isa<VectorType>(Src->getType()) && "Type must be a vector");		assert(isa<VectorType>(Src->getType()) && "Type must be a vector");

Value *ScalarUdf = UndefValue::get(Src->getType()->getVectorElementType());		Value *ScalarUdf = UndefValue::get(Src->getType()->getVectorElementType());
std::function<Value *()> BuildFunc;		std::function<Value *()> BuildFunc;
using RD = RecurrenceDescriptor;		using RD = RecurrenceDescriptor;
RD::MinMaxRecurrenceKind MinMaxKind = RD::MRK_Invalid;		RD::MinMaxRecurrenceKind MinMaxKind = RD::MRK_Invalid;
// TODO: Support creating ordered reductions.		// TODO: Support creating ordered reductions.
▲ Show 20 Lines • Show All 53 Lines • ▼ Show 20 Lines	case Instruction::FCmp:
}		}
break;		break;
default:		default:
llvm_unreachable("Unhandled opcode");		llvm_unreachable("Unhandled opcode");
break;		break;
}		}
if (TTI->useReductionIntrinsic(Opcode, Src->getType(), Flags))		if (TTI->useReductionIntrinsic(Opcode, Src->getType(), Flags))
return BuildFunc();		return BuildFunc();
return getShuffleReduction(Builder, Src, Opcode, MinMaxKind, RedOps);		return getShuffleReduction(Builder, Src, Opcode, MinMaxKind, FMF, RedOps);
}		}

/// Create a vector reduction using a given recurrence descriptor.		/// Create a vector reduction using a given recurrence descriptor.
Value *llvm::createTargetReduction(IRBuilder<> &B,		Value *llvm::createTargetReduction(IRBuilder<> &B,
const TargetTransformInfo *TTI,		const TargetTransformInfo *TTI,
RecurrenceDescriptor &Desc, Value *Src,		RecurrenceDescriptor &Desc, Value *Src,
bool NoNaN) {		bool NoNaN) {
// TODO: Support in-order reductions based on the recurrence descriptor.		// TODO: Support in-order reductions based on the recurrence descriptor.
using RD = RecurrenceDescriptor;		using RD = RecurrenceDescriptor;
RD::RecurrenceKind RecKind = Desc.getRecurrenceKind();		RD::RecurrenceKind RecKind = Desc.getRecurrenceKind();
TargetTransformInfo::ReductionFlags Flags;		TargetTransformInfo::ReductionFlags Flags;
Flags.NoNaN = NoNaN;		Flags.NoNaN = NoNaN;
switch (RecKind) {		switch (RecKind) {
case RD::RK_FloatAdd:		case RD::RK_FloatAdd:
return createSimpleTargetReduction(B, TTI, Instruction::FAdd, Src, Flags);		return createSimpleTargetReduction(B, TTI, Instruction::FAdd, Src, Flags,
		Desc.getFastMathFlags());
case RD::RK_FloatMult:		case RD::RK_FloatMult:
return createSimpleTargetReduction(B, TTI, Instruction::FMul, Src, Flags);		return createSimpleTargetReduction(B, TTI, Instruction::FMul, Src, Flags,
		Desc.getFastMathFlags());
case RD::RK_IntegerAdd:		case RD::RK_IntegerAdd:
return createSimpleTargetReduction(B, TTI, Instruction::Add, Src, Flags);		return createSimpleTargetReduction(B, TTI, Instruction::Add, Src, Flags,
		Desc.getFastMathFlags());
case RD::RK_IntegerMult:		case RD::RK_IntegerMult:
return createSimpleTargetReduction(B, TTI, Instruction::Mul, Src, Flags);		return createSimpleTargetReduction(B, TTI, Instruction::Mul, Src, Flags,
		Desc.getFastMathFlags());
case RD::RK_IntegerAnd:		case RD::RK_IntegerAnd:
return createSimpleTargetReduction(B, TTI, Instruction::And, Src, Flags);		return createSimpleTargetReduction(B, TTI, Instruction::And, Src, Flags,
		Desc.getFastMathFlags());
case RD::RK_IntegerOr:		case RD::RK_IntegerOr:
return createSimpleTargetReduction(B, TTI, Instruction::Or, Src, Flags);		return createSimpleTargetReduction(B, TTI, Instruction::Or, Src, Flags,
		Desc.getFastMathFlags());
case RD::RK_IntegerXor:		case RD::RK_IntegerXor:
return createSimpleTargetReduction(B, TTI, Instruction::Xor, Src, Flags);		return createSimpleTargetReduction(B, TTI, Instruction::Xor, Src, Flags,
		Desc.getFastMathFlags());
case RD::RK_IntegerMinMax: {		case RD::RK_IntegerMinMax: {
RD::MinMaxRecurrenceKind MMKind = Desc.getMinMaxRecurrenceKind();		RD::MinMaxRecurrenceKind MMKind = Desc.getMinMaxRecurrenceKind();
Flags.IsMaxOp = (MMKind == RD::MRK_SIntMax \|\| MMKind == RD::MRK_UIntMax);		Flags.IsMaxOp = (MMKind == RD::MRK_SIntMax \|\| MMKind == RD::MRK_UIntMax);
Flags.IsSigned = (MMKind == RD::MRK_SIntMax \|\| MMKind == RD::MRK_SIntMin);		Flags.IsSigned = (MMKind == RD::MRK_SIntMax \|\| MMKind == RD::MRK_SIntMin);
return createSimpleTargetReduction(B, TTI, Instruction::ICmp, Src, Flags);		return createSimpleTargetReduction(B, TTI, Instruction::ICmp, Src, Flags,
		Desc.getFastMathFlags());
}		}
case RD::RK_FloatMinMax: {		case RD::RK_FloatMinMax: {
Flags.IsMaxOp = Desc.getMinMaxRecurrenceKind() == RD::MRK_FloatMax;		Flags.IsMaxOp = Desc.getMinMaxRecurrenceKind() == RD::MRK_FloatMax;
return createSimpleTargetReduction(B, TTI, Instruction::FCmp, Src, Flags);		return createSimpleTargetReduction(B, TTI, Instruction::FCmp, Src, Flags,
		Desc.getFastMathFlags());
}		}
default:		default:
llvm_unreachable("Unhandled RecKind");		llvm_unreachable("Unhandled RecKind");
}		}
}		}

void llvm::propagateIRFlags(Value I, ArrayRef<Value > VL, Value *OpValue) {		void llvm::propagateIRFlags(Value I, ArrayRef<Value > VL, Value *OpValue) {
auto *VecOp = dyn_cast<Instruction>(I);		auto *VecOp = dyn_cast<Instruction>(I);
▲ Show 20 Lines • Show All 52 Lines • Show Last 20 Lines

lib/Transforms/Vectorize/LoopVectorize.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 313 Lines • ▼ Show 20 Lines
/// will execute once for every X iterations of the loop header.		/// will execute once for every X iterations of the loop header.
///		///
/// TODO: We should use actual block probability here, if available. Currently,		/// TODO: We should use actual block probability here, if available. Currently,
/// we always assume predicated blocks have a 50% chance of executing.		/// we always assume predicated blocks have a 50% chance of executing.
static unsigned getReciprocalPredBlockProb() { return 2; }		static unsigned getReciprocalPredBlockProb() { return 2; }

/// A helper function that adds a 'fast' flag to floating-point operations.		/// A helper function that adds a 'fast' flag to floating-point operations.
static Value addFastMathFlag(Value V) {		static Value addFastMathFlag(Value V) {
if (isa<FPMathOperator>(V)) {		if (isa<FPMathOperator>(V)) {
		sdesmalenUnsubmitted Done Reply Inline Actions nit: unnecessary curly braces. sdesmalen: nit: unnecessary curly braces.
FastMathFlags Flags;		cast<Instruction>(V)->setFastMathFlags(FastMathFlags::getFast());
Flags.setFast();		}
cast<Instruction>(V)->setFastMathFlags(Flags);		return V;
		}

		static Value addFastMathFlag(Value V, FastMathFlags FMF) {
		if (isa<FPMathOperator>(V)) {
		sdesmalenUnsubmitted Done Reply Inline Actions nit: unnecessary curly braces. sdesmalen: nit: unnecessary curly braces.
		cast<Instruction>(V)->setFastMathFlags(FMF);
}		}
return V;		return V;
}		}

/// A helper function that returns an integer or floating-point constant with		/// A helper function that returns an integer or floating-point constant with
/// value C.		/// value C.
static Constant getSignedIntOrFpConstant(Type Ty, int64_t C) {		static Constant getSignedIntOrFpConstant(Type Ty, int64_t C) {
return Ty->isIntegerTy() ? ConstantInt::getSigned(Ty, C)		return Ty->isIntegerTy() ? ConstantInt::getSigned(Ty, C)
▲ Show 20 Lines • Show All 3,273 Lines • ▼ Show 20 Lines	void InnerLoopVectorizer::fixReduction(PHINode *Phi) {
unsigned Op = RecurrenceDescriptor::getRecurrenceBinOp(RK);		unsigned Op = RecurrenceDescriptor::getRecurrenceBinOp(RK);
setDebugLocFromInst(Builder, ReducedPartRdx);		setDebugLocFromInst(Builder, ReducedPartRdx);
for (unsigned Part = 1; Part < UF; ++Part) {		for (unsigned Part = 1; Part < UF; ++Part) {
Value *RdxPart = VectorLoopValueMap.getVectorValue(LoopExitInst, Part);		Value *RdxPart = VectorLoopValueMap.getVectorValue(LoopExitInst, Part);
if (Op != Instruction::ICmp && Op != Instruction::FCmp)		if (Op != Instruction::ICmp && Op != Instruction::FCmp)
// Floating point operations had to be 'fast' to enable the reduction.		// Floating point operations had to be 'fast' to enable the reduction.
ReducedPartRdx = addFastMathFlag(		ReducedPartRdx = addFastMathFlag(
Builder.CreateBinOp((Instruction::BinaryOps)Op, RdxPart,		Builder.CreateBinOp((Instruction::BinaryOps)Op, RdxPart,
ReducedPartRdx, "bin.rdx"));		ReducedPartRdx, "bin.rdx"),
		RdxDesc.getFastMathFlags());
else		else
ReducedPartRdx = createMinMaxOp(Builder, MinMaxKind, ReducedPartRdx,		ReducedPartRdx = createMinMaxOp(Builder, MinMaxKind, ReducedPartRdx,
RdxPart);		RdxPart);
}		}

if (VF > 1) {		if (VF > 1) {
bool NoNaN = Legal->hasFunNoNaNAttr();		bool NoNaN = Legal->hasFunNoNaNAttr();
ReducedPartRdx =		ReducedPartRdx =
▲ Show 20 Lines • Show All 3,962 Lines • Show Last 20 Lines

lib/Transforms/Vectorize/SLPVectorizer.cpp

Show First 20 Lines • Show All 5,923 Lines • ▼ Show 20 Lines	Value emitReduction(Value VectorizedValue, IRBuilder<> &Builder,
unsigned ReduxWidth, const TargetTransformInfo *TTI) {		unsigned ReduxWidth, const TargetTransformInfo *TTI) {
assert(VectorizedValue && "Need to have a vectorized tree node");		assert(VectorizedValue && "Need to have a vectorized tree node");
assert(isPowerOf2_32(ReduxWidth) &&		assert(isPowerOf2_32(ReduxWidth) &&
"We only handle power-of-two reductions for now");		"We only handle power-of-two reductions for now");

if (!IsPairwiseReduction)		if (!IsPairwiseReduction)
return createSimpleTargetReduction(		return createSimpleTargetReduction(
Builder, TTI, ReductionData.getOpcode(), VectorizedValue,		Builder, TTI, ReductionData.getOpcode(), VectorizedValue,
ReductionData.getFlags(), ReductionOps.back());		ReductionData.getFlags(), FastMathFlags::getFast(),
		ReductionOps.back());

Value *TmpVec = VectorizedValue;		Value *TmpVec = VectorizedValue;
for (unsigned i = ReduxWidth / 2; i != 0; i >>= 1) {		for (unsigned i = ReduxWidth / 2; i != 0; i >>= 1) {
Value *LeftMask =		Value *LeftMask =
createRdxShuffleMask(ReduxWidth, i, true, true, Builder);		createRdxShuffleMask(ReduxWidth, i, true, true, Builder);
Value *RightMask =		Value *RightMask =
createRdxShuffleMask(ReduxWidth, i, true, false, Builder);		createRdxShuffleMask(ReduxWidth, i, true, false, Builder);

▲ Show 20 Lines • Show All 546 Lines • Show Last 20 Lines

test/Transforms/LoopVectorize/reduction-fastmath.ll

This file was added.

				; RUN: opt -S -loop-vectorize < %s | FileCheck %s

				target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
				target triple = "x86_64-unknown-linux-gnu"

				; Baseline case: the reduction 'fadd' in this loop carries no fast-math
				; flags. The assertions below expect that no <4 x float> wide load is
				; emitted for this function. %n is not referenced; both compares use the
				; constant 4096.
				define float @reduction_sum_float_ieee(i32 %n, float* %array) {
				; CHECK-LABEL: define float @reduction_sum_float_ieee(
				entry:
				%entry.cond = icmp ne i32 0, 4096
				br i1 %entry.cond, label %loop, label %loop.exit

				loop:
				%idx = phi i32 [ 0, %entry ], [ %idx.inc, %loop ]
				; Loop-carried running sum, starting at 0.0.
				%sum = phi float [ 0.000000e+00, %entry ], [ %sum.inc, %loop ]
				%address = getelementptr float, float* %array, i32 %idx
				%value = load float, float* %address
				; Flag-free floating-point add that accumulates into the running sum.
				%sum.inc = fadd float %sum, %value
				%idx.inc = add i32 %idx, 1
				%be.cond = icmp ne i32 %idx.inc, 4096
				br i1 %be.cond, label %loop, label %loop.exit

				loop.exit:
				%sum.lcssa = phi float [ %sum.inc, %loop ], [ 0.000000e+00, %entry ]
				; CHECK-NOT: %wide.load = load <4 x float>, <4 x float>*
				; CHECK: ret float %sum.lcssa
				ret float %sum.lcssa
				}

				define float @reduction_sum_float_fastmath(i32 %n, float* %array) {
				; CHECK-LABEL: define float @reduction_sum_float_fastmath(
				entry:
				%entry.cond = icmp ne i32 0, 4096
				br i1 %entry.cond, label %loop, label %loop.exit

				loop:
				%idx = phi i32 [ 0, %entry ], [ %idx.inc, %loop ]
				%sum = phi float [ 0.000000e+00, %entry ], [ %sum.inc, %loop ]
				%address = getelementptr float, float* %array, i32 %idx
				%value = load float, float* %address
				%sum.inc = fadd fast float %sum, %value
				%idx.inc = add i32 %idx, 1
				%be.cond = icmp ne i32 %idx.inc, 4096
				br i1 %be.cond, label %loop, label %loop.exit

				loop.exit:
				%sum.lcssa = phi float [ %sum.inc, %loop ], [ 0.000000e+00, %entry ]
				; CHECK: %wide.load = load <4 x float>, <4 x float>*
				; CHECK: ret float %sum.lcssa
				; [inline review comment — sdesmalen] Do you want to add a check for the 'fast' attribute on the reductions in the middle.block, similar to what you've done for reduction_sum_float_only_reassoc_and_contract?
				ret float %sum.lcssa
				}

				; Same summation loop, but the scalar 'fadd' carries only the 'reassoc'
				; flag. The assertions expect five 'fadd reassoc <4 x float>' instructions
				; in the output, with no 'fadd fast' appearing before the first of them.
				; NOTE(review): the five matches are presumably the vector-body adds plus
				; the adds that combine the partial sums — confirm against actual output.
				define float @reduction_sum_float_only_reassoc(i32 %n, float* %array) {
				; CHECK-LABEL: define float @reduction_sum_float_only_reassoc(
				; CHECK-NOT: fadd fast
				; CHECK: fadd reassoc <4 x float>
				; CHECK: fadd reassoc <4 x float>
				; CHECK: fadd reassoc <4 x float>
				; CHECK: fadd reassoc <4 x float>
				; CHECK: fadd reassoc <4 x float>

				entry:
				%entry.cond = icmp ne i32 0, 4096
				br i1 %entry.cond, label %loop, label %loop.exit

				loop:
				%idx = phi i32 [ 0, %entry ], [ %idx.inc, %loop ]
				; Loop-carried running sum, starting at 0.0.
				%sum = phi float [ 0.000000e+00, %entry ], [ %sum.inc, %loop ]
				%address = getelementptr float, float* %array, i32 %idx
				%value = load float, float* %address
				; Accumulating add with 'reassoc' as its only fast-math flag.
				%sum.inc = fadd reassoc float %sum, %value
				%idx.inc = add i32 %idx, 1
				%be.cond = icmp ne i32 %idx.inc, 4096
				br i1 %be.cond, label %loop, label %loop.exit

				loop.exit:
				%sum.lcssa = phi float [ %sum.inc, %loop ], [ 0.000000e+00, %entry ]
				; CHECK: ret float %sum.lcssa
				ret float %sum.lcssa
				}

				; Same summation loop, with the scalar 'fadd' carrying 'reassoc' and
				; 'contract'. The assertions expect five 'fadd reassoc contract <4 x float>'
				; instructions, i.e. the emitted vector adds keep exactly the scalar add's
				; flags, with no 'fadd fast' appearing before the first of them.
				define float @reduction_sum_float_only_reassoc_and_contract(i32 %n, float* %array) {
				; CHECK-LABEL: define float @reduction_sum_float_only_reassoc_and_contract(
				; CHECK-NOT: fadd fast
				; CHECK: fadd reassoc contract <4 x float>
				; CHECK: fadd reassoc contract <4 x float>
				; CHECK: fadd reassoc contract <4 x float>
				; CHECK: fadd reassoc contract <4 x float>
				; CHECK: fadd reassoc contract <4 x float>

				entry:
				%entry.cond = icmp ne i32 0, 4096
				br i1 %entry.cond, label %loop, label %loop.exit

				loop:
				%idx = phi i32 [ 0, %entry ], [ %idx.inc, %loop ]
				; Loop-carried running sum, starting at 0.0.
				%sum = phi float [ 0.000000e+00, %entry ], [ %sum.inc, %loop ]
				%address = getelementptr float, float* %array, i32 %idx
				%value = load float, float* %address
				; Accumulating add flagged 'reassoc contract' (not the full 'fast' set).
				%sum.inc = fadd reassoc contract float %sum, %value
				%idx.inc = add i32 %idx, 1
				%be.cond = icmp ne i32 %idx.inc, 4096
				br i1 %be.cond, label %loop, label %loop.exit

				loop.exit:
				%sum.lcssa = phi float [ %sum.inc, %loop ], [ 0.000000e+00, %entry ]
				; CHECK: ret float %sum.lcssa
				ret float %sum.lcssa
				}

This is an archive of the discontinued LLVM Phabricator instance.

Relax constraints for reduction vectorization
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 189272

include/llvm/Analysis/IVDescriptors.h

include/llvm/IR/Operator.h

include/llvm/Transforms/Utils/LoopUtils.h

lib/Analysis/IVDescriptors.cpp

lib/CodeGen/ExpandReductions.cpp

lib/Transforms/Utils/LoopUtils.cpp

lib/Transforms/Vectorize/LoopVectorize.cpp

lib/Transforms/Vectorize/SLPVectorizer.cpp

test/Transforms/LoopVectorize/reduction-fastmath.ll

This is an archive of the discontinued LLVM Phabricator instance.

Relax constraints for reduction vectorization — Closed · Public

Details

Diff Detail

Event Timeline

Revision Contents

Diff 189272

include/llvm/Analysis/IVDescriptors.h

include/llvm/IR/Operator.h

include/llvm/Transforms/Utils/LoopUtils.h

lib/Analysis/IVDescriptors.cpp

lib/CodeGen/ExpandReductions.cpp

lib/Transforms/Utils/LoopUtils.cpp

lib/Transforms/Vectorize/LoopVectorize.cpp

lib/Transforms/Vectorize/SLPVectorizer.cpp

test/Transforms/LoopVectorize/reduction-fastmath.ll

Relax constraints for reduction vectorization
ClosedPublic