Diff 190156

llvm/trunk/include/llvm/Analysis/IVDescriptors.h

Show First 20 Lines • Show All 83 Lines • ▼ Show 20 Lines	enum MinMaxRecurrenceKind {
MRK_SIntMax,		MRK_SIntMax,
MRK_FloatMin,		MRK_FloatMin,
MRK_FloatMax		MRK_FloatMax
};		};

RecurrenceDescriptor() = default;		RecurrenceDescriptor() = default;

RecurrenceDescriptor(Value *Start, Instruction *Exit, RecurrenceKind K,		RecurrenceDescriptor(Value *Start, Instruction *Exit, RecurrenceKind K,
MinMaxRecurrenceKind MK, Instruction *UAI, Type *RT,		FastMathFlags FMF, MinMaxRecurrenceKind MK,
bool Signed, SmallPtrSetImpl<Instruction *> &CI)		Instruction *UAI, Type *RT, bool Signed,
: StartValue(Start), LoopExitInstr(Exit), Kind(K), MinMaxKind(MK),		SmallPtrSetImpl<Instruction *> &CI)
UnsafeAlgebraInst(UAI), RecurrenceType(RT), IsSigned(Signed) {		: StartValue(Start), LoopExitInstr(Exit), Kind(K), FMF(FMF),
		MinMaxKind(MK), UnsafeAlgebraInst(UAI), RecurrenceType(RT),
		IsSigned(Signed) {
CastInsts.insert(CI.begin(), CI.end());		CastInsts.insert(CI.begin(), CI.end());
}		}

/// This POD struct holds information about a potential recurrence operation.		/// This POD struct holds information about a potential recurrence operation.
class InstDesc {		class InstDesc {
public:		public:
InstDesc(bool IsRecur, Instruction *I, Instruction *UAI = nullptr)		InstDesc(bool IsRecur, Instruction *I, Instruction *UAI = nullptr)
: IsRecurrence(IsRecur), PatternLastInst(I), MinMaxKind(MRK_Invalid),		: IsRecurrence(IsRecur), PatternLastInst(I), MinMaxKind(MRK_Invalid),
▲ Show 20 Lines • Show All 89 Lines • ▼ Show 20 Lines	public:
isFirstOrderRecurrence(PHINode *Phi, Loop *TheLoop,		isFirstOrderRecurrence(PHINode *Phi, Loop *TheLoop,
DenseMap<Instruction *, Instruction *> &SinkAfter,		DenseMap<Instruction *, Instruction *> &SinkAfter,
DominatorTree *DT);		DominatorTree *DT);

RecurrenceKind getRecurrenceKind() { return Kind; }		RecurrenceKind getRecurrenceKind() { return Kind; }

MinMaxRecurrenceKind getMinMaxRecurrenceKind() { return MinMaxKind; }		MinMaxRecurrenceKind getMinMaxRecurrenceKind() { return MinMaxKind; }

		FastMathFlags getFastMathFlags() { return FMF; }

TrackingVH<Value> getRecurrenceStartValue() { return StartValue; }		TrackingVH<Value> getRecurrenceStartValue() { return StartValue; }

Instruction *getLoopExitInstr() { return LoopExitInstr; }		Instruction *getLoopExitInstr() { return LoopExitInstr; }

/// Returns true if the recurrence has unsafe algebra which requires a relaxed		/// Returns true if the recurrence has unsafe algebra which requires a relaxed
/// floating-point model.		/// floating-point model.
bool hasUnsafeAlgebra() { return UnsafeAlgebraInst != nullptr; }		bool hasUnsafeAlgebra() { return UnsafeAlgebraInst != nullptr; }

Show All 23 Lines
private:		private:
// The starting value of the recurrence.		// The starting value of the recurrence.
// It does not have to be zero!		// It does not have to be zero!
TrackingVH<Value> StartValue;		TrackingVH<Value> StartValue;
// The instruction whose value is used outside the loop.		// The instruction whose value is used outside the loop.
Instruction *LoopExitInstr = nullptr;		Instruction *LoopExitInstr = nullptr;
// The kind of the recurrence.		// The kind of the recurrence.
RecurrenceKind Kind = RK_NoRecurrence;		RecurrenceKind Kind = RK_NoRecurrence;
		// The fast-math flags on the recurrent instructions. We propagate these
		// fast-math flags into the vectorized FP instructions we generate.
		FastMathFlags FMF;
// If this a min/max recurrence the kind of recurrence.		// If this a min/max recurrence the kind of recurrence.
MinMaxRecurrenceKind MinMaxKind = MRK_Invalid;		MinMaxRecurrenceKind MinMaxKind = MRK_Invalid;
// First occurrence of unsafe algebra in the PHI's use-chain.		// First occurrence of unsafe algebra in the PHI's use-chain.
Instruction *UnsafeAlgebraInst = nullptr;		Instruction *UnsafeAlgebraInst = nullptr;
// The type of the recurrence.		// The type of the recurrence.
Type *RecurrenceType = nullptr;		Type *RecurrenceType = nullptr;
// True if all source operands of the recurrence are SExtInsts.		// True if all source operands of the recurrence are SExtInsts.
bool IsSigned = false;		bool IsSigned = false;
▲ Show 20 Lines • Show All 109 Lines • Show Last 20 Lines

llvm/trunk/include/llvm/IR/Operator.h

Show First 20 Lines • Show All 181 Lines • ▼ Show 20 Lines	enum {
NoSignedZeros = (1 << 3),		NoSignedZeros = (1 << 3),
AllowReciprocal = (1 << 4),		AllowReciprocal = (1 << 4),
AllowContract = (1 << 5),		AllowContract = (1 << 5),
ApproxFunc = (1 << 6)		ApproxFunc = (1 << 6)
};		};

FastMathFlags() = default;		FastMathFlags() = default;

		static FastMathFlags getFast() {
		FastMathFlags FMF;
		FMF.setFast();
		return FMF;
		}

bool any() const { return Flags != 0; }		bool any() const { return Flags != 0; }
bool none() const { return Flags == 0; }		bool none() const { return Flags == 0; }
bool all() const { return Flags == ~0U; }		bool all() const { return Flags == ~0U; }

void clear() { Flags = 0; }		void clear() { Flags = 0; }
void set() { Flags = ~0U; }		void set() { Flags = ~0U; }

/// Flag queries		/// Flag queries
▲ Show 20 Lines • Show All 387 Lines • Show Last 20 Lines

llvm/trunk/include/llvm/Transforms/Utils/LoopUtils.h

Show First 20 Lines • Show All 290 Lines • ▼ Show 20 Lines	getOrderedReduction(IRBuilder<> &Builder, Value Acc, Value Src, unsigned Op,
RecurrenceDescriptor::MinMaxRecurrenceKind MinMaxKind =		RecurrenceDescriptor::MinMaxRecurrenceKind MinMaxKind =
RecurrenceDescriptor::MRK_Invalid,		RecurrenceDescriptor::MRK_Invalid,
ArrayRef<Value *> RedOps = None);		ArrayRef<Value *> RedOps = None);

/// Generates a vector reduction using shufflevectors to reduce the value.		/// Generates a vector reduction using shufflevectors to reduce the value.
Value *getShuffleReduction(IRBuilder<> &Builder, Value *Src, unsigned Op,		Value *getShuffleReduction(IRBuilder<> &Builder, Value *Src, unsigned Op,
RecurrenceDescriptor::MinMaxRecurrenceKind		RecurrenceDescriptor::MinMaxRecurrenceKind
MinMaxKind = RecurrenceDescriptor::MRK_Invalid,		MinMaxKind = RecurrenceDescriptor::MRK_Invalid,
		FastMathFlags FMF = FastMathFlags(),
ArrayRef<Value *> RedOps = None);		ArrayRef<Value *> RedOps = None);

/// Create a target reduction of the given vector. The reduction operation		/// Create a target reduction of the given vector. The reduction operation
/// is described by the \p Opcode parameter. min/max reductions require		/// is described by the \p Opcode parameter. min/max reductions require
/// additional information supplied in \p Flags.		/// additional information supplied in \p Flags.
/// The target is queried to determine if intrinsics or shuffle sequences are		/// The target is queried to determine if intrinsics or shuffle sequences are
/// required to implement the reduction.		/// required to implement the reduction.
Value *createSimpleTargetReduction(IRBuilder<> &B,		Value *createSimpleTargetReduction(IRBuilder<> &B,
const TargetTransformInfo *TTI,		const TargetTransformInfo *TTI,
unsigned Opcode, Value *Src,		unsigned Opcode, Value *Src,
TargetTransformInfo::ReductionFlags Flags =		TargetTransformInfo::ReductionFlags Flags =
TargetTransformInfo::ReductionFlags(),		TargetTransformInfo::ReductionFlags(),
		FastMathFlags FMF = FastMathFlags(),
ArrayRef<Value *> RedOps = None);		ArrayRef<Value *> RedOps = None);

/// Create a generic target reduction using a recurrence descriptor \p Desc		/// Create a generic target reduction using a recurrence descriptor \p Desc
/// The target is queried to determine if intrinsics or shuffle sequences are		/// The target is queried to determine if intrinsics or shuffle sequences are
/// required to implement the reduction.		/// required to implement the reduction.
Value *createTargetReduction(IRBuilder<> &B, const TargetTransformInfo *TTI,		Value *createTargetReduction(IRBuilder<> &B, const TargetTransformInfo *TTI,
RecurrenceDescriptor &Desc, Value *Src,		RecurrenceDescriptor &Desc, Value *Src,
bool NoNaN = false);		bool NoNaN = false);
Show All 28 Lines

llvm/trunk/lib/Analysis/IVDescriptors.cpp

Show First 20 Lines • Show All 245 Lines • ▼ Show 20 Lines	if (!isIntegerRecurrenceKind(Kind))
return false;		return false;
if (isArithmeticRecurrenceKind(Kind))		if (isArithmeticRecurrenceKind(Kind))
Start = lookThroughAnd(Phi, RecurrenceType, VisitedInsts, CastInsts);		Start = lookThroughAnd(Phi, RecurrenceType, VisitedInsts, CastInsts);
}		}

Worklist.push_back(Start);		Worklist.push_back(Start);
VisitedInsts.insert(Start);		VisitedInsts.insert(Start);

		// Start with all flags set because we will intersect this with the reduction
		// flags from all the reduction operations.
		FastMathFlags FMF = FastMathFlags::getFast();

// A value in the reduction can be used:		// A value in the reduction can be used:
// - By the reduction:		// - By the reduction:
// - Reduction operation:		// - Reduction operation:
// - One use of reduction value (safe).		// - One use of reduction value (safe).
// - Multiple use of reduction value (not safe).		// - Multiple use of reduction value (not safe).
// - PHI:		// - PHI:
// - All uses of the PHI must be the reduction (safe).		// - All uses of the PHI must be the reduction (safe).
// - Otherwise, not safe.		// - Otherwise, not safe.
Show All 29 Lines	while (!Worklist.empty()) {

// Any reduction instruction must be of one of the allowed kinds. We ignore		// Any reduction instruction must be of one of the allowed kinds. We ignore
// the starting value (the Phi or an AND instruction if the Phi has been		// the starting value (the Phi or an AND instruction if the Phi has been
// type-promoted).		// type-promoted).
if (Cur != Start) {		if (Cur != Start) {
ReduxDesc = isRecurrenceInstr(Cur, Kind, ReduxDesc, HasFunNoNaNAttr);		ReduxDesc = isRecurrenceInstr(Cur, Kind, ReduxDesc, HasFunNoNaNAttr);
if (!ReduxDesc.isRecurrence())		if (!ReduxDesc.isRecurrence())
return false;		return false;
		if (isa<FPMathOperator>(ReduxDesc.getPatternInst()))
		FMF &= ReduxDesc.getPatternInst()->getFastMathFlags();
}		}

bool IsASelect = isa<SelectInst>(Cur);		bool IsASelect = isa<SelectInst>(Cur);

// A conditional reduction operation must only have 2 or less uses in		// A conditional reduction operation must only have 2 or less uses in
// VisitedInsts.		// VisitedInsts.
if (IsASelect && (Kind == RK_FloatAdd || Kind == RK_FloatMult) &&		if (IsASelect && (Kind == RK_FloatAdd || Kind == RK_FloatMult) &&
hasMultipleUsesOf(Cur, VisitedInsts, 2))		hasMultipleUsesOf(Cur, VisitedInsts, 2))
▲ Show 20 Lines • Show All 129 Lines • ▼ Show 20 Lines	bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurrenceKind Kind,
// We found a reduction var if we have reached the original phi node and we		// We found a reduction var if we have reached the original phi node and we
// only have a single instruction with out-of-loop users.		// only have a single instruction with out-of-loop users.

// The ExitInstruction(Instruction which is allowed to have out-of-loop users)		// The ExitInstruction(Instruction which is allowed to have out-of-loop users)
// is saved as part of the RecurrenceDescriptor.		// is saved as part of the RecurrenceDescriptor.

// Save the description of this reduction variable.		// Save the description of this reduction variable.
RecurrenceDescriptor RD(		RecurrenceDescriptor RD(
RdxStart, ExitInstruction, Kind, ReduxDesc.getMinMaxKind(),		RdxStart, ExitInstruction, Kind, FMF, ReduxDesc.getMinMaxKind(),
ReduxDesc.getUnsafeAlgebraInst(), RecurrenceType, IsSigned, CastInsts);		ReduxDesc.getUnsafeAlgebraInst(), RecurrenceType, IsSigned, CastInsts);
RedDes = RD;		RedDes = RD;

return true;		return true;
}		}

/// Returns true if the instruction is a Select(ICmp(X, Y), X, Y) instruction		/// Returns true if the instruction is a Select(ICmp(X, Y), X, Y) instruction
/// pattern corresponding to a min(X, Y) or max(X, Y).		/// pattern corresponding to a min(X, Y) or max(X, Y).
▲ Show 20 Lines • Show All 92 Lines • ▼ Show 20 Lines	RecurrenceDescriptor::isConditionalRdxPattern(

return InstDesc(false, I);		return InstDesc(false, I);
}		}

RecurrenceDescriptor::InstDesc		RecurrenceDescriptor::InstDesc
RecurrenceDescriptor::isRecurrenceInstr(Instruction *I, RecurrenceKind Kind,		RecurrenceDescriptor::isRecurrenceInstr(Instruction *I, RecurrenceKind Kind,
InstDesc &Prev, bool HasFunNoNaNAttr) {		InstDesc &Prev, bool HasFunNoNaNAttr) {
Instruction *UAI = Prev.getUnsafeAlgebraInst();		Instruction *UAI = Prev.getUnsafeAlgebraInst();
if (!UAI && isa<FPMathOperator>(I) && !I->isFast())		if (!UAI && isa<FPMathOperator>(I) && !I->hasAllowReassoc())
UAI = I; // Found an unsafe (unvectorizable) algebra instruction.		UAI = I; // Found an unsafe (unvectorizable) algebra instruction.

switch (I->getOpcode()) {		switch (I->getOpcode()) {
default:		default:
return InstDesc(false, I);		return InstDesc(false, I);
case Instruction::PHI:		case Instruction::PHI:
return InstDesc(I, Prev.getMinMaxKind(), Prev.getUnsafeAlgebraInst());		return InstDesc(I, Prev.getMinMaxKind(), Prev.getUnsafeAlgebraInst());
case Instruction::Sub:		case Instruction::Sub:
▲ Show 20 Lines • Show All 526 Lines • Show Last 20 Lines

llvm/trunk/lib/CodeGen/ExpandReductions.cpp

Show First 20 Lines • Show All 112 Lines • ▼ Show 20 Lines	case Intrinsic::experimental_vector_reduce_fmin:
Vec = II->getArgOperand(0);		Vec = II->getArgOperand(0);
MRK = getMRK(ID);		MRK = getMRK(ID);
break;		break;
default:		default:
continue;		continue;
}		}
if (!TTI->shouldExpandReduction(II))		if (!TTI->shouldExpandReduction(II))
continue;		continue;
		FastMathFlags FMF =
		isa<FPMathOperator>(II) ? II->getFastMathFlags() : FastMathFlags{};
Value *Rdx =		Value *Rdx =
IsOrdered ? getOrderedReduction(Builder, Acc, Vec, getOpcode(ID), MRK)		IsOrdered ? getOrderedReduction(Builder, Acc, Vec, getOpcode(ID), MRK)
: getShuffleReduction(Builder, Vec, getOpcode(ID), MRK);		: getShuffleReduction(Builder, Vec, getOpcode(ID), MRK, FMF);
II->replaceAllUsesWith(Rdx);		II->replaceAllUsesWith(Rdx);
II->eraseFromParent();		II->eraseFromParent();
Changed = true;		Changed = true;
}		}
return Changed;		return Changed;
}		}

class ExpandReductions : public FunctionPass {		class ExpandReductions : public FunctionPass {
Show All 38 Lines

llvm/trunk/lib/Transforms/Utils/LoopUtils.cpp

Show First 20 Lines • Show All 665 Lines • ▼ Show 20 Lines	bool llvm::hasIterationCountInvariantInParent(Loop *InnerLoop,
ScalarEvolution::LoopDisposition LD =		ScalarEvolution::LoopDisposition LD =
SE.getLoopDisposition(InnerLoopBECountSC, OuterL);		SE.getLoopDisposition(InnerLoopBECountSC, OuterL);
if (LD != ScalarEvolution::LoopInvariant)		if (LD != ScalarEvolution::LoopInvariant)
return false;		return false;

return true;		return true;
}		}

/// Adds a 'fast' flag to floating point operations.		static Value *addFastMathFlag(Value *V, FastMathFlags FMF) {
static Value *addFastMathFlag(Value *V) {		if (isa<FPMathOperator>(V))
if (isa<FPMathOperator>(V)) {		cast<Instruction>(V)->setFastMathFlags(FMF);
FastMathFlags Flags;
Flags.setFast();
cast<Instruction>(V)->setFastMathFlags(Flags);
}
return V;		return V;
}		}

Value *llvm::createMinMaxOp(IRBuilder<> &Builder,		Value *llvm::createMinMaxOp(IRBuilder<> &Builder,
RecurrenceDescriptor::MinMaxRecurrenceKind RK,		RecurrenceDescriptor::MinMaxRecurrenceKind RK,
Value *Left, Value *Right) {		Value *Left, Value *Right) {
CmpInst::Predicate P = CmpInst::ICMP_NE;		CmpInst::Predicate P = CmpInst::ICMP_NE;
switch (RK) {		switch (RK) {
▲ Show 20 Lines • Show All 67 Lines • ▼ Show 20 Lines	llvm::getOrderedReduction(IRBuilder<> &Builder, Value Acc, Value Src,

return Result;		return Result;
}		}

// Helper to generate a log2 shuffle reduction.		// Helper to generate a log2 shuffle reduction.
Value *		Value *
llvm::getShuffleReduction(IRBuilder<> &Builder, Value *Src, unsigned Op,		llvm::getShuffleReduction(IRBuilder<> &Builder, Value *Src, unsigned Op,
RecurrenceDescriptor::MinMaxRecurrenceKind MinMaxKind,		RecurrenceDescriptor::MinMaxRecurrenceKind MinMaxKind,
ArrayRef<Value *> RedOps) {		FastMathFlags FMF, ArrayRef<Value *> RedOps) {
unsigned VF = Src->getType()->getVectorNumElements();		unsigned VF = Src->getType()->getVectorNumElements();
// VF is a power of 2 so we can emit the reduction using log2(VF) shuffles		// VF is a power of 2 so we can emit the reduction using log2(VF) shuffles
// and vector ops, reducing the set of values being computed by half each		// and vector ops, reducing the set of values being computed by half each
// round.		// round.
assert(isPowerOf2_32(VF) &&		assert(isPowerOf2_32(VF) &&
"Reduction emission only supported for pow2 vectors!");		"Reduction emission only supported for pow2 vectors!");
Value *TmpVec = Src;		Value *TmpVec = Src;
SmallVector<Constant *, 32> ShuffleMask(VF, nullptr);		SmallVector<Constant *, 32> ShuffleMask(VF, nullptr);
for (unsigned i = VF; i != 1; i >>= 1) {		for (unsigned i = VF; i != 1; i >>= 1) {
// Move the upper half of the vector to the lower half.		// Move the upper half of the vector to the lower half.
for (unsigned j = 0; j != i / 2; ++j)		for (unsigned j = 0; j != i / 2; ++j)
ShuffleMask[j] = Builder.getInt32(i / 2 + j);		ShuffleMask[j] = Builder.getInt32(i / 2 + j);

// Fill the rest of the mask with undef.		// Fill the rest of the mask with undef.
std::fill(&ShuffleMask[i / 2], ShuffleMask.end(),		std::fill(&ShuffleMask[i / 2], ShuffleMask.end(),
UndefValue::get(Builder.getInt32Ty()));		UndefValue::get(Builder.getInt32Ty()));

Value *Shuf = Builder.CreateShuffleVector(		Value *Shuf = Builder.CreateShuffleVector(
TmpVec, UndefValue::get(TmpVec->getType()),		TmpVec, UndefValue::get(TmpVec->getType()),
ConstantVector::get(ShuffleMask), "rdx.shuf");		ConstantVector::get(ShuffleMask), "rdx.shuf");

if (Op != Instruction::ICmp && Op != Instruction::FCmp) {		if (Op != Instruction::ICmp && Op != Instruction::FCmp) {
// Floating point operations had to be 'fast' to enable the reduction.		// Floating point operations had to be 'fast' to enable the reduction.
TmpVec = addFastMathFlag(Builder.CreateBinOp((Instruction::BinaryOps)Op,		TmpVec = addFastMathFlag(Builder.CreateBinOp((Instruction::BinaryOps)Op,
TmpVec, Shuf, "bin.rdx"));		TmpVec, Shuf, "bin.rdx"),
		FMF);
} else {		} else {
assert(MinMaxKind != RecurrenceDescriptor::MRK_Invalid &&		assert(MinMaxKind != RecurrenceDescriptor::MRK_Invalid &&
"Invalid min/max");		"Invalid min/max");
TmpVec = createMinMaxOp(Builder, MinMaxKind, TmpVec, Shuf);		TmpVec = createMinMaxOp(Builder, MinMaxKind, TmpVec, Shuf);
}		}
if (!RedOps.empty())		if (!RedOps.empty())
propagateIRFlags(TmpVec, RedOps);		propagateIRFlags(TmpVec, RedOps);
}		}
// The result is in the first element of the vector.		// The result is in the first element of the vector.
return Builder.CreateExtractElement(TmpVec, Builder.getInt32(0));		return Builder.CreateExtractElement(TmpVec, Builder.getInt32(0));
}		}

/// Create a simple vector reduction specified by an opcode and some		/// Create a simple vector reduction specified by an opcode and some
/// flags (if generating min/max reductions).		/// flags (if generating min/max reductions).
Value *llvm::createSimpleTargetReduction(		Value *llvm::createSimpleTargetReduction(
IRBuilder<> &Builder, const TargetTransformInfo *TTI, unsigned Opcode,		IRBuilder<> &Builder, const TargetTransformInfo *TTI, unsigned Opcode,
Value *Src, TargetTransformInfo::ReductionFlags Flags,		Value *Src, TargetTransformInfo::ReductionFlags Flags, FastMathFlags FMF,
ArrayRef<Value *> RedOps) {		ArrayRef<Value *> RedOps) {
assert(isa<VectorType>(Src->getType()) && "Type must be a vector");		assert(isa<VectorType>(Src->getType()) && "Type must be a vector");

Value *ScalarUdf = UndefValue::get(Src->getType()->getVectorElementType());		Value *ScalarUdf = UndefValue::get(Src->getType()->getVectorElementType());
std::function<Value *()> BuildFunc;		std::function<Value *()> BuildFunc;
using RD = RecurrenceDescriptor;		using RD = RecurrenceDescriptor;
RD::MinMaxRecurrenceKind MinMaxKind = RD::MRK_Invalid;		RD::MinMaxRecurrenceKind MinMaxKind = RD::MRK_Invalid;
// TODO: Support creating ordered reductions.		// TODO: Support creating ordered reductions.
▲ Show 20 Lines • Show All 53 Lines • ▼ Show 20 Lines	case Instruction::FCmp:
}		}
break;		break;
default:		default:
llvm_unreachable("Unhandled opcode");		llvm_unreachable("Unhandled opcode");
break;		break;
}		}
if (TTI->useReductionIntrinsic(Opcode, Src->getType(), Flags))		if (TTI->useReductionIntrinsic(Opcode, Src->getType(), Flags))
return BuildFunc();		return BuildFunc();
return getShuffleReduction(Builder, Src, Opcode, MinMaxKind, RedOps);		return getShuffleReduction(Builder, Src, Opcode, MinMaxKind, FMF, RedOps);
}		}

/// Create a vector reduction using a given recurrence descriptor.		/// Create a vector reduction using a given recurrence descriptor.
Value *llvm::createTargetReduction(IRBuilder<> &B,		Value *llvm::createTargetReduction(IRBuilder<> &B,
const TargetTransformInfo *TTI,		const TargetTransformInfo *TTI,
RecurrenceDescriptor &Desc, Value *Src,		RecurrenceDescriptor &Desc, Value *Src,
bool NoNaN) {		bool NoNaN) {
// TODO: Support in-order reductions based on the recurrence descriptor.		// TODO: Support in-order reductions based on the recurrence descriptor.
using RD = RecurrenceDescriptor;		using RD = RecurrenceDescriptor;
RD::RecurrenceKind RecKind = Desc.getRecurrenceKind();		RD::RecurrenceKind RecKind = Desc.getRecurrenceKind();
TargetTransformInfo::ReductionFlags Flags;		TargetTransformInfo::ReductionFlags Flags;
Flags.NoNaN = NoNaN;		Flags.NoNaN = NoNaN;
switch (RecKind) {		switch (RecKind) {
case RD::RK_FloatAdd:		case RD::RK_FloatAdd:
return createSimpleTargetReduction(B, TTI, Instruction::FAdd, Src, Flags);		return createSimpleTargetReduction(B, TTI, Instruction::FAdd, Src, Flags,
		Desc.getFastMathFlags());
case RD::RK_FloatMult:		case RD::RK_FloatMult:
return createSimpleTargetReduction(B, TTI, Instruction::FMul, Src, Flags);		return createSimpleTargetReduction(B, TTI, Instruction::FMul, Src, Flags,
		Desc.getFastMathFlags());
case RD::RK_IntegerAdd:		case RD::RK_IntegerAdd:
return createSimpleTargetReduction(B, TTI, Instruction::Add, Src, Flags);		return createSimpleTargetReduction(B, TTI, Instruction::Add, Src, Flags,
		Desc.getFastMathFlags());
case RD::RK_IntegerMult:		case RD::RK_IntegerMult:
return createSimpleTargetReduction(B, TTI, Instruction::Mul, Src, Flags);		return createSimpleTargetReduction(B, TTI, Instruction::Mul, Src, Flags,
		Desc.getFastMathFlags());
case RD::RK_IntegerAnd:		case RD::RK_IntegerAnd:
return createSimpleTargetReduction(B, TTI, Instruction::And, Src, Flags);		return createSimpleTargetReduction(B, TTI, Instruction::And, Src, Flags,
		Desc.getFastMathFlags());
case RD::RK_IntegerOr:		case RD::RK_IntegerOr:
return createSimpleTargetReduction(B, TTI, Instruction::Or, Src, Flags);		return createSimpleTargetReduction(B, TTI, Instruction::Or, Src, Flags,
		Desc.getFastMathFlags());
case RD::RK_IntegerXor:		case RD::RK_IntegerXor:
return createSimpleTargetReduction(B, TTI, Instruction::Xor, Src, Flags);		return createSimpleTargetReduction(B, TTI, Instruction::Xor, Src, Flags,
		Desc.getFastMathFlags());
case RD::RK_IntegerMinMax: {		case RD::RK_IntegerMinMax: {
RD::MinMaxRecurrenceKind MMKind = Desc.getMinMaxRecurrenceKind();		RD::MinMaxRecurrenceKind MMKind = Desc.getMinMaxRecurrenceKind();
Flags.IsMaxOp = (MMKind == RD::MRK_SIntMax || MMKind == RD::MRK_UIntMax);		Flags.IsMaxOp = (MMKind == RD::MRK_SIntMax || MMKind == RD::MRK_UIntMax);
Flags.IsSigned = (MMKind == RD::MRK_SIntMax || MMKind == RD::MRK_SIntMin);		Flags.IsSigned = (MMKind == RD::MRK_SIntMax || MMKind == RD::MRK_SIntMin);
return createSimpleTargetReduction(B, TTI, Instruction::ICmp, Src, Flags);		return createSimpleTargetReduction(B, TTI, Instruction::ICmp, Src, Flags,
		Desc.getFastMathFlags());
}		}
case RD::RK_FloatMinMax: {		case RD::RK_FloatMinMax: {
Flags.IsMaxOp = Desc.getMinMaxRecurrenceKind() == RD::MRK_FloatMax;		Flags.IsMaxOp = Desc.getMinMaxRecurrenceKind() == RD::MRK_FloatMax;
return createSimpleTargetReduction(B, TTI, Instruction::FCmp, Src, Flags);		return createSimpleTargetReduction(B, TTI, Instruction::FCmp, Src, Flags,
		Desc.getFastMathFlags());
}		}
default:		default:
llvm_unreachable("Unhandled RecKind");		llvm_unreachable("Unhandled RecKind");
}		}
}		}

void llvm::propagateIRFlags(Value *I, ArrayRef<Value *> VL, Value *OpValue) {		void llvm::propagateIRFlags(Value *I, ArrayRef<Value *> VL, Value *OpValue) {
auto *VecOp = dyn_cast<Instruction>(I);		auto *VecOp = dyn_cast<Instruction>(I);
▲ Show 20 Lines • Show All 52 Lines • Show Last 20 Lines

llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 313 Lines • ▼ Show 20 Lines
/// will execute once for every X iterations of the loop header.		/// will execute once for every X iterations of the loop header.
///		///
/// TODO: We should use actual block probability here, if available. Currently,		/// TODO: We should use actual block probability here, if available. Currently,
/// we always assume predicated blocks have a 50% chance of executing.		/// we always assume predicated blocks have a 50% chance of executing.
static unsigned getReciprocalPredBlockProb() { return 2; }		static unsigned getReciprocalPredBlockProb() { return 2; }

/// A helper function that adds a 'fast' flag to floating-point operations.		/// A helper function that adds a 'fast' flag to floating-point operations.
static Value *addFastMathFlag(Value *V) {		static Value *addFastMathFlag(Value *V) {
if (isa<FPMathOperator>(V)) {		if (isa<FPMathOperator>(V))
FastMathFlags Flags;		cast<Instruction>(V)->setFastMathFlags(FastMathFlags::getFast());
Flags.setFast();		return V;
cast<Instruction>(V)->setFastMathFlags(Flags);
}		}

		static Value *addFastMathFlag(Value *V, FastMathFlags FMF) {
		if (isa<FPMathOperator>(V))
		cast<Instruction>(V)->setFastMathFlags(FMF);
return V;		return V;
}		}

/// A helper function that returns an integer or floating-point constant with		/// A helper function that returns an integer or floating-point constant with
/// value C.		/// value C.
static Constant *getSignedIntOrFpConstant(Type *Ty, int64_t C) {		static Constant *getSignedIntOrFpConstant(Type *Ty, int64_t C) {
return Ty->isIntegerTy() ? ConstantInt::getSigned(Ty, C)		return Ty->isIntegerTy() ? ConstantInt::getSigned(Ty, C)
: ConstantFP::get(Ty, C);		: ConstantFP::get(Ty, C);
▲ Show 20 Lines • Show All 3,272 Lines • ▼ Show 20 Lines	void InnerLoopVectorizer::fixReduction(PHINode *Phi) {
unsigned Op = RecurrenceDescriptor::getRecurrenceBinOp(RK);		unsigned Op = RecurrenceDescriptor::getRecurrenceBinOp(RK);
setDebugLocFromInst(Builder, ReducedPartRdx);		setDebugLocFromInst(Builder, ReducedPartRdx);
for (unsigned Part = 1; Part < UF; ++Part) {		for (unsigned Part = 1; Part < UF; ++Part) {
Value *RdxPart = VectorLoopValueMap.getVectorValue(LoopExitInst, Part);		Value *RdxPart = VectorLoopValueMap.getVectorValue(LoopExitInst, Part);
if (Op != Instruction::ICmp && Op != Instruction::FCmp)		if (Op != Instruction::ICmp && Op != Instruction::FCmp)
// Floating point operations had to be 'fast' to enable the reduction.		// Floating point operations had to be 'fast' to enable the reduction.
ReducedPartRdx = addFastMathFlag(		ReducedPartRdx = addFastMathFlag(
Builder.CreateBinOp((Instruction::BinaryOps)Op, RdxPart,		Builder.CreateBinOp((Instruction::BinaryOps)Op, RdxPart,
ReducedPartRdx, "bin.rdx"));		ReducedPartRdx, "bin.rdx"),
		RdxDesc.getFastMathFlags());
else		else
ReducedPartRdx = createMinMaxOp(Builder, MinMaxKind, ReducedPartRdx,		ReducedPartRdx = createMinMaxOp(Builder, MinMaxKind, ReducedPartRdx,
RdxPart);		RdxPart);
}		}

if (VF > 1) {		if (VF > 1) {
bool NoNaN = Legal->hasFunNoNaNAttr();		bool NoNaN = Legal->hasFunNoNaNAttr();
ReducedPartRdx =		ReducedPartRdx =
▲ Show 20 Lines • Show All 3,962 Lines • Show Last 20 Lines

llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp

Show First 20 Lines • Show All 5,923 Lines • ▼ Show 20 Lines	Value *emitReduction(Value *VectorizedValue, IRBuilder<> &Builder,
unsigned ReduxWidth, const TargetTransformInfo *TTI) {		unsigned ReduxWidth, const TargetTransformInfo *TTI) {
assert(VectorizedValue && "Need to have a vectorized tree node");		assert(VectorizedValue && "Need to have a vectorized tree node");
assert(isPowerOf2_32(ReduxWidth) &&		assert(isPowerOf2_32(ReduxWidth) &&
"We only handle power-of-two reductions for now");		"We only handle power-of-two reductions for now");

if (!IsPairwiseReduction)		if (!IsPairwiseReduction)
return createSimpleTargetReduction(		return createSimpleTargetReduction(
Builder, TTI, ReductionData.getOpcode(), VectorizedValue,		Builder, TTI, ReductionData.getOpcode(), VectorizedValue,
ReductionData.getFlags(), ReductionOps.back());		ReductionData.getFlags(), FastMathFlags::getFast(),
		ReductionOps.back());

Value *TmpVec = VectorizedValue;		Value *TmpVec = VectorizedValue;
for (unsigned i = ReduxWidth / 2; i != 0; i >>= 1) {		for (unsigned i = ReduxWidth / 2; i != 0; i >>= 1) {
Value *LeftMask =		Value *LeftMask =
createRdxShuffleMask(ReduxWidth, i, true, true, Builder);		createRdxShuffleMask(ReduxWidth, i, true, true, Builder);
Value *RightMask =		Value *RightMask =
createRdxShuffleMask(ReduxWidth, i, true, false, Builder);		createRdxShuffleMask(ReduxWidth, i, true, false, Builder);

▲ Show 20 Lines • Show All 546 Lines • Show Last 20 Lines

llvm/trunk/test/Transforms/LoopVectorize/reduction-fastmath.ll

				; RUN: opt -S -loop-vectorize < %s | FileCheck %s

				target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
				target triple = "x86_64-unknown-linux-gnu"

				define float @reduction_sum_float_ieee(i32 %n, float* %array) {
				; CHECK-LABEL: define float @reduction_sum_float_ieee(
				entry:
				%entry.cond = icmp ne i32 0, 4096
				br i1 %entry.cond, label %loop, label %loop.exit

				loop:
				%idx = phi i32 [ 0, %entry ], [ %idx.inc, %loop ]
				%sum = phi float [ 0.000000e+00, %entry ], [ %sum.inc, %loop ]
				%address = getelementptr float, float* %array, i32 %idx
				%value = load float, float* %address
				%sum.inc = fadd float %sum, %value
				%idx.inc = add i32 %idx, 1
				%be.cond = icmp ne i32 %idx.inc, 4096
				br i1 %be.cond, label %loop, label %loop.exit

				loop.exit:
				%sum.lcssa = phi float [ %sum.inc, %loop ], [ 0.000000e+00, %entry ]
				; CHECK-NOT: %wide.load = load <4 x float>, <4 x float>*
				; CHECK: ret float %sum.lcssa
				ret float %sum.lcssa
				}

				define float @reduction_sum_float_fastmath(i32 %n, float* %array) {
				; CHECK-LABEL: define float @reduction_sum_float_fastmath(
				; CHECK: fadd fast <4 x float>
				; CHECK: fadd fast <4 x float>
				; CHECK: fadd fast <4 x float>
				; CHECK: fadd fast <4 x float>
				; CHECK: fadd fast <4 x float>
				entry:
				%entry.cond = icmp ne i32 0, 4096
				br i1 %entry.cond, label %loop, label %loop.exit

				loop:
				%idx = phi i32 [ 0, %entry ], [ %idx.inc, %loop ]
				%sum = phi float [ 0.000000e+00, %entry ], [ %sum.inc, %loop ]
				%address = getelementptr float, float* %array, i32 %idx
				%value = load float, float* %address
				%sum.inc = fadd fast float %sum, %value
				%idx.inc = add i32 %idx, 1
				%be.cond = icmp ne i32 %idx.inc, 4096
				br i1 %be.cond, label %loop, label %loop.exit

				loop.exit:
				%sum.lcssa = phi float [ %sum.inc, %loop ], [ 0.000000e+00, %entry ]
				; CHECK: ret float %sum.lcssa
				ret float %sum.lcssa
				}

				define float @reduction_sum_float_only_reassoc(i32 %n, float* %array) {
				; CHECK-LABEL: define float @reduction_sum_float_only_reassoc(
				; CHECK-NOT: fadd fast
				; CHECK: fadd reassoc <4 x float>
				; CHECK: fadd reassoc <4 x float>
				; CHECK: fadd reassoc <4 x float>
				; CHECK: fadd reassoc <4 x float>
				; CHECK: fadd reassoc <4 x float>

				entry:
				%entry.cond = icmp ne i32 0, 4096
				br i1 %entry.cond, label %loop, label %loop.exit

				loop:
				%idx = phi i32 [ 0, %entry ], [ %idx.inc, %loop ]
				%sum = phi float [ 0.000000e+00, %entry ], [ %sum.inc, %loop ]
				%address = getelementptr float, float* %array, i32 %idx
				%value = load float, float* %address
				%sum.inc = fadd reassoc float %sum, %value
				%idx.inc = add i32 %idx, 1
				%be.cond = icmp ne i32 %idx.inc, 4096
				br i1 %be.cond, label %loop, label %loop.exit

				loop.exit:
				%sum.lcssa = phi float [ %sum.inc, %loop ], [ 0.000000e+00, %entry ]
				; CHECK: ret float %sum.lcssa
				ret float %sum.lcssa
				}

				define float @reduction_sum_float_only_reassoc_and_contract(i32 %n, float* %array) {
				; CHECK-LABEL: define float @reduction_sum_float_only_reassoc_and_contract(
				; CHECK-NOT: fadd fast
				; CHECK: fadd reassoc contract <4 x float>
				; CHECK: fadd reassoc contract <4 x float>
				; CHECK: fadd reassoc contract <4 x float>
				; CHECK: fadd reassoc contract <4 x float>
				; CHECK: fadd reassoc contract <4 x float>

				entry:
				%entry.cond = icmp ne i32 0, 4096
				br i1 %entry.cond, label %loop, label %loop.exit

				loop:
				%idx = phi i32 [ 0, %entry ], [ %idx.inc, %loop ]
				%sum = phi float [ 0.000000e+00, %entry ], [ %sum.inc, %loop ]
				%address = getelementptr float, float* %array, i32 %idx
				%value = load float, float* %address
				%sum.inc = fadd reassoc contract float %sum, %value
				%idx.inc = add i32 %idx, 1
				%be.cond = icmp ne i32 %idx.inc, 4096
				br i1 %be.cond, label %loop, label %loop.exit

				loop.exit:
				%sum.lcssa = phi float [ %sum.inc, %loop ], [ 0.000000e+00, %entry ]
				; CHECK: ret float %sum.lcssa
				ret float %sum.lcssa
				}

This is an archive of the discontinued LLVM Phabricator instance.

Relax constraints for reduction vectorization
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 190156

llvm/trunk/include/llvm/Analysis/IVDescriptors.h

llvm/trunk/include/llvm/IR/Operator.h

llvm/trunk/include/llvm/Transforms/Utils/LoopUtils.h

llvm/trunk/lib/Analysis/IVDescriptors.cpp

llvm/trunk/lib/CodeGen/ExpandReductions.cpp

llvm/trunk/lib/Transforms/Utils/LoopUtils.cpp

llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp

llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp

llvm/trunk/test/Transforms/LoopVectorize/reduction-fastmath.ll

This is an archive of the discontinued LLVM Phabricator instance.

Relax constraints for reduction vectorization
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 190156

llvm/trunk/include/llvm/Analysis/IVDescriptors.h

llvm/trunk/include/llvm/IR/Operator.h

llvm/trunk/include/llvm/Transforms/Utils/LoopUtils.h

llvm/trunk/lib/Analysis/IVDescriptors.cpp

llvm/trunk/lib/CodeGen/ExpandReductions.cpp

llvm/trunk/lib/Transforms/Utils/LoopUtils.cpp

llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp

llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp

llvm/trunk/test/Transforms/LoopVectorize/reduction-fastmath.ll

Relax constraints for reduction vectorization
ClosedPublic