Changeset View
Changeset View
Standalone View
Standalone View
llvm/lib/Transforms/Utils/LoopUtils.cpp
Show First 20 Lines • Show All 669 Lines • ▼ Show 20 Lines | bool llvm::hasIterationCountInvariantInParent(Loop *InnerLoop, | ||||
ScalarEvolution::LoopDisposition LD = | ScalarEvolution::LoopDisposition LD = | ||||
SE.getLoopDisposition(InnerLoopBECountSC, OuterL); | SE.getLoopDisposition(InnerLoopBECountSC, OuterL); | ||||
if (LD != ScalarEvolution::LoopInvariant) | if (LD != ScalarEvolution::LoopInvariant) | ||||
return false; | return false; | ||||
return true; | return true; | ||||
} | } | ||||
/// Apply the fast-math flags \p FMF to \p V when \p V is a floating-point
/// math instruction; otherwise leave it untouched.
///
/// \param V   Value that may or may not be a floating-point math operation.
/// \param FMF Flags to set on \p V.
/// \returns \p V itself, so the call can wrap the expression that created it.
static Value *addFastMathFlag(Value *V, FastMathFlags FMF) {
  // FPMathOperator can also match values that are not Instructions (e.g. a
  // floating-point constant expression), so test for an Instruction first
  // rather than cast<>-ing unconditionally, which would assert on such a value.
  if (auto *I = dyn_cast<Instruction>(V))
    if (isa<FPMathOperator>(I))
      I->setFastMathFlags(FMF);
  return V;
}
Value *llvm::createMinMaxOp(IRBuilder<> &Builder, | Value *llvm::createMinMaxOp(IRBuilder<> &Builder, | ||||
RecurrenceDescriptor::MinMaxRecurrenceKind RK, | RecurrenceDescriptor::MinMaxRecurrenceKind RK, | ||||
Value *Left, Value *Right) { | Value *Left, Value *Right) { | ||||
CmpInst::Predicate P = CmpInst::ICMP_NE; | CmpInst::Predicate P = CmpInst::ICMP_NE; | ||||
switch (RK) { | switch (RK) { | ||||
default: | default: | ||||
llvm_unreachable("Unknown min/max recurrence kind"); | llvm_unreachable("Unknown min/max recurrence kind"); | ||||
case RecurrenceDescriptor::MRK_UIntMin: | case RecurrenceDescriptor::MRK_UIntMin: | ||||
▲ Show 20 Lines • Show All 64 Lines • ▼ Show 20 Lines | llvm::getOrderedReduction(IRBuilder<> &Builder, Value *Acc, Value *Src, | ||||
return Result; | return Result; | ||||
} | } | ||||
/// Emit a reduction of the vector \p Src as a sequence of shuffles and vector
/// operations, halving the number of live lanes each round.
///
/// \param Builder    Builder used to create every instruction.
/// \param Src        Vector value to reduce; its element count must be a
///                   power of two (asserted).
/// \param Op         Opcode of the reduction. ICmp/FCmp select the min/max
///                   path; any other value is emitted as a binary operator.
/// \param MinMaxKind Min/max flavour; must not be MRK_Invalid when \p Op is
///                   ICmp or FCmp (asserted).
/// \param RedOps     When non-empty, IR flags are propagated from these values
///                   onto the result of every round.
/// \returns An extract of lane 0 of the final vector, which holds the result.
Value *
llvm::getShuffleReduction(IRBuilder<> &Builder, Value *Src, unsigned Op,
                          RecurrenceDescriptor::MinMaxRecurrenceKind MinMaxKind,
                          ArrayRef<Value *> RedOps) {
  const unsigned VF = Src->getType()->getVectorNumElements();
  // With a power-of-two VF the reduction takes log2(VF) rounds of one shuffle
  // plus one vector op each.
  assert(isPowerOf2_32(VF) &&
         "Reduction emission only supported for pow2 vectors!");

  const bool IsMinMax = Op == Instruction::ICmp || Op == Instruction::FCmp;
  Constant *UndefLane = UndefValue::get(Builder.getInt32Ty());
  SmallVector<Constant *, 32> ShuffleMask(VF, nullptr);
  Value *TmpVec = Src;

  for (unsigned Width = VF; Width != 1; Width >>= 1) {
    const unsigned Half = Width / 2;

    // Low half of the mask selects the upper half of the live lanes ...
    for (unsigned Lane = 0; Lane != Half; ++Lane)
      ShuffleMask[Lane] = Builder.getInt32(Half + Lane);
    // ... and every remaining mask entry is undef.
    std::fill(ShuffleMask.begin() + Half, ShuffleMask.end(), UndefLane);

    Value *Shuf = Builder.CreateShuffleVector(
        TmpVec, UndefValue::get(TmpVec->getType()),
        ConstantVector::get(ShuffleMask), "rdx.shuf");

    if (IsMinMax) {
      assert(MinMaxKind != RecurrenceDescriptor::MRK_Invalid &&
             "Invalid min/max");
      TmpVec = createMinMaxOp(Builder, MinMaxKind, TmpVec, Shuf);
    } else {
      // The builder propagates its fast-math-flags setting.
      TmpVec = Builder.CreateBinOp(static_cast<Instruction::BinaryOps>(Op),
                                   TmpVec, Shuf, "bin.rdx");
    }

    if (!RedOps.empty())
      propagateIRFlags(TmpVec, RedOps);
  }

  // Lane 0 of the last vector is the reduced value.
  return Builder.CreateExtractElement(TmpVec, Builder.getInt32(0));
}
/// Create a simple vector reduction specified by an opcode and some | /// Create a simple vector reduction specified by an opcode and some | ||||
/// flags (if generating min/max reductions). | /// flags (if generating min/max reductions). | ||||
Value *llvm::createSimpleTargetReduction( | Value *llvm::createSimpleTargetReduction( | ||||
IRBuilder<> &Builder, const TargetTransformInfo *TTI, unsigned Opcode, | IRBuilder<> &Builder, const TargetTransformInfo *TTI, unsigned Opcode, | ||||
Value *Src, TargetTransformInfo::ReductionFlags Flags, FastMathFlags FMF, | Value *Src, TargetTransformInfo::ReductionFlags Flags, | ||||
ArrayRef<Value *> RedOps) { | ArrayRef<Value *> RedOps) { | ||||
assert(isa<VectorType>(Src->getType()) && "Type must be a vector"); | assert(isa<VectorType>(Src->getType()) && "Type must be a vector"); | ||||
Value *ScalarUdf = UndefValue::get(Src->getType()->getVectorElementType()); | Value *ScalarUdf = UndefValue::get(Src->getType()->getVectorElementType()); | ||||
std::function<Value *()> BuildFunc; | std::function<Value *()> BuildFunc; | ||||
using RD = RecurrenceDescriptor; | using RD = RecurrenceDescriptor; | ||||
RD::MinMaxRecurrenceKind MinMaxKind = RD::MRK_Invalid; | RD::MinMaxRecurrenceKind MinMaxKind = RD::MRK_Invalid; | ||||
// TODO: Support creating ordered reductions. | // TODO: Support creating ordered reductions. | ||||
▲ Show 20 Lines • Show All 53 Lines • ▼ Show 20 Lines | case Instruction::FCmp: | ||||
} | } | ||||
break; | break; | ||||
default: | default: | ||||
llvm_unreachable("Unhandled opcode"); | llvm_unreachable("Unhandled opcode"); | ||||
break; | break; | ||||
} | } | ||||
if (TTI->useReductionIntrinsic(Opcode, Src->getType(), Flags)) | if (TTI->useReductionIntrinsic(Opcode, Src->getType(), Flags)) | ||||
return BuildFunc(); | return BuildFunc(); | ||||
return getShuffleReduction(Builder, Src, Opcode, MinMaxKind, FMF, RedOps); | return getShuffleReduction(Builder, Src, Opcode, MinMaxKind, RedOps); | ||||
} | } | ||||
/// Create a vector reduction of \p Src as described by the recurrence
/// descriptor \p Desc.
///
/// \param B     Builder used to emit the reduction. Its fast-math flags are
///              set from \p Desc under a guard local to this call.
/// \param TTI   Target information forwarded to createSimpleTargetReduction.
/// \param Desc  Supplies the recurrence kind, min/max kind and fast-math flags.
/// \param Src   Vector value to reduce.
/// \param NoNaN Stored in the reduction flags' NoNaN field.
/// \returns The value produced by createSimpleTargetReduction.
Value *llvm::createTargetReduction(IRBuilder<> &B,
                                   const TargetTransformInfo *TTI,
                                   RecurrenceDescriptor &Desc, Value *Src,
                                   bool NoNaN) {
  // TODO: Support in-order reductions based on the recurrence descriptor.
  using RD = RecurrenceDescriptor;

  TargetTransformInfo::ReductionFlags Flags;
  Flags.NoNaN = NoNaN;

  // Every op created for the reduction picks up the recurrence descriptor's
  // fast-math-flags through the builder.
  IRBuilder<>::FastMathFlagGuard FMFGuard(B);
  B.setFastMathFlags(Desc.getFastMathFlags());

  // Translate the recurrence kind into an opcode (plus min/max flags), then
  // emit the reduction with a single call below.
  unsigned Opcode = 0;
  switch (Desc.getRecurrenceKind()) {
  case RD::RK_FloatAdd:
    Opcode = Instruction::FAdd;
    break;
  case RD::RK_FloatMult:
    Opcode = Instruction::FMul;
    break;
  case RD::RK_IntegerAdd:
    Opcode = Instruction::Add;
    break;
  case RD::RK_IntegerMult:
    Opcode = Instruction::Mul;
    break;
  case RD::RK_IntegerAnd:
    Opcode = Instruction::And;
    break;
  case RD::RK_IntegerOr:
    Opcode = Instruction::Or;
    break;
  case RD::RK_IntegerXor:
    Opcode = Instruction::Xor;
    break;
  case RD::RK_IntegerMinMax: {
    const RD::MinMaxRecurrenceKind MMKind = Desc.getMinMaxRecurrenceKind();
    Flags.IsMaxOp = (MMKind == RD::MRK_SIntMax || MMKind == RD::MRK_UIntMax);
    Flags.IsSigned = (MMKind == RD::MRK_SIntMax || MMKind == RD::MRK_SIntMin);
    Opcode = Instruction::ICmp;
    break;
  }
  case RD::RK_FloatMinMax:
    Flags.IsMaxOp = Desc.getMinMaxRecurrenceKind() == RD::MRK_FloatMax;
    Opcode = Instruction::FCmp;
    break;
  default:
    llvm_unreachable("Unhandled RecKind");
  }

  return createSimpleTargetReduction(B, TTI, Opcode, Src, Flags);
}
void llvm::propagateIRFlags(Value *I, ArrayRef<Value *> VL, Value *OpValue) { | void llvm::propagateIRFlags(Value *I, ArrayRef<Value *> VL, Value *OpValue) { | ||||
auto *VecOp = dyn_cast<Instruction>(I); | auto *VecOp = dyn_cast<Instruction>(I); | ||||
▲ Show 20 Lines • Show All 52 Lines • Show Last 20 Lines |