Changeset View
Changeset View
Standalone View
Standalone View
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
Show First 20 Lines • Show All 6,099 Lines • ▼ Show 20 Lines | bool tryToReduce(BoUpSLP &V, TargetTransformInfo *TTI) { | ||||
// vectors and rely on the backend to split them to legal sizes. | // vectors and rely on the backend to split them to legal sizes. | ||||
unsigned NumReducedVals = ReducedVals.size(); | unsigned NumReducedVals = ReducedVals.size(); | ||||
if (NumReducedVals < 4) | if (NumReducedVals < 4) | ||||
return false; | return false; | ||||
unsigned ReduxWidth = PowerOf2Floor(NumReducedVals); | unsigned ReduxWidth = PowerOf2Floor(NumReducedVals); | ||||
Value *VectorizedTree = nullptr; | Value *VectorizedTree = nullptr; | ||||
// FIXME: Fast-math-flags should be set based on the instructions in the | |||||
// reduction (not all of 'fast' are required). | |||||
IRBuilder<> Builder(cast<Instruction>(ReductionRoot)); | IRBuilder<> Builder(cast<Instruction>(ReductionRoot)); | ||||
FastMathFlags Unsafe; | FastMathFlags Unsafe; | ||||
Unsafe.setFast(); | Unsafe.setFast(); | ||||
Builder.setFastMathFlags(Unsafe); | Builder.setFastMathFlags(Unsafe); | ||||
unsigned i = 0; | unsigned i = 0; | ||||
BoUpSLP::ExtraValueToDebugLocsMap ExternallyUsedValues; | BoUpSLP::ExtraValueToDebugLocsMap ExternallyUsedValues; | ||||
// The same extra argument may be used several times, so log each attempt | // The same extra argument may be used several times, so log each attempt | ||||
▲ Show 20 Lines • Show All 173 Lines • ▼ Show 20 Lines | private: | ||||
/// Emit a horizontal reduction of the vectorized value. | /// Emit a horizontal reduction of the vectorized value. | ||||
Value *emitReduction(Value *VectorizedValue, IRBuilder<> &Builder, | Value *emitReduction(Value *VectorizedValue, IRBuilder<> &Builder, | ||||
unsigned ReduxWidth, const TargetTransformInfo *TTI) { | unsigned ReduxWidth, const TargetTransformInfo *TTI) { | ||||
assert(VectorizedValue && "Need to have a vectorized tree node"); | assert(VectorizedValue && "Need to have a vectorized tree node"); | ||||
assert(isPowerOf2_32(ReduxWidth) && | assert(isPowerOf2_32(ReduxWidth) && | ||||
"We only handle power-of-two reductions for now"); | "We only handle power-of-two reductions for now"); | ||||
if (!IsPairwiseReduction) | if (!IsPairwiseReduction) { | ||||
// FIXME: The builder should use an FMF guard. It should not be hard-coded | |||||
vporpo: If I am not mistaken, the flags are set to 'fast' by `tryToReduce()` line 6111. So maybe it is better to check them with an assert()? | |||||
spatel: Yes, that should be a good intermediate step until we can get this cleaned up properly. | |||||
// to 'fast'. | |||||
assert(Builder.getFastMathFlags().isFast() && "Expected 'fast' FMF"); | |||||
return createSimpleTargetReduction( | return createSimpleTargetReduction( | ||||
Builder, TTI, ReductionData.getOpcode(), VectorizedValue, | Builder, TTI, ReductionData.getOpcode(), VectorizedValue, | ||||
ReductionData.getFlags(), FastMathFlags::getFast(), | ReductionData.getFlags(), ReductionOps.back()); | ||||
ReductionOps.back()); | } | ||||
Value *TmpVec = VectorizedValue; | Value *TmpVec = VectorizedValue; | ||||
for (unsigned i = ReduxWidth / 2; i != 0; i >>= 1) { | for (unsigned i = ReduxWidth / 2; i != 0; i >>= 1) { | ||||
Value *LeftMask = | Value *LeftMask = | ||||
createRdxShuffleMask(ReduxWidth, i, true, true, Builder); | createRdxShuffleMask(ReduxWidth, i, true, true, Builder); | ||||
Value *RightMask = | Value *RightMask = | ||||
createRdxShuffleMask(ReduxWidth, i, true, false, Builder); | createRdxShuffleMask(ReduxWidth, i, true, false, Builder); | ||||
▲ Show 20 Lines • Show All 546 Lines • Show Last 20 Lines |
If I am not mistaken, the flags are set to 'fast' by `tryToReduce()` line 6111. So maybe it is better to check them with an assert()?