Skip to content

Commit f77d165

Browse files
committedFeb 23, 2017
[SLP] Fix for PR32036: Vectorized horizontal reduction returning wrong
result Summary: If the same value is used several times as an extra value, SLP vectorizer takes it into account only once instead of actual number of using. For example: ``` int val = 1; for (int y = 0; y < 8; y++) { for (int x = 0; x < 8; x++) { val = val + input[y * 8 + x] + 3; } } ``` We have 2 extra rguments: `1` - initial value of horizontal reduction and `3`, which is added 8*8 times to the reduction. Before the patch we added `1` to the reduction value and added once `3`, though it must be added 64 times. Reviewers: mkuper, mzolotukhin Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D30262 llvm-svn: 295972
1 parent a606713 commit f77d165

File tree

2 files changed

+27
-17
lines changed

2 files changed

+27
-17
lines changed
 

Diff for: ‎llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

+21-13
Original file line numberDiff line numberDiff line change
@@ -304,6 +304,7 @@ class BoUpSLP {
304304
typedef SmallVector<Instruction *, 16> InstrList;
305305
typedef SmallPtrSet<Value *, 16> ValueSet;
306306
typedef SmallVector<StoreInst *, 8> StoreList;
307+
typedef MapVector<Value *, SmallVector<DebugLoc, 2>> ExtraValueToDebugLocsMap;
307308

308309
BoUpSLP(Function *Func, ScalarEvolution *Se, TargetTransformInfo *Tti,
309310
TargetLibraryInfo *TLi, AliasAnalysis *Aa, LoopInfo *Li,
@@ -333,7 +334,7 @@ class BoUpSLP {
333334
/// Vectorize the tree but with the list of externally used values \p
334335
/// ExternallyUsedValues. Values in this MapVector can be replaced but the
335336
/// generated extractvalue instructions.
336-
Value *vectorizeTree(MapVector<Value *, DebugLoc> &ExternallyUsedValues);
337+
Value *vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues);
337338

338339
/// \returns the cost incurred by unwanted spills and fills, caused by
339340
/// holding live values over call sites.
@@ -352,7 +353,7 @@ class BoUpSLP {
352353
/// into account (anf updating it, if required) list of externally used
353354
/// values stored in \p ExternallyUsedValues.
354355
void buildTree(ArrayRef<Value *> Roots,
355-
MapVector<Value *, DebugLoc> &ExternallyUsedValues,
356+
ExtraValueToDebugLocsMap &ExternallyUsedValues,
356357
ArrayRef<Value *> UserIgnoreLst = None);
357358

358359
/// Clear the internal data structures that are created by 'buildTree'.
@@ -953,11 +954,11 @@ class BoUpSLP {
953954

954955
void BoUpSLP::buildTree(ArrayRef<Value *> Roots,
955956
ArrayRef<Value *> UserIgnoreLst) {
956-
MapVector<Value *, DebugLoc> ExternallyUsedValues;
957+
ExtraValueToDebugLocsMap ExternallyUsedValues;
957958
buildTree(Roots, ExternallyUsedValues, UserIgnoreLst);
958959
}
959960
void BoUpSLP::buildTree(ArrayRef<Value *> Roots,
960-
MapVector<Value *, DebugLoc> &ExternallyUsedValues,
961+
ExtraValueToDebugLocsMap &ExternallyUsedValues,
961962
ArrayRef<Value *> UserIgnoreLst) {
962963
deleteTree();
963964
UserIgnoreList = UserIgnoreLst;
@@ -2801,12 +2802,12 @@ Value *BoUpSLP::vectorizeTree(ArrayRef<Value *> VL, TreeEntry *E) {
28012802
}
28022803

28032804
Value *BoUpSLP::vectorizeTree() {
2804-
MapVector<Value *, DebugLoc> ExternallyUsedValues;
2805+
ExtraValueToDebugLocsMap ExternallyUsedValues;
28052806
return vectorizeTree(ExternallyUsedValues);
28062807
}
28072808

28082809
Value *
2809-
BoUpSLP::vectorizeTree(MapVector<Value *, DebugLoc> &ExternallyUsedValues) {
2810+
BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) {
28102811

28112812
// All blocks must be scheduled before any instructions are inserted.
28122813
for (auto &BSIter : BlocksSchedules) {
@@ -2868,7 +2869,6 @@ BoUpSLP::vectorizeTree(MapVector<Value *, DebugLoc> &ExternallyUsedValues) {
28682869
assert(ExternallyUsedValues.count(Scalar) &&
28692870
"Scalar with nullptr as an external user must be registered in "
28702871
"ExternallyUsedValues map");
2871-
DebugLoc DL = ExternallyUsedValues[Scalar];
28722872
if (auto *VecI = dyn_cast<Instruction>(Vec)) {
28732873
Builder.SetInsertPoint(VecI->getParent(),
28742874
std::next(VecI->getIterator()));
@@ -2878,8 +2878,9 @@ BoUpSLP::vectorizeTree(MapVector<Value *, DebugLoc> &ExternallyUsedValues) {
28782878
Value *Ex = Builder.CreateExtractElement(Vec, Lane);
28792879
Ex = extend(ScalarRoot, Ex, Scalar->getType());
28802880
CSEBlocks.insert(cast<Instruction>(Scalar)->getParent());
2881+
auto &Locs = ExternallyUsedValues[Scalar];
2882+
ExternallyUsedValues.insert({Ex, Locs});
28812883
ExternallyUsedValues.erase(Scalar);
2882-
ExternallyUsedValues[Ex] = DL;
28832884
continue;
28842885
}
28852886

@@ -4439,9 +4440,11 @@ class HorizontalReduction {
44394440
Builder.setFastMathFlags(Unsafe);
44404441
unsigned i = 0;
44414442

4442-
MapVector<Value *, DebugLoc> ExternallyUsedValues;
4443+
BoUpSLP::ExtraValueToDebugLocsMap ExternallyUsedValues;
4444+
// The same extra argument may be used several time, so log each attempt
4445+
// to use it.
44434446
for (auto &Pair : ExtraArgs)
4444-
ExternallyUsedValues[Pair.second] = Pair.first->getDebugLoc();
4447+
ExternallyUsedValues[Pair.second].push_back(Pair.first->getDebugLoc());
44454448
while (i < NumReducedVals - ReduxWidth + 1 && ReduxWidth > 2) {
44464449
auto VL = makeArrayRef(&ReducedVals[i], ReduxWidth);
44474450
V.buildTree(VL, ExternallyUsedValues, ReductionOps);
@@ -4489,9 +4492,14 @@ class HorizontalReduction {
44894492
Builder.CreateBinOp(ReductionOpcode, VectorizedTree, I);
44904493
}
44914494
for (auto &Pair : ExternallyUsedValues) {
4492-
Builder.SetCurrentDebugLocation(Pair.second);
4493-
VectorizedTree = Builder.CreateBinOp(ReductionOpcode, VectorizedTree,
4494-
Pair.first, "bin.extra");
4495+
assert(!Pair.second.empty() &&
4496+
"At least one DebugLoc must be inserted");
4497+
// Add each externally used value to the final reduction.
4498+
for (auto &DL : Pair.second) {
4499+
Builder.SetCurrentDebugLocation(DL);
4500+
VectorizedTree = Builder.CreateBinOp(ReductionOpcode, VectorizedTree,
4501+
Pair.first, "bin.extra");
4502+
}
44954503
}
44964504
// Update users.
44974505
if (ReductionPHI && !isa<UndefValue>(ReductionPHI)) {

Diff for: ‎llvm/test/Transforms/SLPVectorizer/X86/horizontal-list.ll

+6-4
Original file line numberDiff line numberDiff line change
@@ -1473,9 +1473,10 @@ define float @extra_args_same_several_times(float* nocapture readonly %x, i32 %a
14731473
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x float> [[BIN_RDX4]], i32 0
14741474
; CHECK-NEXT: [[BIN_EXTRA:%.*]] = fadd fast float [[TMP2]], [[ADD]]
14751475
; CHECK-NEXT: [[BIN_EXTRA5:%.*]] = fadd fast float [[BIN_EXTRA]], 5.000000e+00
1476-
; CHECK-NEXT: [[BIN_EXTRA6:%.*]] = fadd fast float [[BIN_EXTRA5]], [[CONV]]
1476+
; CHECK-NEXT: [[BIN_EXTRA6:%.*]] = fadd fast float [[BIN_EXTRA5]], 5.000000e+00
1477+
; CHECK-NEXT: [[BIN_EXTRA7:%.*]] = fadd fast float [[BIN_EXTRA6]], [[CONV]]
14771478
; CHECK-NEXT: [[ADD4_6:%.*]] = fadd fast float undef, [[ADD4_5]]
1478-
; CHECK-NEXT: ret float [[BIN_EXTRA6]]
1479+
; CHECK-NEXT: ret float [[BIN_EXTRA7]]
14791480
;
14801481
; THRESHOLD-LABEL: @extra_args_same_several_times(
14811482
; THRESHOLD-NEXT: entry:
@@ -1510,9 +1511,10 @@ define float @extra_args_same_several_times(float* nocapture readonly %x, i32 %a
15101511
; THRESHOLD-NEXT: [[TMP2:%.*]] = extractelement <8 x float> [[BIN_RDX4]], i32 0
15111512
; THRESHOLD-NEXT: [[BIN_EXTRA:%.*]] = fadd fast float [[TMP2]], [[ADD]]
15121513
; THRESHOLD-NEXT: [[BIN_EXTRA5:%.*]] = fadd fast float [[BIN_EXTRA]], 5.000000e+00
1513-
; THRESHOLD-NEXT: [[BIN_EXTRA6:%.*]] = fadd fast float [[BIN_EXTRA5]], [[CONV]]
1514+
; THRESHOLD-NEXT: [[BIN_EXTRA6:%.*]] = fadd fast float [[BIN_EXTRA5]], 5.000000e+00
1515+
; THRESHOLD-NEXT: [[BIN_EXTRA7:%.*]] = fadd fast float [[BIN_EXTRA6]], [[CONV]]
15141516
; THRESHOLD-NEXT: [[ADD4_6:%.*]] = fadd fast float undef, [[ADD4_5]]
1515-
; THRESHOLD-NEXT: ret float [[BIN_EXTRA6]]
1517+
; THRESHOLD-NEXT: ret float [[BIN_EXTRA7]]
15161518
;
15171519
entry:
15181520
%mul = mul nsw i32 %b, %a

0 commit comments

Comments
 (0)
Please sign in to comment.