Skip to content

Commit b551a81

Browse files
committedFeb 22, 2017
[SLP] Fix for PR32036: Vectorized horizontal reduction returning wrong result
Summary: If the same value is used several times as an extra value, SLP vectorizer takes it into account only once instead of actual number of using. For example: ``` int val = 1; for (int y = 0; y < 8; y++) { for (int x = 0; x < 8; x++) { val = val + input[y * 8 + x] + 3; } } ``` We have 2 extra rguments: `1` - initial value of horizontal reduction and `3`, which is added 8*8 times to the reduction. Before the patch we added `1` to the reduction value and added once `3`, though it must be added 64 times. Reviewers: mkuper, mzolotukhin Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D30262 llvm-svn: 295868
1 parent 4991d35 commit b551a81

File tree

2 files changed

+24
-17
lines changed

2 files changed

+24
-17
lines changed
 

‎llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

+18-13
Original file line numberDiff line numberDiff line change
@@ -304,6 +304,7 @@ class BoUpSLP {
304304
typedef SmallVector<Instruction *, 16> InstrList;
305305
typedef SmallPtrSet<Value *, 16> ValueSet;
306306
typedef SmallVector<StoreInst *, 8> StoreList;
307+
typedef MapVector<Value *, std::vector<DebugLoc>> ExtraValueToDebugLocsMap;
307308

308309
BoUpSLP(Function *Func, ScalarEvolution *Se, TargetTransformInfo *Tti,
309310
TargetLibraryInfo *TLi, AliasAnalysis *Aa, LoopInfo *Li,
@@ -333,7 +334,7 @@ class BoUpSLP {
333334
/// Vectorize the tree but with the list of externally used values \p
334335
/// ExternallyUsedValues. Values in this MapVector can be replaced but the
335336
/// generated extractvalue instructions.
336-
Value *vectorizeTree(MapVector<Value *, DebugLoc> &ExternallyUsedValues);
337+
Value *vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues);
337338

338339
/// \returns the cost incurred by unwanted spills and fills, caused by
339340
/// holding live values over call sites.
@@ -352,7 +353,7 @@ class BoUpSLP {
352353
/// into account (anf updating it, if required) list of externally used
353354
/// values stored in \p ExternallyUsedValues.
354355
void buildTree(ArrayRef<Value *> Roots,
355-
MapVector<Value *, DebugLoc> &ExternallyUsedValues,
356+
ExtraValueToDebugLocsMap &ExternallyUsedValues,
356357
ArrayRef<Value *> UserIgnoreLst = None);
357358

358359
/// Clear the internal data structures that are created by 'buildTree'.
@@ -953,11 +954,11 @@ class BoUpSLP {
953954

954955
void BoUpSLP::buildTree(ArrayRef<Value *> Roots,
955956
ArrayRef<Value *> UserIgnoreLst) {
956-
MapVector<Value *, DebugLoc> ExternallyUsedValues;
957+
ExtraValueToDebugLocsMap ExternallyUsedValues;
957958
buildTree(Roots, ExternallyUsedValues, UserIgnoreLst);
958959
}
959960
void BoUpSLP::buildTree(ArrayRef<Value *> Roots,
960-
MapVector<Value *, DebugLoc> &ExternallyUsedValues,
961+
ExtraValueToDebugLocsMap &ExternallyUsedValues,
961962
ArrayRef<Value *> UserIgnoreLst) {
962963
deleteTree();
963964
UserIgnoreList = UserIgnoreLst;
@@ -2801,12 +2802,12 @@ Value *BoUpSLP::vectorizeTree(ArrayRef<Value *> VL, TreeEntry *E) {
28012802
}
28022803

28032804
Value *BoUpSLP::vectorizeTree() {
2804-
MapVector<Value *, DebugLoc> ExternallyUsedValues;
2805+
ExtraValueToDebugLocsMap ExternallyUsedValues;
28052806
return vectorizeTree(ExternallyUsedValues);
28062807
}
28072808

28082809
Value *
2809-
BoUpSLP::vectorizeTree(MapVector<Value *, DebugLoc> &ExternallyUsedValues) {
2810+
BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) {
28102811

28112812
// All blocks must be scheduled before any instructions are inserted.
28122813
for (auto &BSIter : BlocksSchedules) {
@@ -2868,7 +2869,6 @@ BoUpSLP::vectorizeTree(MapVector<Value *, DebugLoc> &ExternallyUsedValues) {
28682869
assert(ExternallyUsedValues.count(Scalar) &&
28692870
"Scalar with nullptr as an external user must be registered in "
28702871
"ExternallyUsedValues map");
2871-
DebugLoc DL = ExternallyUsedValues[Scalar];
28722872
if (auto *VecI = dyn_cast<Instruction>(Vec)) {
28732873
Builder.SetInsertPoint(VecI->getParent(),
28742874
std::next(VecI->getIterator()));
@@ -2878,8 +2878,8 @@ BoUpSLP::vectorizeTree(MapVector<Value *, DebugLoc> &ExternallyUsedValues) {
28782878
Value *Ex = Builder.CreateExtractElement(Vec, Lane);
28792879
Ex = extend(ScalarRoot, Ex, Scalar->getType());
28802880
CSEBlocks.insert(cast<Instruction>(Scalar)->getParent());
2881+
ExternallyUsedValues[Ex] = ExternallyUsedValues[Scalar];
28812882
ExternallyUsedValues.erase(Scalar);
2882-
ExternallyUsedValues[Ex] = DL;
28832883
continue;
28842884
}
28852885

@@ -4439,9 +4439,11 @@ class HorizontalReduction {
44394439
Builder.setFastMathFlags(Unsafe);
44404440
unsigned i = 0;
44414441

4442-
MapVector<Value *, DebugLoc> ExternallyUsedValues;
4442+
BoUpSLP::ExtraValueToDebugLocsMap ExternallyUsedValues;
4443+
// The same extra argument may be used several time, so log each attempt
4444+
// to use it.
44434445
for (auto &Pair : ExtraArgs)
4444-
ExternallyUsedValues[Pair.second] = Pair.first->getDebugLoc();
4446+
ExternallyUsedValues[Pair.second].push_back(Pair.first->getDebugLoc());
44454447
while (i < NumReducedVals - ReduxWidth + 1 && ReduxWidth > 2) {
44464448
auto VL = makeArrayRef(&ReducedVals[i], ReduxWidth);
44474449
V.buildTree(VL, ExternallyUsedValues, ReductionOps);
@@ -4489,9 +4491,12 @@ class HorizontalReduction {
44894491
Builder.CreateBinOp(ReductionOpcode, VectorizedTree, I);
44904492
}
44914493
for (auto &Pair : ExternallyUsedValues) {
4492-
Builder.SetCurrentDebugLocation(Pair.second);
4493-
VectorizedTree = Builder.CreateBinOp(ReductionOpcode, VectorizedTree,
4494-
Pair.first, "bin.extra");
4494+
// Add each externally used value to the final reduction.
4495+
for (auto &DL : Pair.second) {
4496+
Builder.SetCurrentDebugLocation(DL);
4497+
VectorizedTree = Builder.CreateBinOp(ReductionOpcode, VectorizedTree,
4498+
Pair.first, "bin.extra");
4499+
}
44954500
}
44964501
// Update users.
44974502
if (ReductionPHI && !isa<UndefValue>(ReductionPHI)) {

‎llvm/test/Transforms/SLPVectorizer/X86/horizontal-list.ll

+6-4
Original file line numberDiff line numberDiff line change
@@ -1473,9 +1473,10 @@ define float @extra_args_same_several_times(float* nocapture readonly %x, i32 %a
14731473
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x float> [[BIN_RDX4]], i32 0
14741474
; CHECK-NEXT: [[BIN_EXTRA:%.*]] = fadd fast float [[TMP2]], [[ADD]]
14751475
; CHECK-NEXT: [[BIN_EXTRA5:%.*]] = fadd fast float [[BIN_EXTRA]], 5.000000e+00
1476-
; CHECK-NEXT: [[BIN_EXTRA6:%.*]] = fadd fast float [[BIN_EXTRA5]], [[CONV]]
1476+
; CHECK-NEXT: [[BIN_EXTRA6:%.*]] = fadd fast float [[BIN_EXTRA5]], 5.000000e+00
1477+
; CHECK-NEXT: [[BIN_EXTRA7:%.*]] = fadd fast float [[BIN_EXTRA6]], [[CONV]]
14771478
; CHECK-NEXT: [[ADD4_6:%.*]] = fadd fast float undef, [[ADD4_5]]
1478-
; CHECK-NEXT: ret float [[BIN_EXTRA6]]
1479+
; CHECK-NEXT: ret float [[BIN_EXTRA7]]
14791480
;
14801481
; THRESHOLD-LABEL: @extra_args_same_several_times(
14811482
; THRESHOLD-NEXT: entry:
@@ -1510,9 +1511,10 @@ define float @extra_args_same_several_times(float* nocapture readonly %x, i32 %a
15101511
; THRESHOLD-NEXT: [[TMP2:%.*]] = extractelement <8 x float> [[BIN_RDX4]], i32 0
15111512
; THRESHOLD-NEXT: [[BIN_EXTRA:%.*]] = fadd fast float [[TMP2]], [[ADD]]
15121513
; THRESHOLD-NEXT: [[BIN_EXTRA5:%.*]] = fadd fast float [[BIN_EXTRA]], 5.000000e+00
1513-
; THRESHOLD-NEXT: [[BIN_EXTRA6:%.*]] = fadd fast float [[BIN_EXTRA5]], [[CONV]]
1514+
; THRESHOLD-NEXT: [[BIN_EXTRA6:%.*]] = fadd fast float [[BIN_EXTRA5]], 5.000000e+00
1515+
; THRESHOLD-NEXT: [[BIN_EXTRA7:%.*]] = fadd fast float [[BIN_EXTRA6]], [[CONV]]
15141516
; THRESHOLD-NEXT: [[ADD4_6:%.*]] = fadd fast float undef, [[ADD4_5]]
1515-
; THRESHOLD-NEXT: ret float [[BIN_EXTRA6]]
1517+
; THRESHOLD-NEXT: ret float [[BIN_EXTRA7]]
15161518
;
15171519
entry:
15181520
%mul = mul nsw i32 %b, %a

0 commit comments

Comments
 (0)