Index: llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp =================================================================== --- llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -304,6 +304,7 @@ typedef SmallVector InstrList; typedef SmallPtrSet ValueSet; typedef SmallVector StoreList; + typedef MapVector> ExtraValueToDebugLocsMap; BoUpSLP(Function *Func, ScalarEvolution *Se, TargetTransformInfo *Tti, TargetLibraryInfo *TLi, AliasAnalysis *Aa, LoopInfo *Li, @@ -333,7 +334,7 @@ /// Vectorize the tree but with the list of externally used values \p /// ExternallyUsedValues. Values in this MapVector can be replaced but the /// generated extractvalue instructions. - Value *vectorizeTree(MapVector &ExternallyUsedValues); + Value *vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues); /// \returns the cost incurred by unwanted spills and fills, caused by /// holding live values over call sites. @@ -352,7 +353,7 @@ /// into account (anf updating it, if required) list of externally used /// values stored in \p ExternallyUsedValues. void buildTree(ArrayRef Roots, - MapVector &ExternallyUsedValues, + ExtraValueToDebugLocsMap &ExternallyUsedValues, ArrayRef UserIgnoreLst = None); /// Clear the internal data structures that are created by 'buildTree'. @@ -953,11 +954,11 @@ void BoUpSLP::buildTree(ArrayRef Roots, ArrayRef UserIgnoreLst) { - MapVector ExternallyUsedValues; + ExtraValueToDebugLocsMap ExternallyUsedValues; buildTree(Roots, ExternallyUsedValues, UserIgnoreLst); } void BoUpSLP::buildTree(ArrayRef Roots, - MapVector &ExternallyUsedValues, + ExtraValueToDebugLocsMap &ExternallyUsedValues, ArrayRef UserIgnoreLst) { deleteTree(); UserIgnoreList = UserIgnoreLst; @@ -2801,12 +2802,12 @@ } Value *BoUpSLP::vectorizeTree() { - MapVector ExternallyUsedValues; + ExtraValueToDebugLocsMap ExternallyUsedValues; return vectorizeTree(ExternallyUsedValues); } Value * -BoUpSLP::vectorizeTree(MapVector &ExternallyUsedValues) { +BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) { // All blocks must be scheduled before any instructions are inserted. for (auto &BSIter : BlocksSchedules) { @@ -2868,7 +2869,6 @@ assert(ExternallyUsedValues.count(Scalar) && "Scalar with nullptr as an external user must be registered in " "ExternallyUsedValues map"); - DebugLoc DL = ExternallyUsedValues[Scalar]; if (auto *VecI = dyn_cast(Vec)) { Builder.SetInsertPoint(VecI->getParent(), std::next(VecI->getIterator())); @@ -2878,8 +2878,8 @@ Value *Ex = Builder.CreateExtractElement(Vec, Lane); Ex = extend(ScalarRoot, Ex, Scalar->getType()); CSEBlocks.insert(cast(Scalar)->getParent()); + ExternallyUsedValues[Ex] = ExternallyUsedValues[Scalar]; ExternallyUsedValues.erase(Scalar); - ExternallyUsedValues[Ex] = DL; continue; } @@ -4439,9 +4439,11 @@ Builder.setFastMathFlags(Unsafe); unsigned i = 0; - MapVector ExternallyUsedValues; + BoUpSLP::ExtraValueToDebugLocsMap ExternallyUsedValues; + // The same extra argument may be used several time, so log each attempt + // to use it. for (auto &Pair : ExtraArgs) - ExternallyUsedValues[Pair.second] = Pair.first->getDebugLoc(); + ExternallyUsedValues[Pair.second].push_back(Pair.first->getDebugLoc()); while (i < NumReducedVals - ReduxWidth + 1 && ReduxWidth > 2) { auto VL = makeArrayRef(&ReducedVals[i], ReduxWidth); V.buildTree(VL, ExternallyUsedValues, ReductionOps); @@ -4489,9 +4491,12 @@ Builder.CreateBinOp(ReductionOpcode, VectorizedTree, I); } for (auto &Pair : ExternallyUsedValues) { - Builder.SetCurrentDebugLocation(Pair.second); - VectorizedTree = Builder.CreateBinOp(ReductionOpcode, VectorizedTree, - Pair.first, "bin.extra"); + // Add each externally used value to the final reduction. + for (auto &DL : Pair.second) { + Builder.SetCurrentDebugLocation(DL); + VectorizedTree = Builder.CreateBinOp(ReductionOpcode, VectorizedTree, + Pair.first, "bin.extra"); + } } // Update users. if (ReductionPHI && !isa(ReductionPHI)) { Index: llvm/trunk/test/Transforms/SLPVectorizer/X86/horizontal-list.ll =================================================================== --- llvm/trunk/test/Transforms/SLPVectorizer/X86/horizontal-list.ll +++ llvm/trunk/test/Transforms/SLPVectorizer/X86/horizontal-list.ll @@ -1473,9 +1473,10 @@ ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x float> [[BIN_RDX4]], i32 0 ; CHECK-NEXT: [[BIN_EXTRA:%.*]] = fadd fast float [[TMP2]], [[ADD]] ; CHECK-NEXT: [[BIN_EXTRA5:%.*]] = fadd fast float [[BIN_EXTRA]], 5.000000e+00 -; CHECK-NEXT: [[BIN_EXTRA6:%.*]] = fadd fast float [[BIN_EXTRA5]], [[CONV]] +; CHECK-NEXT: [[BIN_EXTRA6:%.*]] = fadd fast float [[BIN_EXTRA5]], 5.000000e+00 +; CHECK-NEXT: [[BIN_EXTRA7:%.*]] = fadd fast float [[BIN_EXTRA6]], [[CONV]] ; CHECK-NEXT: [[ADD4_6:%.*]] = fadd fast float undef, [[ADD4_5]] -; CHECK-NEXT: ret float [[BIN_EXTRA6]] +; CHECK-NEXT: ret float [[BIN_EXTRA7]] ; ; THRESHOLD-LABEL: @extra_args_same_several_times( ; THRESHOLD-NEXT: entry: @@ -1510,9 +1511,10 @@ ; THRESHOLD-NEXT: [[TMP2:%.*]] = extractelement <8 x float> [[BIN_RDX4]], i32 0 ; THRESHOLD-NEXT: [[BIN_EXTRA:%.*]] = fadd fast float [[TMP2]], [[ADD]] ; THRESHOLD-NEXT: [[BIN_EXTRA5:%.*]] = fadd fast float [[BIN_EXTRA]], 5.000000e+00 -; THRESHOLD-NEXT: [[BIN_EXTRA6:%.*]] = fadd fast float [[BIN_EXTRA5]], [[CONV]] +; THRESHOLD-NEXT: [[BIN_EXTRA6:%.*]] = fadd fast float [[BIN_EXTRA5]], 5.000000e+00 +; THRESHOLD-NEXT: [[BIN_EXTRA7:%.*]] = fadd fast float [[BIN_EXTRA6]], [[CONV]] ; THRESHOLD-NEXT: [[ADD4_6:%.*]] = fadd fast float undef, [[ADD4_5]] -; THRESHOLD-NEXT: ret float [[BIN_EXTRA6]] +; THRESHOLD-NEXT: ret float [[BIN_EXTRA7]] ; entry: %mul = mul nsw i32 %b, %a