NOTE(review): this patch was recovered from a copy whose line breaks were
collapsed and whose `<...>` template arguments were stripped. Indentation,
blank context lines and the arguments of cast<>/dyn_cast<>/isa<>/ArrayRef<>
are reconstructed -- confirm against the tree before applying.

Summary: when costing vectorized loads/stores (and load-rooted reductions),
account for the scalar GEPs feeding the non-main memory accesses. Only GEPs
with a single use and at least one non-constant index are counted; each is
priced as one Add on the GEP's type.

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -919,6 +919,20 @@
   /// Construct a vectorizable tree that starts at \p Roots.
   void buildTree(ArrayRef<Value *> Roots);
 
+  /// Checks if the very first tree node is going to be vectorized.
+  /// Returns false for an empty tree.
+  bool isVectorizedFirstNode() const {
+    return !VectorizableTree.empty() &&
+           VectorizableTree.front()->State == TreeEntry::Vectorize;
+  }
+
+  /// Returns the main instruction for the very first node.
+  /// The tree must be non-empty; this is only checked by an assertion.
+  Instruction *getFirstNodeMainOp() const {
+    assert(!VectorizableTree.empty() && "No tree to get the first node from");
+    return VectorizableTree.front()->getMainOp();
+  }
+
   /// Builds external uses of the vectorized scalars, i.e. the list of
   /// vectorized scalars to be extracted, their lanes and their scalar users. \p
   /// ExternallyUsedValues contains additional list of external uses to handle
@@ -6614,7 +6628,24 @@
       InstructionCost VecLdCost;
       if (E->State == TreeEntry::Vectorize) {
         VecLdCost = TTI->getMemoryOpCost(Instruction::Load, VecTy, Alignment, 0,
-                                         CostKind, {TTI::OK_AnyValue, TTI::OP_None}, VL0);
+                                         CostKind,
+                                         {TTI::OK_AnyValue, TTI::OP_None}, VL0);
+        for (Value *V : VL) {
+          auto *VI = cast<LoadInst>(V);
+          // Add the costs of scalar GEP pointers, to be removed from the code.
+          // The pointer of VL0 itself is not counted.
+          if (VI == VL0)
+            continue;
+          auto *Ptr = dyn_cast<GetElementPtrInst>(VI->getPointerOperand());
+          // Skip non-GEP pointers, GEPs with more than one use and GEPs whose
+          // indices are all constants.
+          if (!Ptr || !Ptr->hasOneUse() ||
+              all_of(Ptr->indices(),
+                     [](const Use &U) { return isConstant(U.get()); }))
+            continue;
+          ScalarLdCost += TTI->getArithmeticInstrCost(Instruction::Add,
+                                                      Ptr->getType(), CostKind);
+        }
       } else {
         assert(E->State == TreeEntry::ScatterVectorize && "Unknown EntryState");
         Align CommonAlignment = Alignment;
@@ -6636,11 +6667,24 @@
       Align Alignment = SI->getAlign();
       InstructionCost ScalarStCost = 0;
       for (auto *V : VL) {
-        auto *VI = cast<Instruction>(V);
+        auto *VI = cast<StoreInst>(V);
         TTI::OperandValueInfo OpInfo = TTI::getOperandInfo(VI->getOperand(0));
         ScalarStCost +=
             TTI->getMemoryOpCost(Instruction::Store, ScalarTy, Alignment, 0,
                                  CostKind, OpInfo, VI);
+        // Add the costs of scalar GEP pointers, to be removed from the code.
+        // The pointer of SI itself is not counted.
+        if (VI == SI)
+          continue;
+        auto *Ptr = dyn_cast<GetElementPtrInst>(VI->getPointerOperand());
+        // Skip non-GEP pointers, GEPs with more than one use and GEPs whose
+        // indices are all constants.
+        if (!Ptr || !Ptr->hasOneUse() ||
+            all_of(Ptr->indices(),
+                   [](const Use &U) { return isConstant(U.get()); }))
+          continue;
+        ScalarStCost += TTI->getArithmeticInstrCost(Instruction::Add,
+                                                    Ptr->getType(), CostKind);
       }
       TTI::OperandValueInfo OpInfo = getOperandInfo(VL, 0);
       InstructionCost VecStCost =
@@ -11442,6 +11486,25 @@
         InstructionCost TreeCost = V.getTreeCost(VL);
         InstructionCost ReductionCost =
             getReductionCost(TTI, VL, ReduxWidth, RdxFMF);
+        if (V.isVectorizedFirstNode() && isa<LoadInst>(VL.front())) {
+          Instruction *MainOp = V.getFirstNodeMainOp();
+          for (Value *RdxVal : VL) {
+            auto *VI = dyn_cast<LoadInst>(RdxVal);
+            // Subtract the costs of scalar GEP pointers, to be removed from
+            // the code.
+            if (!VI || VI == MainOp)
+              continue;
+            auto *Ptr = dyn_cast<GetElementPtrInst>(VI->getPointerOperand());
+            // Skip non-GEP pointers, GEPs with more than one use and GEPs
+            // whose indices are all constants.
+            if (!Ptr || !Ptr->hasOneUse() ||
+                all_of(Ptr->indices(),
+                       [](const Use &U) { return isConstant(U.get()); }))
+              continue;
+            TreeCost -= TTI->getArithmeticInstrCost(
+                Instruction::Add, Ptr->getType(), TTI::TCK_RecipThroughput);
+          }
+        }
         InstructionCost Cost = TreeCost + ReductionCost;
         LLVM_DEBUG(dbgs() << "SLP: Found cost = " << Cost << " for reduction\n");
         if (!Cost.isValid()) {
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/remark_horcost.ll b/llvm/test/Transforms/SLPVectorizer/X86/remark_horcost.ll
--- a/llvm/test/Transforms/SLPVectorizer/X86/remark_horcost.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/remark_horcost.ll
@@ -91,7 +91,7 @@
 ; YAML-NEXT: Function:        foo
 ; YAML-NEXT: Args:
 ; YAML-NEXT:   - String:          'Stores SLP vectorized with cost '
-; YAML-NEXT:   - Cost:            '-5'
+; YAML-NEXT:   - Cost:            '-14'
 ; YAML-NEXT:   - String:          ' and with tree size '
 ; YAML-NEXT:   - TreeSize:        '4'
 
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/remark_not_all_parts.ll b/llvm/test/Transforms/SLPVectorizer/X86/remark_not_all_parts.ll
--- a/llvm/test/Transforms/SLPVectorizer/X86/remark_not_all_parts.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/remark_not_all_parts.ll
@@ -68,7 +68,7 @@
 ; YAML-NEXT: Function:        foo
 ; YAML-NEXT: Args:
 ; YAML-NEXT:   - String:          'Stores SLP vectorized with cost '
-; YAML-NEXT:   - Cost:            '-1'
+; YAML-NEXT:   - Cost:            '-4'
 ; YAML-NEXT:   - String:          ' and with tree size '
 ; YAML-NEXT:   - TreeSize:        '4'
 