Index: lib/Transforms/Vectorize/SLPVectorizer.cpp
===================================================================
--- lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -4433,19 +4433,44 @@
   unsigned Sz = R.getVectorElementSize(I0);
   unsigned MinVF = std::max(2U, R.getMinVecRegSize() / Sz);
   unsigned MaxVF = std::max(PowerOf2Floor(VL.size()), MinVF);
-  if (MaxVF < 2)
-    return false;
+  if (MaxVF < 2) {
+    R.getORE()->emit(OptimizationRemarkMissed(SV_NAME, "SmallVF", I0)
+                     << "Cannot SLP vectorize list: vectorization factor "
+                        "less than 2 is not supported");
+    return false;
+  }
 
   for (Value *V : VL) {
     Type *Ty = V->getType();
-    if (!isValidElementType(Ty))
+    if (!isValidElementType(Ty)) {
+      // NOTE: this prints the internal LLVM type name, which may not be
+      // useful to the user.
+      std::string TypeStr;
+      llvm::raw_string_ostream OS(TypeStr);
+      Ty->print(OS);
+      R.getORE()->emit(OptimizationRemarkMissed(SV_NAME, "UnsupportedType", I0)
+                       << "Cannot SLP vectorize list: type " + OS.str() +
+                              " is unsupported by vectorizer");
       return false;
+    }
     Instruction *Inst = dyn_cast<Instruction>(V);
-    if (!Inst || Inst->getOpcode() != Opcode0)
+    // A value that is not an instruction is rejected without a remark.
+    if (!Inst)
+      return false;
+    if (Inst->getOpcode() != Opcode0) {
+      // FIXME: message mentions types but the check compares opcodes; reword.
+      R.getORE()->emit(OptimizationRemarkMissed(SV_NAME, "InequableTypes", I0)
+                       << "Cannot SLP vectorize list: not all of the parts of "
+                          "scalar instructions are of the same type");
       return false;
+    }
   }
 
   bool Changed = false;
+  // Set once at least one candidate tree has been costed.
+  bool CandidateFound = false;
+  // Lowest tree cost seen so far; only meaningful once CandidateFound is set.
+  int MinCost = 0;
 
   // Keep track of values that were deleted by vectorizing in the loop below.
   SmallVector TrackValues(VL.begin(), VL.end());
@@ -4499,6 +4524,9 @@
 
       R.computeMinimumValueSizes();
       int Cost = R.getTreeCost();
+      // Seed from the first candidate so the reported minimum is a real cost.
+      MinCost = CandidateFound ? std::min(MinCost, Cost) : Cost;
+      CandidateFound = true;
 
       if (Cost < -SLPCostThreshold) {
         DEBUG(dbgs() << "SLP: Vectorizing list at cost:" << Cost << ".\n");
@@ -4541,6 +4569,17 @@
     }
   }
 
+  if (!Changed && CandidateFound) {
+    R.getORE()->emit(OptimizationRemarkMissed(SV_NAME, "NotBeneficial", I0)
+                     << "List vectorization was possible but not beneficial "
+                        "with cost "
+                     << ore::NV("Cost", MinCost) << " >= "
+                     << ore::NV("Threshold", -SLPCostThreshold));
+  } else if (!Changed) {
+    R.getORE()->emit(OptimizationRemarkMissed(SV_NAME, "NotPossible", I0)
+                     << "Cannot vectorize list: vectorization was impossible"
+                     << " with available vectorization factors");
+  }
   return Changed;
 }
 
@@ -5234,6 +5273,10 @@
         SmallVector Reversed(VL.rbegin(), VL.rend());
         V.buildTree(Reversed, ExternallyUsedValues, IgnoreList);
       }
+
+      // Instruction used as the anchor of the remarks emitted below.
+      auto *I0 = cast<Instruction>(VL[0]);
+
       if (V.isTreeTinyAndNotFullyVectorizable())
         break;
 
@@ -5242,12 +5285,17 @@
       // Estimate cost.
       int Cost =
           V.getTreeCost() + getReductionCost(TTI, ReducedVals[i], ReduxWidth);
-      if (Cost >= -SLPCostThreshold)
+      if (Cost >= -SLPCostThreshold) {
+        V.getORE()->emit(
+            OptimizationRemarkMissed(SV_NAME, "HorSLPNotBeneficial", I0)
+            << "Vectorizing horizontal reduction is possible but not "
+               "beneficial with cost "
+            << ore::NV("Cost", Cost));
         break;
+      }
 
       DEBUG(dbgs() << "SLP: Vectorizing horizontal reduction at cost:" << Cost
                    << ". (HorRdx)\n");
-      auto *I0 = cast<Instruction>(VL[0]);
       V.getORE()->emit(
           OptimizationRemark(SV_NAME, "VectorizedHorizontalReduction", I0)
           << "Vectorized horizontal reduction with cost "
Index: test/Transforms/SLPVectorizer/remark_slp_chain.c
===================================================================
--- /dev/null
+++ test/Transforms/SLPVectorizer/remark_slp_chain.c
@@ -0,0 +1,15 @@
+// RUN: clang -O3 -fslp-vectorize -Rpass=slp-vectorizer -Rpass-missed=slp-vectorizer -S %s -o %t.s 2> %t.err
+// RUN: FileCheck --input-file=%t.err %s
+
+void vsub2_test(int *__restrict pin1, int *__restrict pin2, int *__restrict pout) {
+  int *ptmpi1 = pin1, *ptmpi2 = pin2, *po = pout, idx;
+  #pragma clang loop unroll(disable)
+  for (idx = 0; idx < 64; idx++) {
+    *po++ = *ptmpi1++ - *ptmpi2++;
+    *po++ = *ptmpi1++ - *ptmpi2++;
+    *po++ = *ptmpi1++ - *ptmpi2++;
+    *po++ = *ptmpi1++ - *ptmpi2++;
+    *po++ = *ptmpi1++ - *ptmpi2++;
+    // CHECK: {{.*}}:[[@LINE-5]]:{{[0-9]+}}: remark: Stores SLP vectorized with cost {{-[0-9]+}} and with tree size {{[0-9]+}} [-Rpass=slp-vectorizer]
+  }
+}
Index: test/Transforms/SLPVectorizer/remark_slp_listcost.c
===================================================================
--- /dev/null
+++ test/Transforms/SLPVectorizer/remark_slp_listcost.c
@@ -0,0 +1,9 @@
+// RUN: clang -O3 -Rpass=slp-vectorizer -Rpass-missed=slp-vectorizer -S %s -o %t.s 2> %t.err
+// RUN: FileCheck --input-file=%t.err %s
+
+void vsub2_test(int *pin1, int *pin2, int *pout) {
+  int *ptmpi1 = pin1, *ptmpi2 = pin2, *po = pout, idx;
+  // CHECK: {{.*}}:[[@LINE-2]]:{{[0-9]+}}: remark: List vectorization was possible but not beneficial with cost {{[0-9]+}} >= 0
+  for (idx = 0; idx < 64; idx++)
+    *po++ = *ptmpi1++ - *ptmpi2++;
+}