diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -1677,7 +1677,12 @@
   /// not matter because we use the 'cost' units to compare different
   /// vector widths. The cost that is returned is *not* normalized by
   /// the factor width.
-  VectorizationCostTy expectedCost(ElementCount VF);
+  /// If \p Invalid is non-null, each instruction whose cost is invalid at
+  /// \p VF is appended to it, paired with \p VF.
+  VectorizationCostTy expectedCost(
+      ElementCount VF,
+      SmallVectorImpl<std::pair<Instruction *, ElementCount>> *Invalid =
+          nullptr);
 
   /// Returns the execution time cost of an instruction for a given vector
   /// width. Vector width of one means scalar.
@@ -6075,12 +6080,15 @@
     ChosenFactor.Cost = InstructionCost::getMax();
   }
 
+  // Collects every (instruction, VF) pair that expectedCost reports as having
+  // an invalid cost, for the remark emitted after the loop.
+  SmallVector<std::pair<Instruction *, ElementCount>> InvalidCosts;
   for (const auto &i : VFCandidates) {
     // The cost for scalar VF=1 is already calculated, so ignore it.
     if (i.isScalar())
       continue;
 
-    VectorizationCostTy C = expectedCost(i);
+    VectorizationCostTy C = expectedCost(i, &InvalidCosts);
     VectorizationFactor Candidate(i, C.first);
     LLVM_DEBUG(
         dbgs() << "LV: Vector loop of width " << i << " costs: "
@@ -6103,6 +6111,48 @@
       ChosenFactor = Candidate;
   }
 
+  // Emit a report of VFs with invalid costs in the loop, e.g.
+  //   remark: Instructions with invalid costs prevent vectorization for certain
+  //           VFs:
+  //     call float @llvm.sin(..) (vscale x 1, vscale x 4)
+  if (!InvalidCosts.empty()) {
+    // Group the entries per instruction and order each group by VF. The
+    // comparison has to be lexicographic: combining the two keys with `||`
+    // is not a strict weak ordering, which sorting requires.
+    // NOTE(review): instructions are ordered by address, so the relative order
+    // of different instructions in the remark is not guaranteed to be stable.
+    llvm::sort(InvalidCosts,
+               [](const std::pair<Instruction *, ElementCount> &A,
+                  const std::pair<Instruction *, ElementCount> &B) {
+                 if (A.first != B.first)
+                   return A.first < B.first;
+                 ElementCountComparator ECC;
+                 return ECC(A.second, B.second);
+               });
+
+    std::string OutString;
+    raw_string_ostream OS(OutString);
+    OS << "Instructions with invalid costs prevent vectorization for certain "
+          "VFs:\n";
+
+    // One output line per instruction, followed by its VFs in parentheses.
+    Instruction *Current = nullptr;
+    for (const auto &InstructionVFPair : InvalidCosts) {
+      if (Current != InstructionVFPair.first) {
+        if (Current != nullptr)
+          OS << ")\n";
+        OS << "\t" << *InstructionVFPair.first << "\t(";
+        Current = InstructionVFPair.first;
+      } else
+        OS << ", ";
+      OS << InstructionVFPair.second;
+    }
+    OS << ")";
+    OS.flush();
+    reportVectorizationInfo(OutString, "InvalidCost", ORE, TheLoop,
+                            InvalidCosts[0].first);
+  }
+
   if (!EnableCondStoresVectorization && NumPredStores) {
     reportVectorizationFailure("There are conditional stores.",
         "store that is conditionally executed prevents vectorization",
@@ -6884,7 +6934,9 @@
 }
 
 LoopVectorizationCostModel::VectorizationCostTy
-LoopVectorizationCostModel::expectedCost(ElementCount VF) {
+LoopVectorizationCostModel::expectedCost(
+    ElementCount VF,
+    SmallVectorImpl<std::pair<Instruction *, ElementCount>> *Invalid) {
   VectorizationCostTy Cost;
 
   // For each block.
@@ -6904,6 +6956,10 @@
       if (ForceTargetInstructionCost.getNumOccurrences() > 0)
         C.first = InstructionCost(ForceTargetInstructionCost);
 
+      // Keep a list of instructions with invalid costs.
+      if (Invalid && !C.first.isValid())
+        Invalid->emplace_back(&I, VF);
+
       BlockCost.first += C.first;
       BlockCost.second |= C.second;
       LLVM_DEBUG(dbgs() << "LV: Found an estimated cost of " << C.first
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-call.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-call.ll
--- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-call.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-call.ll
@@ -1,4 +1,6 @@
-; RUN: opt -S -loop-vectorize -force-vector-interleave=1 -instcombine -mattr=+sve -mtriple aarch64-unknown-linux-gnu -scalable-vectorization=on < %s | FileCheck %s
+; RUN: opt -S -loop-vectorize -force-vector-interleave=1 -instcombine -mattr=+sve -mtriple aarch64-unknown-linux-gnu -scalable-vectorization=on \
+; RUN: -pass-remarks-missed=loop-vectorize < %s 2>%t | FileCheck %s
+; RUN: cat %t | FileCheck %s --check-prefix=CHECK-REMARKS
 
 define void @vec_load(i64 %N, double* nocapture %a, double* nocapture readonly %b) {
 ; CHECK-LABEL: @vec_load
@@ -95,6 +97,11 @@
   ret void
 }
 
+; CHECK-REMARKS: UserVF ignored because of invalid costs.
+; CHECK-REMARKS-NEXT: Instructions with invalid costs prevent vectorization for certain VFs:
+; CHECK-REMARKS-NEXT: %0 = load float, float* %arrayidx, align 4 (vscale x 1)
+; CHECK-REMARKS-NEXT: %1 = tail call fast float @llvm.sin.f32(float %0) (vscale x 1, vscale x 2)
+; CHECK-REMARKS-NEXT: store float %1, float* %arrayidx1, align 4 (vscale x 1)
 define void @vec_sin_no_mapping(float* noalias nocapture %dst, float* noalias nocapture readonly %src, i64 %n) {
 ; CHECK: @vec_sin_no_mapping
 ; CHECK: call fast <2 x float> @llvm.sin.v2f32
@@ -117,6 +124,11 @@
   ret void
 }
 
+; CHECK-REMARKS: UserVF ignored because of invalid costs.
+; CHECK-REMARKS-NEXT: Instructions with invalid costs prevent vectorization for certain VFs:
+; CHECK-REMARKS-NEXT: %0 = load float, float* %arrayidx, align 4 (vscale x 1)
+; CHECK-REMARKS-NEXT: %1 = tail call fast float @llvm.sin.f32(float %0) #5 (vscale x 1, vscale x 2)
+; CHECK-REMARKS-NEXT: store float %1, float* %arrayidx1, align 4 (vscale x 1)
 define void @vec_sin_fixed_mapping(float* noalias nocapture %dst, float* noalias nocapture readonly %src, i64 %n) {
 ; CHECK: @vec_sin_fixed_mapping
 ; CHECK: call fast <2 x float> @llvm.sin.v2f32