Index: lib/Transforms/Vectorize/LoopVectorize.cpp =================================================================== --- lib/Transforms/Vectorize/LoopVectorize.cpp +++ lib/Transforms/Vectorize/LoopVectorize.cpp @@ -54,6 +54,7 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/BlockFrequencyInfo.h" @@ -98,6 +99,9 @@ using namespace llvm; using namespace llvm::PatternMatch; +STATISTIC(LoopsVectorized, "Number of loops vectorized"); +STATISTIC(LoopsAnalyzed, "Number of loops analyzed for vectorization"); + static cl::opt VectorizationFactor("force-vector-width", cl::init(0), cl::Hidden, cl::desc("Sets the SIMD width. Zero is autoselect.")); @@ -1080,6 +1084,8 @@ for (Loop *L : *LI) addInnerLoop(*L, Worklist); + LoopsAnalyzed += Worklist.size(); + // Now walk the identified inner loops. bool Changed = false; while (!Worklist.empty()) @@ -1182,6 +1188,7 @@ // If we decided that it is *legal* to vectorize the loop then do it. InnerLoopVectorizer LB(L, SE, LI, DT, DL, TLI, VF.Width, UF); LB.vectorize(&LVL); + ++LoopsVectorized; } // Mark the loop as already vectorized to avoid vectorizing again. Index: test/Transforms/LoopVectorize/vect.stats.ll =================================================================== --- /dev/null +++ test/Transforms/LoopVectorize/vect.stats.ll @@ -0,0 +1,65 @@ +; RUN: opt < %s -loop-vectorize -force-vector-unroll=4 -force-vector-width=4 -debug-only=loop-vectorize -stats -S 2>&1 | FileCheck %s +; REQUIRES: asserts + +; +; We have 2 loops, one of them is vectorizable and the second one is not. +; + +; CHECK: 12 loop-vectorize - Number of loops analyzed for vectorization +; CHECK: 1 loop-vectorize - Number of loops vectorized + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define void @vectorized(float* nocapture %a, i64 %size) { +entry: + %cmp1 = icmp sgt i64 %size, 0 + br i1 %cmp1, label %for.header, label %for.end + +for.header: + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] + %cmp2 = icmp sgt i64 %indvars.iv, %size + br i1 %cmp2, label %for.end, label %for.body + +for.body: + + %arrayidx = getelementptr inbounds float* %a, i64 %indvars.iv + %0 = load float* %arrayidx, align 4 + %mul = fmul float %0, %0 + store float %mul, float* %arrayidx, align 4 + + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + br label %for.header + +for.end: + ret void +} + +define void @not_vectorized(float* nocapture %a, i64 %size) { +entry: + %cmp1 = icmp sgt i64 %size, 0 + br i1 %cmp1, label %for.header, label %for.end + +for.header: + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] + %cmp2 = icmp sgt i64 %indvars.iv, %size + br i1 %cmp2, label %for.end, label %for.body + +for.body: + + %0 = add nsw i64 %indvars.iv, -5 + %arrayidx = getelementptr inbounds float* %a, i64 %0 + %1 = load float* %arrayidx, align 4 + %2 = add nsw i64 %indvars.iv, 2 + %arrayidx2 = getelementptr inbounds float* %a, i64 %2 + %3 = load float* %arrayidx2, align 4 + %mul = fmul float %1, %3 + %arrayidx4 = getelementptr inbounds float* %a, i64 %indvars.iv + store float %mul, float* %arrayidx4, align 4 + + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + br label %for.header + +for.end: + ret void +} \ No newline at end of file