Index: lib/Transforms/Vectorize/LoopVectorize.cpp =================================================================== --- lib/Transforms/Vectorize/LoopVectorize.cpp +++ lib/Transforms/Vectorize/LoopVectorize.cpp @@ -214,6 +214,11 @@ cl::desc("The maximum interleave count to use when interleaving a scalar " "reduction in a nested loop.")); +static cl::opt<unsigned> PragmaVectorizeMemoryCheckThreshold( + "pragma-vectorize-memory-check-threshold", cl::init(128), cl::Hidden, + cl::desc("The maximum allowed number of runtime memory checks with a " + "vectorize(enable) pragma.")); + namespace { // Forward declarations. @@ -862,14 +867,16 @@ bool allowVectorization(Function *F, Loop *L, bool AlwaysVectorize) const { if (getForce() == LoopVectorizeHints::FK_Disabled) { DEBUG(dbgs() << "LV: Not vectorizing: #pragma vectorize disable.\n"); - emitOptimizationRemarkAnalysis(F->getContext(), DEBUG_TYPE, *F, + emitOptimizationRemarkAnalysis(F->getContext(), + vectorizeAnalysisPassName(), *F, L->getStartLoc(), emitRemark()); return false; } if (!AlwaysVectorize && getForce() != LoopVectorizeHints::FK_Enabled) { DEBUG(dbgs() << "LV: Not vectorizing: No #pragma vectorize enable.\n"); - emitOptimizationRemarkAnalysis(F->getContext(), DEBUG_TYPE, *F, + emitOptimizationRemarkAnalysis(F->getContext(), + vectorizeAnalysisPassName(), *F, L->getStartLoc(), emitRemark()); return false; } @@ -882,7 +889,7 @@ // vectorize.disable to be used without disabling the pass and errors // to differentiate between disabled vectorization and a width of 1. 
emitOptimizationRemarkAnalysis( - F->getContext(), DEBUG_TYPE, *F, L->getStartLoc(), + F->getContext(), vectorizeAnalysisPassName(), *F, L->getStartLoc(), "loop not vectorized: vectorization and interleaving are explicitly " "disabled, or vectorize width and interleave count are both set to " "1"); @@ -915,9 +922,25 @@ unsigned getWidth() const { return Width.Value; } unsigned getInterleave() const { return Interleave.Value; } enum ForceKind getForce() const { return (ForceKind)Force.Value; } - bool isForced() const { - return getForce() == LoopVectorizeHints::FK_Enabled || getWidth() > 1 || - getInterleave() > 1; + const char *vectorizeAnalysisPassName() const { + // If hints are provided that don't disable vectorization use the + // AlwaysPrint pass name to force the frontend to print the diagnostic. + if (getWidth() == 1) + return LV_NAME; + if (getForce() == LoopVectorizeHints::FK_Disabled) + return LV_NAME; + if (getForce() == LoopVectorizeHints::FK_Undefined && getWidth() == 0) + return LV_NAME; + return DiagnosticInfo::AlwaysPrint; + } + + bool allowReordering() const { + // Allow the vectorizer to change the order of operations that is given by + // the scalar loop when it is potentially unsafe or inefficient. For + // example, changing the order of floating-point operations can affect the + // result due to differences in floating-point round-off. Specifying a loop + // hint instructs the vectorizer to proceed. + return getForce() == LoopVectorizeHints::FK_Enabled || getWidth() > 1; } private: @@ -1039,15 +1062,14 @@ static void emitAnalysisDiag(const Function *TheFunction, const Loop *TheLoop, const LoopVectorizeHints &Hints, const LoopAccessReport &Message) { - // If a loop hint is provided the diagnostic is always produced. - const char *Name = Hints.isForced() ? 
DiagnosticInfo::AlwaysPrint : LV_NAME; + const char *Name = Hints.vectorizeAnalysisPassName(); LoopAccessReport::emitAnalysis(Message, TheFunction, TheLoop, Name); } static void emitMissedWarning(Function *F, Loop *L, const LoopVectorizeHints &LH) { - emitOptimizationRemarkMissed(F->getContext(), DEBUG_TYPE, *F, - L->getStartLoc(), LH.emitRemark()); + emitOptimizationRemarkMissed(F->getContext(), LV_NAME, *F, L->getStartLoc(), + LH.emitRemark()); if (LH.getForce() == LoopVectorizeHints::FK_Enabled) { if (LH.getWidth() != 1) @@ -1497,32 +1519,27 @@ void addRuntimePointerChecks(unsigned Num) { NumRuntimePointerChecks = Num; } bool doesNotMeet(Function *F, Loop *L, const LoopVectorizeHints &Hints) { - // If a loop hint is provided the diagnostic is always produced. - const char *Name = Hints.isForced() ? DiagnosticInfo::AlwaysPrint : LV_NAME; + const char *Name = Hints.vectorizeAnalysisPassName(); bool Failed = false; - if (UnsafeAlgebraInst && - Hints.getForce() == LoopVectorizeHints::FK_Undefined && - Hints.getWidth() == 0) { + if (UnsafeAlgebraInst && !Hints.allowReordering()) { emitOptimizationRemarkAnalysisFPCommute( F->getContext(), Name, *F, UnsafeAlgebraInst->getDebugLoc(), - VectorizationReport() << "vectorization requires changes in the " - "order of operations, however IEEE 754 " - "floating-point operations are not " - "commutative"); + VectorizationReport() << "cannot prove it is safe to reorder " + "floating-point operations"); Failed = true; } - if (NumRuntimePointerChecks > - VectorizerParams::RuntimeMemoryCheckThreshold) { + // Test if runtime memcheck thresholds are exceeded. 
+ bool PragmaThresholdReached = + NumRuntimePointerChecks > PragmaVectorizeMemoryCheckThreshold; + bool ThresholdReached = NumRuntimePointerChecks > + VectorizerParams::RuntimeMemoryCheckThreshold && + !Hints.allowReordering(); + if (ThresholdReached || PragmaThresholdReached) { emitOptimizationRemarkAnalysisAliasing( F->getContext(), Name, *F, L->getStartLoc(), VectorizationReport() - << "cannot prove pointers refer to independent arrays in memory. " - "The loop requires " - << NumRuntimePointerChecks - << " runtime independence checks to vectorize the loop, but that " - "would exceed the limit of " - << VectorizerParams::RuntimeMemoryCheckThreshold << " checks"); + << "cannot prove it is safe to reorder memory operations"); DEBUG(dbgs() << "LV: Too many memory checks needed.\n"); Failed = true; } @@ -1799,21 +1816,22 @@ IC = UserIC > 0 ? UserIC : IC; // Emit diagnostic messages, if any. + const char *VAPassName = Hints.vectorizeAnalysisPassName(); if (!VectorizeLoop && !InterleaveLoop) { // Do not vectorize or interleaving the loop. 
- emitOptimizationRemarkAnalysis(F->getContext(), DEBUG_TYPE, *F, + emitOptimizationRemarkAnalysis(F->getContext(), VAPassName, *F, L->getStartLoc(), VecDiagMsg); - emitOptimizationRemarkAnalysis(F->getContext(), DEBUG_TYPE, *F, + emitOptimizationRemarkAnalysis(F->getContext(), LV_NAME, *F, L->getStartLoc(), IntDiagMsg); return false; } else if (!VectorizeLoop && InterleaveLoop) { DEBUG(dbgs() << "LV: Interleave Count is " << IC << '\n'); - emitOptimizationRemarkAnalysis(F->getContext(), DEBUG_TYPE, *F, + emitOptimizationRemarkAnalysis(F->getContext(), VAPassName, *F, L->getStartLoc(), VecDiagMsg); } else if (VectorizeLoop && !InterleaveLoop) { DEBUG(dbgs() << "LV: Found a vectorizable loop (" << VF.Width << ") in " << DebugLocStr << '\n'); - emitOptimizationRemarkAnalysis(F->getContext(), DEBUG_TYPE, *F, + emitOptimizationRemarkAnalysis(F->getContext(), LV_NAME, *F, L->getStartLoc(), IntDiagMsg); } else if (VectorizeLoop && InterleaveLoop) { DEBUG(dbgs() << "LV: Found a vectorizable loop (" << VF.Width << ") in " @@ -1828,7 +1846,7 @@ InnerLoopUnroller Unroller(L, SE, LI, DT, TLI, TTI, IC); Unroller.vectorize(&LVL); - emitOptimizationRemark(F->getContext(), DEBUG_TYPE, *F, L->getStartLoc(), + emitOptimizationRemark(F->getContext(), LV_NAME, *F, L->getStartLoc(), Twine("interleaved loop (interleaved count: ") + Twine(IC) + ")"); } else { @@ -1844,7 +1862,7 @@ AddRuntimeUnrollDisableMetaData(L); // Report the vectorization decision. 
- emitOptimizationRemark(F->getContext(), DEBUG_TYPE, *F, L->getStartLoc(), + emitOptimizationRemark(F->getContext(), LV_NAME, *F, L->getStartLoc(), Twine("vectorized loop (vectorization width: ") + Twine(VF.Width) + ", interleaved count: " + Twine(IC) + ")"); Index: test/Transforms/LoopVectorize/X86/no_fpmath.ll =================================================================== --- test/Transforms/LoopVectorize/X86/no_fpmath.ll +++ test/Transforms/LoopVectorize/X86/no_fpmath.ll @@ -1,6 +1,6 @@ ; RUN: opt < %s -loop-vectorize -mtriple=x86_64-unknown-linux -S -pass-remarks='loop-vectorize' -pass-remarks-missed='loop-vectorize' -pass-remarks-analysis='loop-vectorize' 2>&1 | FileCheck %s -; CHECK: remark: no_fpmath.c:6:11: loop not vectorized: vectorization requires changes in the order of operations, however IEEE 754 floating-point operations are not commutative +; CHECK: remark: no_fpmath.c:6:11: loop not vectorized: cannot prove it is safe to reorder floating-point operations ; CHECK: remark: no_fpmath.c:6:14: loop not vectorized: ; CHECK: remark: no_fpmath.c:17:14: vectorized loop (vectorization width: 2, interleaved count: 2) Index: test/Transforms/LoopVectorize/no_switch.ll =================================================================== --- test/Transforms/LoopVectorize/no_switch.ll +++ test/Transforms/LoopVectorize/no_switch.ll @@ -1,9 +1,17 @@ -; RUN: opt < %s -loop-vectorize -force-vector-width=4 -S -pass-remarks-missed='loop-vectorize' 2>&1 | FileCheck %s +; RUN: opt < %s -loop-vectorize -force-vector-width=4 -S 2>&1 | FileCheck %s +; RUN: opt < %s -loop-vectorize -force-vector-width=1 -S 2>&1 | FileCheck %s -check-prefix=NOANALYSIS +; RUN: opt < %s -loop-vectorize -force-vector-width=4 -pass-remarks-missed='loop-vectorize' -S 2>&1 | FileCheck %s -check-prefix=MOREINFO ; CHECK: remark: source.cpp:4:5: loop not vectorized: loop contains a switch statement -; CHECK: remark: source.cpp:4:5: loop not vectorized: use -Rpass-analysis=loop-vectorize for more 
info (Force=true, Vector Width=4) ; CHECK: warning: source.cpp:4:5: loop not vectorized: failed explicitly specified loop vectorization +; NOANALYSIS-NOT: remark: {{.*}} +; NOANALYSIS: warning: source.cpp:4:5: loop not interleaved: failed explicitly specified loop interleaving + +; MOREINFO: remark: source.cpp:4:5: loop not vectorized: loop contains a switch statement +; MOREINFO: remark: source.cpp:4:5: loop not vectorized: use -Rpass-analysis=loop-vectorize for more info (Force=true, Vector Width=4) +; MOREINFO: warning: source.cpp:4:5: loop not vectorized: failed explicitly specified loop vectorization + ; CHECK: _Z11test_switchPii ; CHECK-NOT: x i32> ; CHECK: ret Index: test/Transforms/LoopVectorize/runtime-limit.ll =================================================================== --- test/Transforms/LoopVectorize/runtime-limit.ll +++ test/Transforms/LoopVectorize/runtime-limit.ll @@ -1,17 +1,27 @@ -; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -pass-remarks=loop-vectorize -pass-remarks-missed=loop-vectorize -S 2>&1 | FileCheck %s +; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -dce -instcombine -pass-remarks=loop-vectorize -pass-remarks-missed=loop-vectorize -S 2>&1 | FileCheck %s +; RUN: opt < %s -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -dce -instcombine -pass-remarks=loop-vectorize -pass-remarks-missed=loop-vectorize -S 2>&1 | FileCheck %s -check-prefix=OVERRIDE +; RUN: opt < %s -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -pragma-vectorize-memory-check-threshold=6 -dce -instcombine -pass-remarks=loop-vectorize -pass-remarks-missed=loop-vectorize -S 2>&1 | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.8.0" ; First loop produced diagnostic pass remark. 
-;CHECK: remark: {{.*}}:0:0: vectorized loop (vectorization width: 4, interleaved count: 1) +;CHECK: remark: {{.*}}:0:0: vectorized loop (vectorization width: {{[0-9]}}, interleaved count: 1) ; Second loop produces diagnostic analysis remark. -;CHECK: remark: {{.*}}:0:0: loop not vectorized: cannot prove pointers refer to independent arrays in memory. The loop requires 11 runtime independence checks to vectorize the loop, but that would exceed the limit of 8 checks +;CHECK: remark: {{.*}}:0:0: loop not vectorized: cannot prove it is safe to reorder memory operations + +; First loop produced diagnostic pass remark. +;OVERRIDE: remark: {{.*}}:0:0: vectorized loop (vectorization width: {{[0-9]}}, interleaved count: 1) +; Second loop produces diagnostic pass remark. +;OVERRIDE: remark: {{.*}}:0:0: vectorized loop (vectorization width: {{[0-9]}}, interleaved count: 1) ; We are vectorizing with 6 runtime checks. ;CHECK-LABEL: func1x6( -;CHECK: <4 x i32> +;CHECK: <{{[0-9]}} x i32> ;CHECK: ret +;OVERRIDE-LABEL: func1x6( +;OVERRIDE: <4 x i32> +;OVERRIDE: ret define i32 @func1x6(i32* nocapture %out, i32* nocapture %A, i32* nocapture %B, i32* nocapture %C, i32* nocapture %D, i32* nocapture %E, i32* nocapture %F) { entry: br label %for.body @@ -44,8 +54,12 @@ ; We are not vectorizing with 12 runtime checks. ;CHECK-LABEL: func2x6( -;CHECK-NOT: <4 x i32> +;CHECK-NOT: <{{[0-9]}} x i32> ;CHECK: ret +; We vectorize with 12 checks if a vectorization hint is provided. +;OVERRIDE-LABEL: func2x6( +;OVERRIDE: <4 x i32> +;OVERRIDE: ret define i32 @func2x6(i32* nocapture %out, i32* nocapture %out2, i32* nocapture %A, i32* nocapture %B, i32* nocapture %C, i32* nocapture %D, i32* nocapture %E, i32* nocapture %F) { entry: br label %for.body