Index: docs/Vectorizers.rst =================================================================== --- docs/Vectorizers.rst +++ docs/Vectorizers.rst @@ -99,7 +99,9 @@ indicates if vectorization was specified. ``-Rpass-analysis=loop-vectorize`` identifies the statements that caused -vectorization to fail. +vectorization to fail. If in addition ``-fsave-optimization-record`` is +provided, multiple causes of vectorization failure may be listed (this behavior +might change in the future). Consider the following loop: Index: lib/Transforms/Vectorize/LoopVectorize.cpp =================================================================== --- lib/Transforms/Vectorize/LoopVectorize.cpp +++ lib/Transforms/Vectorize/LoopVectorize.cpp @@ -5086,12 +5086,18 @@ } bool LoopVectorizationLegality::canVectorize() { + // Store the result and return it at the end instead of exiting early, in case + // allowExtraAnalysis is used to report multiple reasons for not vectorizing. + bool Result = true; // We must have a loop in canonical form. Loops with indirectbr in them cannot // be canonicalized. if (!TheLoop->getLoopPreheader()) { ORE->emit(createMissedAnalysis("CFGNotUnderstood") << "loop control flow is not understood by vectorizer"); - return false; + if (ORE->allowExtraAnalysis()) + Result = false; + else + return false; } // FIXME: The code is currently dead, since the loop gets sent to @@ -5101,21 +5107,30 @@ if (!TheLoop->empty()) { ORE->emit(createMissedAnalysis("NotInnermostLoop") << "loop is not the innermost loop"); - return false; + if (ORE->allowExtraAnalysis()) + Result = false; + else + return false; } // We must have a single backedge. if (TheLoop->getNumBackEdges() != 1) { ORE->emit(createMissedAnalysis("CFGNotUnderstood") << "loop control flow is not understood by vectorizer"); - return false; + if (ORE->allowExtraAnalysis()) + Result = false; + else + return false; } // We must have a single exiting block. 
if (!TheLoop->getExitingBlock()) { ORE->emit(createMissedAnalysis("CFGNotUnderstood") << "loop control flow is not understood by vectorizer"); - return false; + if (ORE->allowExtraAnalysis()) + Result = false; + else + return false; } // We only handle bottom-tested loops, i.e. loop in which the condition is @@ -5124,7 +5139,10 @@ if (TheLoop->getExitingBlock() != TheLoop->getLoopLatch()) { ORE->emit(createMissedAnalysis("CFGNotUnderstood") << "loop control flow is not understood by vectorizer"); - return false; + if (ORE->allowExtraAnalysis()) + Result = false; + else + return false; } // We need to have a loop header. @@ -5135,28 +5153,28 @@ unsigned NumBlocks = TheLoop->getNumBlocks(); if (NumBlocks != 1 && !canVectorizeWithIfConvert()) { DEBUG(dbgs() << "LV: Can't if-convert the loop.\n"); - return false; - } - - // ScalarEvolution needs to be able to find the exit count. - const SCEV *ExitCount = PSE.getBackedgeTakenCount(); - if (ExitCount == PSE.getSE()->getCouldNotCompute()) { - ORE->emit(createMissedAnalysis("CantComputeNumberOfIterations") - << "could not determine number of loop iterations"); - DEBUG(dbgs() << "LV: SCEV could not compute the loop exit count.\n"); - return false; + if (ORE->allowExtraAnalysis()) + Result = false; + else + return false; } // Check if we can vectorize the instructions and CFG in this loop. if (!canVectorizeInstrs()) { DEBUG(dbgs() << "LV: Can't vectorize the instructions or CFG\n"); - return false; + if (ORE->allowExtraAnalysis()) + Result = false; + else + return false; } // Go over each instruction and look at memory deps. 
if (!canVectorizeMemory()) { DEBUG(dbgs() << "LV: Can't vectorize due to memory conflicts\n"); - return false; + if (ORE->allowExtraAnalysis()) + Result = false; + else + return false; } DEBUG(dbgs() << "LV: We can vectorize this loop" @@ -5184,13 +5202,17 @@ << "Too many SCEV assumptions need to be made and checked " << "at runtime"); DEBUG(dbgs() << "LV: Too many SCEV checks needed.\n"); - return false; + if (ORE->allowExtraAnalysis()) + Result = false; + else + return false; } - // Okay! We can vectorize. At this point we don't have any other mem analysis + // Okay! We've done all the tests. If any have failed, return false. Otherwise + // we can vectorize, and at this point we don't have any other mem analysis // which may limit our maximum vectorization factor, so just return true with // no restrictions. - return true; + return Result; } static Type *convertPointerToIntegerType(const DataLayout &DL, Type *Ty) { Index: test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll =================================================================== --- test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll +++ test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll @@ -11,38 +11,38 @@ ; break; ; } ; } +; File, line, and column should match those specified in the metadata +; CHECK: remark: source.cpp:4:5: loop not vectorized: could not determine number of loop iterations +; CHECK: remark: source.cpp:4:5: loop not vectorized ; void test_disabled(int *A, int Length) { ; #pragma clang loop vectorize(disable) interleave(disable) ; for (int i = 0; i < Length; i++) ; A[i] = i; ; } +; CHECK: remark: source.cpp:13:5: loop not vectorized: vectorization and interleaving are explicitly disabled, or vectorize width and interleave count are both set to 1 ; void test_array_bounds(int *A, int *B, int Length) { ; #pragma clang loop vectorize(enable) ; for (int i = 0; i < Length; i++) ; A[i] = A[B[i]]; ; } - -; File, line, and column should match those 
specified in the metadata -; CHECK: remark: source.cpp:4:5: loop not vectorized: could not determine number of loop iterations -; CHECK: remark: source.cpp:4:5: loop not vectorized -; CHECK: remark: source.cpp:13:5: loop not vectorized: vectorization and interleaving are explicitly disabled, or vectorize width and interleave count are both set to 1 ; CHECK: remark: source.cpp:19:5: loop not vectorized: cannot identify array bounds ; CHECK: remark: source.cpp:19:5: loop not vectorized ; CHECK: warning: source.cpp:19:5: loop not vectorized: failed explicitly specified loop vectorization -; CHECK: _Z4testPii -; CHECK-NOT: x i32> -; CHECK: ret - -; CHECK: _Z13test_disabledPii -; CHECK-NOT: x i32> -; CHECK: ret - -; CHECK: _Z17test_array_boundsPiS_i -; CHECK-NOT: x i32> -; CHECK: ret +; int foo(); +; int test_multiple_failures(int *A) { +; int k = 0; +; #pragma clang loop vectorize(enable) interleave(enable) +; for (int i = 0; i < 1000; i+=A[i]) { +; if (A[i]) +; k = foo(); +; } +; return k; +; } +; CHECK: remark: source.cpp:29:7: loop not vectorized: control flow cannot be substituted for a select +; CHECK: remark: source.cpp:27:3: loop not vectorized ; YAML: --- !Analysis ; YAML-NEXT: Pass: loop-vectorize @@ -98,6 +98,41 @@ ; YAML-NEXT: - String: 'loop not vectorized: ' ; YAML-NEXT: - String: failed explicitly specified loop vectorization ; YAML-NEXT: ... +; YAML-NEXT: --- !Analysis +; YAML-NEXT: Pass: loop-vectorize +; YAML-NEXT: Name: NoCFGForSelect +; YAML-NEXT: DebugLoc: { File: source.cpp, Line: 29, Column: 7 } +; YAML-NEXT: Function: test_multiple_failures +; YAML-NEXT: Args: +; YAML-NEXT: - String: 'loop not vectorized: ' +; YAML-NEXT: - String: control flow cannot be substituted for a select +; YAML-NEXT: ... 
+; YAML-NEXT: --- !Analysis +; YAML-NEXT: Pass: loop-vectorize +; YAML-NEXT: Name: NonReductionValueUsedOutsideLoop +; YAML-NEXT: DebugLoc: { File: source.cpp, Line: 27, Column: 3 } +; YAML-NEXT: Function: test_multiple_failures +; YAML-NEXT: Args: +; YAML-NEXT: - String: 'loop not vectorized: ' +; YAML-NEXT: - String: value that could not be identified as reduction is used outside the loop +; YAML-NEXT: ... +; YAML-NEXT: --- !Analysis +; YAML-NEXT: Pass: loop-vectorize +; YAML-NEXT: Name: CantComputeNumberOfIterations +; YAML-NEXT: DebugLoc: { File: source.cpp, Line: 27, Column: 3 } +; YAML-NEXT: Function: test_multiple_failures +; YAML-NEXT: Args: +; YAML-NEXT: - String: 'loop not vectorized: ' +; YAML-NEXT: - String: could not determine number of loop iterations +; YAML-NEXT: ... +; YAML-NEXT: --- !Missed +; YAML-NEXT: Pass: loop-vectorize +; YAML-NEXT: Name: MissedDetails +; YAML-NEXT: DebugLoc: { File: source.cpp, Line: 27, Column: 3 } +; YAML-NEXT: Function: test_multiple_failures +; YAML-NEXT: Args: +; YAML-NEXT: - String: loop not vectorized +; YAML-NEXT: ... 
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" @@ -124,6 +159,10 @@ ret void, !dbg !24 } +; CHECK: _Z4testPii +; CHECK-NOT: x i32> +; CHECK: ret + ; Function Attrs: nounwind optsize ssp uwtable define void @_Z13test_disabledPii(i32* nocapture %A, i32 %Length) #0 !dbg !7 { entry: @@ -144,6 +183,10 @@ ret void, !dbg !31 } +; CHECK: _Z13test_disabledPii +; CHECK-NOT: x i32> +; CHECK: ret + ; Function Attrs: nounwind optsize ssp uwtable define void @_Z17test_array_boundsPiS_i(i32* nocapture %A, i32* nocapture readonly %B, i32 %Length) #0 !dbg !8 { entry: @@ -174,6 +217,45 @@ ret void, !dbg !36 } +; CHECK: _Z17test_array_boundsPiS_i +; CHECK-NOT: x i32> +; CHECK: ret + +; Function Attrs: nounwind uwtable +define i32 @test_multiple_failures(i32* nocapture readonly %A) #0 !dbg !46 { +entry: + br label %for.body, !dbg !38 + +for.body: ; preds = %entry, %for.inc + %i.09 = phi i32 [ 0, %entry ], [ %add, %for.inc ] + %k.09 = phi i32 [ 0, %entry ], [ %k.1, %for.inc ] + %arrayidx = getelementptr inbounds i32, i32* %A, i32 %i.09, !dbg !40 + %0 = load i32, i32* %arrayidx, align 4, !dbg !40 + %tobool = icmp eq i32 %0, 0, !dbg !40 + br i1 %tobool, label %for.inc, label %if.then, !dbg !40 + +if.then: ; preds = %for.body + %call = tail call i32 (...) @foo(), !dbg !41 + %.pre = load i32, i32* %arrayidx, align 4 + br label %for.inc, !dbg !42 + +for.inc: ; preds = %for.body, %if.then + %1 = phi i32 [ %.pre, %if.then ], [ 0, %for.body ], !dbg !43 + %k.1 = phi i32 [ %call, %if.then ], [ %k.09, %for.body ] + %add = add nsw i32 %1, %i.09, !dbg !44 + %cmp = icmp slt i32 %add, 1000, !dbg !45 + br i1 %cmp, label %for.body, label %for.cond.cleanup, !dbg !38 + +for.cond.cleanup: ; preds = %for.inc + ret i32 %k.1, !dbg !39 +} + +declare i32 @foo(...) 
+ +; CHECK: test_multiple_failures +; CHECK-NOT: x i32> +; CHECK: ret + attributes #0 = { nounwind } !llvm.dbg.cu = !{!0} @@ -216,3 +298,13 @@ !34 = !{!34, !15} !35 = !DILocation(line: 19, column: 5, scope: !33) !36 = !DILocation(line: 20, column: 1, scope: !8) +!37 = distinct !DILexicalBlock(line: 24, column: 3, file: !1, scope: !46) +!38 = !DILocation(line: 27, column: 3, scope: !37) +!39 = !DILocation(line: 31, column: 3, scope: !37) +!40 = !DILocation(line: 28, column: 9, scope: !37) +!41 = !DILocation(line: 29, column: 11, scope: !37) +!42 = !DILocation(line: 29, column: 7, scope: !37) +!43 = !DILocation(line: 27, column: 32, scope: !37) +!44 = !DILocation(line: 27, column: 30, scope: !37) +!45 = !DILocation(line: 27, column: 21, scope: !37) +!46 = distinct !DISubprogram(name: "test_multiple_failures", line: 26, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, unit: !0, scopeLine: 26, file: !1, scope: !5, type: !6, variables: !2)