Index: docs/Vectorizers.rst =================================================================== --- docs/Vectorizers.rst +++ docs/Vectorizers.rst @@ -99,7 +99,8 @@ indicates if vectorization was specified. ``-Rpass-analysis=loop-vectorize`` identifies the statements that caused -vectorization to fail. +vectorization to fail. If in addition ``-fsave-optimization-record`` is +provided, multiple causes of vectorization failure may be listed. Consider the following loop: Index: lib/Transforms/Vectorize/LoopVectorize.cpp =================================================================== --- lib/Transforms/Vectorize/LoopVectorize.cpp +++ lib/Transforms/Vectorize/LoopVectorize.cpp @@ -5086,12 +5086,16 @@ } bool LoopVectorizationLegality::canVectorize() { + bool Result = true; // We must have a loop in canonical form. Loops with indirectbr in them cannot // be canonicalized. if (!TheLoop->getLoopPreheader()) { ORE->emit(createMissedAnalysis("CFGNotUnderstood") << "loop control flow is not understood by vectorizer"); - return false; + if (ORE->allowExtraAnalysis()) + Result = false; + else + return false; } // FIXME: The code is currently dead, since the loop gets sent to @@ -5101,21 +5105,30 @@ if (!TheLoop->empty()) { ORE->emit(createMissedAnalysis("NotInnermostLoop") << "loop is not the innermost loop"); - return false; + if (ORE->allowExtraAnalysis()) + Result = false; + else + return false; } // We must have a single backedge. if (TheLoop->getNumBackEdges() != 1) { ORE->emit(createMissedAnalysis("CFGNotUnderstood") << "loop control flow is not understood by vectorizer"); - return false; + if (ORE->allowExtraAnalysis()) + Result = false; + else + return false; } // We must have a single exiting block. 
if (!TheLoop->getExitingBlock()) { ORE->emit(createMissedAnalysis("CFGNotUnderstood") << "loop control flow is not understood by vectorizer"); - return false; + if (ORE->allowExtraAnalysis()) + Result = false; + else + return false; } // We only handle bottom-tested loops, i.e. loop in which the condition is @@ -5124,7 +5137,10 @@ if (TheLoop->getExitingBlock() != TheLoop->getLoopLatch()) { ORE->emit(createMissedAnalysis("CFGNotUnderstood") << "loop control flow is not understood by vectorizer"); - return false; + if (ORE->allowExtraAnalysis()) + Result = false; + else + return false; } // We need to have a loop header. @@ -5135,28 +5151,28 @@ unsigned NumBlocks = TheLoop->getNumBlocks(); if (NumBlocks != 1 && !canVectorizeWithIfConvert()) { DEBUG(dbgs() << "LV: Can't if-convert the loop.\n"); - return false; - } - - // ScalarEvolution needs to be able to find the exit count. - const SCEV *ExitCount = PSE.getBackedgeTakenCount(); - if (ExitCount == PSE.getSE()->getCouldNotCompute()) { - ORE->emit(createMissedAnalysis("CantComputeNumberOfIterations") - << "could not determine number of loop iterations"); - DEBUG(dbgs() << "LV: SCEV could not compute the loop exit count.\n"); - return false; + if (ORE->allowExtraAnalysis()) + Result = false; + else + return false; } // Check if we can vectorize the instructions and CFG in this loop. if (!canVectorizeInstrs()) { DEBUG(dbgs() << "LV: Can't vectorize the instructions or CFG\n"); - return false; + if (ORE->allowExtraAnalysis()) + Result = false; + else + return false; } // Go over each instruction and look at memory deps. 
if (!canVectorizeMemory()) { DEBUG(dbgs() << "LV: Can't vectorize due to memory conflicts\n"); - return false; + if (ORE->allowExtraAnalysis()) + Result = false; + else + return false; } DEBUG(dbgs() << "LV: We can vectorize this loop" @@ -5184,13 +5200,17 @@ << "Too many SCEV assumptions need to be made and checked " << "at runtime"); DEBUG(dbgs() << "LV: Too many SCEV checks needed.\n"); - return false; + if (ORE->allowExtraAnalysis()) + Result = false; + else + return false; } - // Okay! We can vectorize. At this point we don't have any other mem analysis + // Okay! We've done all the tests. If any have failed, return false. Otherwise + // we can vectorize, and at this point we don't have any other mem analysis // which may limit our maximum vectorization factor, so just return true with // no restrictions. - return true; + return Result; } static Type *convertPointerToIntegerType(const DataLayout &DL, Type *Ty) { Index: test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll =================================================================== --- test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll +++ test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll @@ -24,6 +24,17 @@ ; A[i] = A[B[i]]; ; } +; int foo(); +; int test_multiple_failures(int *A) { +; int k = 0; +; #pragma clang loop vectorize(enable) interleave(enable) +; for (int i = 0; i < 1000; i+=A[i]) { +; if (A[i]) +; k = foo(); +; } +; return k; +; } + ; File, line, and column should match those specified in the metadata ; CHECK: remark: source.cpp:4:5: loop not vectorized: could not determine number of loop iterations ; CHECK: remark: source.cpp:4:5: loop not vectorized @@ -31,6 +42,8 @@ ; CHECK: remark: source.cpp:19:5: loop not vectorized: cannot identify array bounds ; CHECK: remark: source.cpp:19:5: loop not vectorized ; CHECK: warning: source.cpp:19:5: loop not vectorized: failed explicitly specified loop vectorization +; CHECK: remark: :0:0: loop not vectorized: 
control flow cannot be substituted for a select +; CHECK: remark: :0:0: loop not vectorized ; CHECK: _Z4testPii ; CHECK-NOT: x i32> @@ -44,6 +57,10 @@ ; CHECK-NOT: x i32> ; CHECK: ret +; CHECK: test_multiple_failure +; CHECK-NOT: x i32> +; CHECK: ret + ; YAML: --- !Analysis ; YAML-NEXT: Pass: loop-vectorize ; YAML-NEXT: Name: CantComputeNumberOfIterations @@ -98,6 +115,37 @@ ; YAML-NEXT: - String: 'loop not vectorized: ' ; YAML-NEXT: - String: failed explicitly specified loop vectorization ; YAML-NEXT: ... +; YAML-NEXT: --- !Analysis +; YAML-NEXT: Pass: loop-vectorize +; YAML-NEXT: Name: NoCFGForSelect +; YAML-NEXT: Function: test_multiple_failures +; YAML-NEXT: Args: +; YAML-NEXT: - String: 'loop not vectorized: ' +; YAML-NEXT: - String: control flow cannot be substituted for a select +; YAML-NEXT: ... +; YAML-NEXT: --- !Analysis +; YAML-NEXT: Pass: loop-vectorize +; YAML-NEXT: Name: NonReductionValueUsedOutsideLoop +; YAML-NEXT: Function: test_multiple_failures +; YAML-NEXT: Args: +; YAML-NEXT: - String: 'loop not vectorized: ' +; YAML-NEXT: - String: value that could not be identified as reduction is used outside the loop +; YAML-NEXT: ... +; YAML-NEXT: --- !Analysis +; YAML-NEXT: Pass: loop-vectorize +; YAML-NEXT: Name: CantComputeNumberOfIterations +; YAML-NEXT: Function: test_multiple_failures +; YAML-NEXT: Args: +; YAML-NEXT: - String: 'loop not vectorized: ' +; YAML-NEXT: - String: could not determine number of loop iterations +; YAML-NEXT: ... +; YAML-NEXT: --- !Missed +; YAML-NEXT: Pass: loop-vectorize +; YAML-NEXT: Name: MissedDetails +; YAML-NEXT: Function: test_multiple_failures +; YAML-NEXT: Args: +; YAML-NEXT: - String: loop not vectorized +; YAML-NEXT: ... 
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" @@ -174,6 +222,37 @@ ret void, !dbg !36 } +; Function Attrs: nounwind uwtable +define i32 @test_multiple_failures(i32* nocapture readonly %A) #0 { +entry: + br label %for.body + +for.body: ; preds = %entry, %for.inc + %i.09 = phi i32 [ 0, %entry ], [ %add, %for.inc ] + %k.09 = phi i32 [ 0, %entry ], [ %k.1, %for.inc ] + %arrayidx = getelementptr inbounds i32, i32* %A, i32 %i.09 + %0 = load i32, i32* %arrayidx, align 4 + %tobool = icmp eq i32 %0, 0 + br i1 %tobool, label %for.inc, label %if.then + +if.then: ; preds = %for.body + %call = tail call i32 (...) @foo() + %.pre = load i32, i32* %arrayidx, align 4 + br label %for.inc + +for.inc: ; preds = %for.body, %if.then + %1 = phi i32 [ %.pre, %if.then ], [ 0, %for.body ] + %k.1 = phi i32 [ %call, %if.then ], [ %k.09, %for.body ] + %add = add nsw i32 %1, %i.09 + %cmp = icmp slt i32 %add, 1000 + br i1 %cmp, label %for.body, label %for.cond.cleanup + +for.cond.cleanup: ; preds = %for.inc + ret i32 %k.1 +} + +declare i32 @foo(...) + attributes #0 = { nounwind } !llvm.dbg.cu = !{!0}