diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -1057,6 +1057,44 @@
   return true;
 }
 
+/// Emit a more specific analysis remark when one of the given exit blocks
+/// looks like an error-handling path: either it contains a direct call to
+/// \c __assert_fail, or it contains an \c unreachable instruction.
+///
+/// Blocks and their instructions are scanned in order and only the first
+/// match is reported. The function returns no status to the caller.
+///
+/// \param ExitBBs exit blocks to scan.
+/// \param TheLoop loop passed to the failure report.
+/// \param ORE     remark emitter passed to the failure report.
+static void checkErrorHandling(const SmallVectorImpl<BasicBlock *> &ExitBBs,
+                               Loop *TheLoop, OptimizationRemarkEmitter *ORE) {
+  for (BasicBlock *BB : ExitBBs) {
+    for (Instruction &Inst : *BB) {
+      if (auto *CI = dyn_cast<CallInst>(&Inst)) {
+        // Only calls whose callee is known and named __assert_fail are
+        // reported; any other call is skipped and the scan continues.
+        const auto *Callee = CI->getCalledFunction();
+        if (Callee && Callee->getName() == "__assert_fail") {
+          reportVectorizationFailure(
+              "The loop must have no error handling",
+              "loop control flow is not understood by vectorizer. "
+              "Loop contains an assertion, compile with NDEBUG to remove",
+              "CFGNotUnderstood", ORE, TheLoop, CI);
+          return;
+        }
+      } else if (auto *U = dyn_cast<UnreachableInst>(&Inst)) {
+        reportVectorizationFailure(
+            "The loop must have no error handling",
+            "loop control flow is not understood by vectorizer. "
+            "Loop contains control flow that does not return",
+            "CFGNotUnderstood", ORE, TheLoop, U);
+        return;
+      }
+    }
+  }
+}
+
 // Helper function to canVectorizeLoopNestCFG.
 bool LoopVectorizationLegality::canVectorizeLoopCFG(Loop *Lp,
                                                     bool UseVPlanNativePath) {
@@ -1096,6 +1134,14 @@
       return false;
   }
 
+  // Check that this loop does not contain error handling blocks. Only done
+  // when extra analysis is requested and the loop has no unique exit block.
+  if (DoExtraAnalysis && !Lp->getUniqueExitBlock()) {
+    SmallVector<BasicBlock *, 4> ExitBBs;
+    Lp->getExitBlocks(ExitBBs);
+    checkErrorHandling(ExitBBs, TheLoop, ORE);
+  }
+
   // We currently must have a single "exit block" after the loop. Note that
   // multiple "exiting blocks" inside the loop are allowed, provided they all
   // reach the single exit block.
diff --git a/llvm/test/Transforms/LoopVectorize/error-handling-remarks.ll b/llvm/test/Transforms/LoopVectorize/error-handling-remarks.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/error-handling-remarks.ll
@@ -0,0 +1,149 @@
+; RUN: opt -loop-vectorize -pass-remarks-analysis=loop-vectorize -disable-output < %s 2>&1 | FileCheck %s
+;
+; Verify that the vectorizer emits a dedicated analysis remark when a loop
+; has an additional exit into an assertion failure or into a throw.
+; ModuleID = 'error-handling-remarks.cpp'
+source_filename = "error-handling-remarks.cpp"
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; C++ source matching the debug locations below.
+; NOTE(review): the header on the #include line is assumed to be <cassert> - confirm.
+; #include <cassert>
+;
+; void f(double *X, std::size_t N) {
+;   __builtin_assume(N > 32 && N % 32 == 0);
+;
+;   for (std::size_t i = 0; i < N; ++i) {
+;     assert(X[i] != 0.0f && "Division by zero!");
+;     X[i] = 1.0 / X[i];
+;   }
+;
+;   for (std::size_t i = 0; i < N; ++i) {
+;     if (X[i] == 0.0f)
+;       throw(true);
+;     X[i] = 1.0 / X[i];
+;   }
+; }
+
+; CHECK: remark: error-handling-remarks.cpp:7:5: loop not vectorized: loop control flow is not understood by vectorizer. Loop contains an assertion, compile with NDEBUG to remove
+; CHECK: remark: error-handling-remarks.cpp:13:7: loop not vectorized: loop control flow is not understood by vectorizer. Loop contains control flow that does not return
+
+@.str.1 = private unnamed_addr constant [36 x i8] c"X[i] != 0.0f && \22Division by zero!\22\00", align 1
+@.str.2 = private unnamed_addr constant [27 x i8] c"error-handling-remarks.cpp\00", align 1
+@__PRETTY_FUNCTION__._Z1fPdm = private unnamed_addr constant [30 x i8] c"void f(double *, std::size_t)\00", align 1
+@_ZTIb = external dso_local constant i8*
+
+; Function Attrs: uwtable mustprogress
+define dso_local void @_Z1fPdm(double* nocapture %X, i64 %N) local_unnamed_addr #0 !dbg !6 {
+entry:
+  %cmp = icmp ugt i64 %N, 32, !dbg !8
+  %rem = and i64 %N, 31, !dbg !9
+  %cmp1 = icmp eq i64 %rem, 0, !dbg !9
+  call void @llvm.assume(i1 %cmp), !dbg !10
+  call void @llvm.assume(i1 %cmp1), !dbg !10
+  br label %for.body, !dbg !11
+
+; First loop: its extra exit is cond.false, which calls __assert_fail.
+for.body:                                         ; preds = %entry, %cond.end
+  %i.043 = phi i64 [ %inc, %cond.end ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds double, double* %X, i64 %i.043, !dbg !12
+  %0 = load double, double* %arrayidx, align 8, !dbg !12, !tbaa !13
+  %cmp3 = fcmp une double %0, 0.000000e+00, !dbg !12
+  br i1 %cmp3, label %cond.end, label %cond.false, !dbg !12
+
+cond.false:                                       ; preds = %for.body
+  call void @__assert_fail(i8* getelementptr inbounds ([36 x i8], [36 x i8]* @.str.1, i64 0, i64 0), i8* getelementptr inbounds ([27 x i8], [27 x i8]* @.str.2, i64 0, i64 0), i32 7, i8* getelementptr inbounds ([30 x i8], [30 x i8]* @__PRETTY_FUNCTION__._Z1fPdm, i64 0, i64 0)) #3, !dbg !12
+  unreachable, !dbg !12
+
+cond.end:                                         ; preds = %for.body
+  %div = fdiv double 1.000000e+00, %0, !dbg !17
+  store double %div, double* %arrayidx, align 8, !dbg !18, !tbaa !13
+  %inc = add nuw i64 %i.043, 1, !dbg !19
+  %exitcond44.not = icmp eq i64 %inc, %N, !dbg !20
+  br i1 %exitcond44.not, label %for.body12, label %for.body, !dbg !11, !llvm.loop !21
+
+for.cond.cleanup11:                               ; preds = %if.end
+  ret void, !dbg !25
+
+; Second loop: its extra exit is if.then, which calls __cxa_throw and ends in unreachable.
+for.body12:                                       ; preds = %cond.end, %if.end
+  %i8.041 = phi i64 [ %inc19, %if.end ], [ 0, %cond.end ]
+  %arrayidx13 = getelementptr inbounds double, double* %X, i64 %i8.041, !dbg !26
+  %1 = load double, double* %arrayidx13, align 8, !dbg !26, !tbaa !13
+  %cmp14 = fcmp oeq double %1, 0.000000e+00, !dbg !27
+  br i1 %cmp14, label %if.then, label %if.end, !dbg !26
+
+if.then:                                          ; preds = %for.body12
+  %exception = call i8* @__cxa_allocate_exception(i64 1) #4, !dbg !28
+  store i8 1, i8* %exception, align 16, !dbg !28, !tbaa !29
+  call void @__cxa_throw(i8* nonnull %exception, i8* bitcast (i8** @_ZTIb to i8*), i8* null) #5, !dbg !28
+  unreachable, !dbg !28
+
+if.end:                                           ; preds = %for.body12
+  %div16 = fdiv double 1.000000e+00, %1, !dbg !31
+  store double %div16, double* %arrayidx13, align 8, !dbg !32, !tbaa !13
+  %inc19 = add nuw i64 %i8.041, 1, !dbg !33
+  %exitcond.not = icmp eq i64 %inc19, %N, !dbg !34
+  br i1 %exitcond.not, label %for.cond.cleanup11, label %for.body12, !dbg !35, !llvm.loop !36
+}
+
+; Function Attrs: nofree nosync nounwind willreturn
+declare void @llvm.assume(i1 noundef) #1
+
+; Function Attrs: noreturn nounwind
+declare dso_local void @__assert_fail(i8*, i8*, i32, i8*) local_unnamed_addr #2
+
+declare dso_local i8* @__cxa_allocate_exception(i64) local_unnamed_addr
+
+declare dso_local void @__cxa_throw(i8*, i8*, i8*) local_unnamed_addr
+
+attributes #0 = { uwtable mustprogress "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nofree nosync nounwind willreturn }
+attributes #2 = { noreturn nounwind "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #3 = { noreturn nounwind }
+attributes #4 = { nounwind }
+attributes #5 = { noreturn }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!3, !4}
+!llvm.ident = !{!5}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "clang version 13.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: NoDebug, enums: !2, splitDebugInlining: false, nameTableKind: None)
+!1 = !DIFile(filename: "error-handling-remarks.cpp", directory: "/tmp")
+!2 = !{}
+!3 = !{i32 2, !"Debug Info Version", i32 3}
+!4 = !{i32 1, !"wchar_size", i32 4}
+!5 = !{!"clang version 13.0.0"}
+!6 = distinct !DISubprogram(name: "f", scope: !1, file: !1, line: 3, type: !7, scopeLine: 3, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
+!7 = !DISubroutineType(types: !2)
+!8 = !DILocation(line: 4, column: 22, scope: !6)
+!9 = !DILocation(line: 4, column: 27, scope: !6)
+!10 = !DILocation(line: 4, column: 3, scope: !6)
+!11 = !DILocation(line: 6, column: 3, scope: !6)
+!12 = !DILocation(line: 7, column: 5, scope: !6)
+!13 = !{!14, !14, i64 0}
+!14 = !{!"double", !15, i64 0}
+!15 = !{!"omnipotent char", !16, i64 0}
+!16 = !{!"Simple C++ TBAA"}
+!17 = !DILocation(line: 8, column: 16, scope: !6)
+!18 = !DILocation(line: 8, column: 10, scope: !6)
+!19 = !DILocation(line: 6, column: 34, scope: !6)
+!20 = !DILocation(line: 6, column: 29, scope: !6)
+!21 = distinct !{!21, !11, !22, !23, !24}
+!22 = !DILocation(line: 9, column: 3, scope: !6)
+!23 = !{!"llvm.loop.mustprogress"}
+!24 = !{!"llvm.loop.unroll.disable"}
+!25 = !DILocation(line: 16, column: 1, scope: !6)
+!26 = !DILocation(line: 12, column: 9, scope: !6)
+!27 = !DILocation(line: 12, column: 14, scope: !6)
+!28 = !DILocation(line: 13, column: 7, scope: !6)
+!29 = !{!30, !30, i64 0}
+!30 = !{!"bool", !15, i64 0}
+!31 = !DILocation(line: 14, column: 16, scope: !6)
+!32 = !DILocation(line: 14, column: 10, scope: !6)
+!33 = !DILocation(line: 11, column: 34, scope: !6)
+!34 = !DILocation(line: 11, column: 29, scope: !6)
+!35 = !DILocation(line: 11, column: 3, scope: !6)
+!36 = distinct !{!36, !35, !37, !23, !24}
+!37 = !DILocation(line: 15, column: 3, scope: !6)