Index: lib/Transforms/Vectorize/LoopVectorize.cpp =================================================================== --- lib/Transforms/Vectorize/LoopVectorize.cpp +++ lib/Transforms/Vectorize/LoopVectorize.cpp @@ -1758,6 +1758,10 @@ /// The loop that we evaluate. Loop *TheLoop; + /// Instruction recorded when a block fails the predication check; passed to + /// createMissedAnalysis for "NoCFGForSelect". NOTE(review): the !LI/!SI paths store null. + Instruction *SelectRemarkInstruction; + /// A wrapper around ScalarEvolution used to add runtime SCEV checks. /// Applies dynamic knowledge to simplify SCEV expressions in the context /// of existing SCEV assumptions. The analysis will also add a minimal set @@ -4829,8 +4833,8 @@ // We must be able to predicate all blocks that need to be predicated. if (blockNeedsPredication(BB)) { if (!blockCanBePredicated(BB, SafePointes)) { - ORE->emit(createMissedAnalysis("NoCFGForSelect", BB->getTerminator()) - << "control flow cannot be substituted for a select"); + ORE->emit(createMissedAnalysis("NoCFGForSelect", SelectRemarkInstruction) + << "if-conversion not possible due to lack of support for predication"); return false; } } else if (BB != Header && !canIfConvertPHINodes(BB)) { @@ -5694,14 +5698,18 @@ // Check that we don't have a constant expression that can trap as operand. for (Value *Operand : I.operands()) { if (auto *C = dyn_cast(Operand)) - if (C->canTrap()) + if (C->canTrap()){ + SelectRemarkInstruction = &I; return false; + } } // We might be able to hoist the load. if (I.mayReadFromMemory()) { auto *LI = dyn_cast(&I); - if (!LI) + if (!LI){ + SelectRemarkInstruction = LI; return false; + } if (!SafePtrs.count(LI->getPointerOperand())) { if (isLegalMaskedLoad(LI->getType(), LI->getPointerOperand()) || isLegalMaskedGather(LI->getType())) { @@ -5711,6 +5719,7 @@ // !llvm.mem.parallel_loop_access implies if-conversion safety. 
if (IsAnnotatedParallel) continue; + SelectRemarkInstruction = &I; return false; } } @@ -5719,9 +5728,10 @@ auto *SI = dyn_cast(&I); // We only support predication of stores in basic blocks with one // predecessor. - if (!SI) + if (!SI){ + SelectRemarkInstruction = SI; return false; - + } // Build a masked store if it is legal for the target. if (isLegalMaskedStore(SI->getValueOperand()->getType(), SI->getPointerOperand()) || @@ -5734,11 +5744,15 @@ bool isSinglePredecessor = SI->getParent()->getSinglePredecessor(); if (++NumPredStores > NumberOfStoresToPredicate || !isSafePtr || - !isSinglePredecessor) + !isSinglePredecessor){ + SelectRemarkInstruction = SI; return false; + } } - if (I.mayThrow()) + if (I.mayThrow()){ + SelectRemarkInstruction = &I; return false; + } } return true; Index: test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll =================================================================== --- test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll +++ test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll @@ -45,7 +45,7 @@ ; } ; return k; ; } -; CHECK: remark: source.cpp:29:7: loop not vectorized: control flow cannot be substituted for a select +; CHECK: remark: source.cpp:27:3: loop not vectorized: if-conversion not possible due to lack of support for predication ; CHECK: remark: source.cpp:27:3: loop not vectorized ; YAML: --- !Analysis @@ -105,11 +105,11 @@ ; YAML-NEXT: --- !Analysis ; YAML-NEXT: Pass: loop-vectorize ; YAML-NEXT: Name: NoCFGForSelect -; YAML-NEXT: DebugLoc: { File: source.cpp, Line: 29, Column: 7 } +; YAML-NEXT: DebugLoc: { File: source.cpp, Line: 27, Column: 3 } ; YAML-NEXT: Function: test_multiple_failures ; YAML-NEXT: Args: ; YAML-NEXT: - String: 'loop not vectorized: ' -; YAML-NEXT: - String: control flow cannot be substituted for a select +; YAML-NEXT: - String: if-conversion not possible due to lack of support for predication ; YAML-NEXT: ... 
; YAML-NEXT: --- !Analysis ; YAML-NEXT: Pass: loop-vectorize Index: test/Transforms/LoopVectorize/select_remark_missed.ll =================================================================== --- /dev/null +++ test/Transforms/LoopVectorize/select_remark_missed.ll @@ -0,0 +1,116 @@ +; RUN: opt < %s -loop-vectorize -S -pass-remarks-missed='loop-vectorize' -pass-remarks-analysis='loop-vectorize' 2>&1 | FileCheck %s +; CHECK: remark: source.c:17:10: loop not vectorized: if-conversion not possible due to lack of support for predication +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +@b = common global [32000 x float] zeroinitializer, align 16 +@d = common global [32000 x float] zeroinitializer, align 16 +@e = common global [32000 x float] zeroinitializer, align 16 +@a = common global [32000 x float] zeroinitializer, align 16 +@c = common global [32000 x float] zeroinitializer, align 16 +@aa = common global [256 x [256 x float]] zeroinitializer, align 16 +@bb = common global [256 x [256 x float]] zeroinitializer, align 16 +@cc = common global [256 x [256 x float]] zeroinitializer, align 16 +@tt = common local_unnamed_addr global [256 x [256 x float]] zeroinitializer, align 16 + +; Function Attrs: nounwind uwtable +define i32 @main() local_unnamed_addr #0 !dbg !6 { +entry: + br label %for.body, !dbg !8 + +for.cond.cleanup: ; preds = %for.cond.cleanup3 + ret i32 0, !dbg !9 + +for.body: ; preds = %for.cond.cleanup3, %entry + %nl.046 = phi i32 [ 0, %entry ], [ %inc27, %for.cond.cleanup3 ] + br label %for.body4, !dbg !10 + +for.cond.cleanup3: ; preds = %for.inc + %call = tail call i32 @dummy(float* getelementptr inbounds ([32000 x float], [32000 x float]* @a, i64 0, i64 0), float* getelementptr inbounds ([32000 x float], [32000 x float]* @b, i64 0, i64 0), float* getelementptr inbounds ([32000 x float], [32000 x float]* @c, i64 0, i64 0), float* getelementptr inbounds ([32000 x float], [32000 x float]* @d, i64 0, i64 0), float* getelementptr inbounds ([32000 x 
float], [32000 x float]* @e, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]], [256 x [256 x float]]* @aa, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]], [256 x [256 x float]]* @bb, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]], [256 x [256 x float]]* @cc, i64 0, i64 0), float 0.000000e+00) #2, !dbg !11 + %inc27 = add nuw nsw i32 %nl.046, 1, !dbg !12 + %exitcond47 = icmp eq i32 %inc27, 200000, !dbg !13 + br i1 %exitcond47, label %for.cond.cleanup, label %for.body, !dbg !8, !llvm.loop !14 + +for.body4: ; preds = %for.inc, %for.body + %indvars.iv = phi i64 [ 0, %for.body ], [ %indvars.iv.next, %for.inc ] + %j.044 = phi i32 [ -1, %for.body ], [ %j.1, %for.inc ] + %inc = add nsw i32 %j.044, 1, !dbg !16 + %arrayidx = getelementptr inbounds [32000 x float], [32000 x float]* @b, i64 0, i64 %indvars.iv, !dbg !17 + %0 = load float, float* %arrayidx, align 4, !dbg !17, !tbaa !18 + %arrayidx6 = getelementptr inbounds [32000 x float], [32000 x float]* @d, i64 0, i64 %indvars.iv, !dbg !22 + %1 = load float, float* %arrayidx6, align 4, !dbg !22, !tbaa !18 + %arrayidx8 = getelementptr inbounds [32000 x float], [32000 x float]* @e, i64 0, i64 %indvars.iv, !dbg !23 + %2 = load float, float* %arrayidx8, align 4, !dbg !23, !tbaa !18 + %mul = fmul fast float %2, %1, !dbg !24 + %add = fadd fast float %mul, %0, !dbg !25 + %idxprom9 = sext i32 %inc to i64, !dbg !26 + %arrayidx10 = getelementptr inbounds [32000 x float], [32000 x float]* @a, i64 0, i64 %idxprom9, !dbg !26 + store float %add, float* %arrayidx10, align 4, !dbg !27, !tbaa !18 + %arrayidx12 = getelementptr inbounds [32000 x float], [32000 x float]* @c, i64 0, i64 %indvars.iv, !dbg !28 + %3 = load float, float* %arrayidx12, align 4, !dbg !28, !tbaa !18 + %cmp13 = fcmp fast ogt float %3, 0.000000e+00, !dbg !29 + br i1 %cmp13, label %if.then, label %for.inc, !dbg !28 + +if.then: ; preds = %for.body4 + %inc14 = add nsw i32 %j.044, 2, !dbg !30 + 
%add22 = fadd fast float %3, %mul, !dbg !31 + %idxprom23 = sext i32 %inc14 to i64, !dbg !32 + %arrayidx24 = getelementptr inbounds [32000 x float], [32000 x float]* @a, i64 0, i64 %idxprom23, !dbg !32 + store float %add22, float* %arrayidx24, align 4, !dbg !33, !tbaa !18 + br label %for.inc, !dbg !34 + +for.inc: ; preds = %for.body4, %if.then + %j.1 = phi i32 [ %inc14, %if.then ], [ %inc, %for.body4 ] + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !35 + %exitcond = icmp eq i64 %indvars.iv.next, 16000, !dbg !36 + br i1 %exitcond, label %for.cond.cleanup3, label %for.body4, !dbg !10, !llvm.loop !37 +} + +declare i32 @dummy(float*, float*, float*, float*, float*, [256 x float]*, [256 x float]*, [256 x float]*, float) local_unnamed_addr #1 + +attributes #0 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+avx,+avx2,+fxsr,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave" "unsafe-fp-math"="true" "use-soft-float"="false" } +attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+avx,+avx2,+fxsr,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave" "unsafe-fp-math"="true" "use-soft-float"="false" } +attributes #2 = { nounwind } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4} +!llvm.ident = !{!5} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 7.0.0 (https://git.llvm.org/git/clang.git/ 
a842b5f965a003448c860bf811d69bf91a844b05) (https://github.com/llvm-mirror/llvm.git b43fe4d6c762fd678f601e725c38201216ce1447)", isOptimized: true, runtimeVersion: 0, emissionKind: NoDebug, enums: !2) +!1 = !DIFile(filename: "source.c", directory: "/mnt/1/tharun/llvm-remarks/LLVM_remarks") +!2 = !{} +!3 = !{i32 2, !"Debug Info Version", i32 3} +!4 = !{i32 1, !"wchar_size", i32 4} +!5 = !{!"clang version 7.0.0 (https://git.llvm.org/git/clang.git/ a842b5f965a003448c860bf811d69bf91a844b05) (https://github.com/llvm-mirror/llvm.git b43fe4d6c762fd678f601e725c38201216ce1447)"} +!6 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 8, type: !7, isLocal: false, isDefinition: true, scopeLine: 8, isOptimized: true, unit: !0, variables: !2) +!7 = !DISubroutineType(types: !2) +!8 = !DILocation(line: 10, column: 2, scope: !6) +!9 = !DILocation(line: 24, column: 1, scope: !6) +!10 = !DILocation(line: 12, column: 3, scope: !6) +!11 = !DILocation(line: 20, column: 3, scope: !6) +!12 = !DILocation(line: 10, column: 34, scope: !6) +!13 = !DILocation(line: 10, column: 22, scope: !6) +!14 = distinct !{!14, !8, !15} +!15 = !DILocation(line: 23, column: 2, scope: !6) +!16 = !DILocation(line: 13, column: 5, scope: !6) +!17 = !DILocation(line: 14, column: 11, scope: !6) +!18 = !{!19, !19, i64 0} +!19 = !{!"float", !20, i64 0} +!20 = !{!"omnipotent char", !21, i64 0} +!21 = !{!"Simple C/C++ TBAA"} +!22 = !DILocation(line: 14, column: 18, scope: !6) +!23 = !DILocation(line: 14, column: 25, scope: !6) +!24 = !DILocation(line: 14, column: 23, scope: !6) +!25 = !DILocation(line: 14, column: 16, scope: !6) +!26 = !DILocation(line: 14, column: 4, scope: !6) +!27 = !DILocation(line: 14, column: 9, scope: !6) +!28 = !DILocation(line: 15, column: 8, scope: !6) +!29 = !DILocation(line: 15, column: 13, scope: !6) +!30 = !DILocation(line: 16, column: 6, scope: !6) +!31 = !DILocation(line: 17, column: 17, scope: !6) +!32 = !DILocation(line: 17, column: 5, scope: !6) +!33 = !DILocation(line: 
17, column: 10, scope: !6) +!34 = !DILocation(line: 18, column: 4, scope: !6) +!35 = !DILocation(line: 12, column: 33, scope: !6) +!36 = !DILocation(line: 12, column: 21, scope: !6) +!37 = distinct !{!37, !10, !38} +!38 = !DILocation(line: 19, column: 3, scope: !6)