Index: llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp =================================================================== --- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -11449,11 +11449,18 @@ } // Search up and down at the same time, because we don't know if the new // instruction is above or below the existing scheduling region. + // Ignore debug info so that's not counted against the budget. Otherwise + // debug info could affect codegen. BasicBlock::reverse_iterator UpIter = ++ScheduleStart->getIterator().getReverse(); BasicBlock::reverse_iterator UpperEnd = BB->rend(); BasicBlock::iterator DownIter = ScheduleEnd->getIterator(); BasicBlock::iterator LowerEnd = BB->end(); + auto IsDbgInstr = [](const Instruction &I) { + return isa<DbgInfoIntrinsic>(&I); + }; + UpIter = std::find_if_not(UpIter, UpperEnd, IsDbgInstr); + DownIter = std::find_if_not(DownIter, LowerEnd, IsDbgInstr); while (UpIter != UpperEnd && DownIter != LowerEnd && &*UpIter != I && &*DownIter != I) { if (++ScheduleRegionSize > ScheduleRegionSizeLimit) { @@ -11463,6 +11470,9 @@ ++UpIter; ++DownIter; + + UpIter = std::find_if_not(UpIter, UpperEnd, IsDbgInstr); + DownIter = std::find_if_not(DownIter, LowerEnd, IsDbgInstr); } if (DownIter == LowerEnd || (UpIter != UpperEnd && &*UpIter == I)) { assert(I->getParent() == ScheduleStart->getParent() && Index: llvm/test/Transforms/SLPVectorizer/X86/schedule_budget.ll =================================================================== --- llvm/test/Transforms/SLPVectorizer/X86/schedule_budget.ll +++ llvm/test/Transforms/SLPVectorizer/X86/schedule_budget.ll @@ -1,5 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -passes=slp-vectorizer -S -slp-schedule-budget=16 -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s +; RUN: opt < %s -passes=slp-vectorizer -S -slp-schedule-budget=16 -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s 
-check-prefix NO_VECTOR +; RUN: opt < %s -passes=slp-vectorizer -S -slp-schedule-budget=18 -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s -check-prefix VECTOR_DBG +; RUN: opt < %s -strip-debug -passes=slp-vectorizer -S -slp-schedule-budget=18 -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s -check-prefix VECTOR_NODBG target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.9.0" @@ -7,44 +9,137 @@ ; Test if the budget for the scheduling region size works. ; We test with a reduced budget of 16 which should prevent vectorizing the loads. +; We also verify that we do get vectorization with a sufficiently large budget, +; 18. We should get vectorization even if there happens to be some dbg.value +; calls since they should be ignored, to not let debug information affect the +; code we get. + declare void @unknown() define void @test(ptr %a, ptr %b, ptr %c, ptr %d) { -; CHECK-LABEL: @test( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[A:%.*]], align 4 -; CHECK-NEXT: call void @unknown() -; CHECK-NEXT: call void @unknown() -; CHECK-NEXT: call void @unknown() -; CHECK-NEXT: call void @unknown() -; CHECK-NEXT: call void @unknown() -; CHECK-NEXT: call void @unknown() -; CHECK-NEXT: call void @unknown() -; CHECK-NEXT: call void @unknown() -; CHECK-NEXT: call void @unknown() -; CHECK-NEXT: call void @unknown() -; CHECK-NEXT: call void @unknown() -; CHECK-NEXT: call void @unknown() -; CHECK-NEXT: call void @unknown() -; CHECK-NEXT: call void @unknown() -; CHECK-NEXT: call void @unknown() -; CHECK-NEXT: call void @unknown() -; CHECK-NEXT: call void @unknown() -; CHECK-NEXT: call void @unknown() -; CHECK-NEXT: call void @unknown() -; CHECK-NEXT: call void @unknown() -; CHECK-NEXT: call void @unknown() -; CHECK-NEXT: call void @unknown() -; CHECK-NEXT: call void @unknown() -; CHECK-NEXT: call void @unknown() -; CHECK-NEXT: call void @unknown() -; CHECK-NEXT: call void 
@unknown() -; CHECK-NEXT: call void @unknown() -; CHECK-NEXT: call void @unknown() -; CHECK-NEXT: store <4 x float> [[TMP1]], ptr [[B:%.*]], align 4 -; CHECK-NEXT: [[TMP4:%.*]] = load <4 x float>, ptr [[C:%.*]], align 4 -; CHECK-NEXT: store <4 x float> [[TMP4]], ptr [[D:%.*]], align 4 -; CHECK-NEXT: ret void +; NO_VECTOR-LABEL: @test( +; NO_VECTOR-NEXT: entry: +; NO_VECTOR-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 4 +; NO_VECTOR-NEXT: call void @unknown() +; NO_VECTOR-NEXT: call void @unknown() +; NO_VECTOR-NEXT: call void @unknown() +; NO_VECTOR-NEXT: call void @unknown() +; NO_VECTOR-NEXT: call void @unknown() +; NO_VECTOR-NEXT: call void @unknown() +; NO_VECTOR-NEXT: call void @unknown() +; NO_VECTOR-NEXT: call void @unknown() +; NO_VECTOR-NEXT: call void @unknown() +; NO_VECTOR-NEXT: call void @unknown() +; NO_VECTOR-NEXT: call void @unknown() +; NO_VECTOR-NEXT: call void @unknown() +; NO_VECTOR-NEXT: call void @unknown() +; NO_VECTOR-NEXT: call void @unknown() +; NO_VECTOR-NEXT: call void @unknown() +; NO_VECTOR-NEXT: call void @unknown() +; NO_VECTOR-NEXT: call void @unknown() +; NO_VECTOR-NEXT: call void @unknown() +; NO_VECTOR-NEXT: call void @unknown() +; NO_VECTOR-NEXT: call void @unknown() +; NO_VECTOR-NEXT: call void @unknown() +; NO_VECTOR-NEXT: call void @unknown() +; NO_VECTOR-NEXT: call void @unknown() +; NO_VECTOR-NEXT: call void @unknown() +; NO_VECTOR-NEXT: call void @unknown() +; NO_VECTOR-NEXT: call void @unknown() +; NO_VECTOR-NEXT: call void @unknown() +; NO_VECTOR-NEXT: call void @unknown() +; NO_VECTOR-NEXT: call void @llvm.dbg.value(metadata i16 1, metadata [[META4:![0-9]+]], metadata !DIExpression()), !dbg [[DBG14:![0-9]+]] +; NO_VECTOR-NEXT: call void @llvm.dbg.value(metadata i16 1, metadata [[META4]], metadata !DIExpression()), !dbg [[DBG14]] +; NO_VECTOR-NEXT: call void @llvm.dbg.value(metadata i16 1, metadata [[META4]], metadata !DIExpression()), !dbg [[DBG14]] +; NO_VECTOR-NEXT: call void @llvm.dbg.value(metadata 
i16 1, metadata [[META4]], metadata !DIExpression()), !dbg [[DBG14]] +; NO_VECTOR-NEXT: call void @llvm.dbg.value(metadata i16 1, metadata [[META4]], metadata !DIExpression()), !dbg [[DBG14]] +; NO_VECTOR-NEXT: call void @llvm.dbg.value(metadata i16 1, metadata [[META4]], metadata !DIExpression()), !dbg [[DBG14]] +; NO_VECTOR-NEXT: call void @llvm.dbg.value(metadata i16 1, metadata [[META4]], metadata !DIExpression()), !dbg [[DBG14]] +; NO_VECTOR-NEXT: call void @llvm.dbg.value(metadata i16 1, metadata [[META4]], metadata !DIExpression()), !dbg [[DBG14]] +; NO_VECTOR-NEXT: store <4 x float> [[TMP0]], ptr [[B:%.*]], align 4 +; NO_VECTOR-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[C:%.*]], align 4 +; NO_VECTOR-NEXT: store <4 x float> [[TMP1]], ptr [[D:%.*]], align 4 +; NO_VECTOR-NEXT: ret void +; +; VECTOR_DBG-LABEL: @test( +; VECTOR_DBG-NEXT: entry: +; VECTOR_DBG-NEXT: call void @llvm.dbg.value(metadata i16 1, metadata [[META4:![0-9]+]], metadata !DIExpression()), !dbg [[DBG14:![0-9]+]] +; VECTOR_DBG-NEXT: call void @llvm.dbg.value(metadata i16 1, metadata [[META4]], metadata !DIExpression()), !dbg [[DBG14]] +; VECTOR_DBG-NEXT: call void @llvm.dbg.value(metadata i16 1, metadata [[META4]], metadata !DIExpression()), !dbg [[DBG14]] +; VECTOR_DBG-NEXT: call void @llvm.dbg.value(metadata i16 1, metadata [[META4]], metadata !DIExpression()), !dbg [[DBG14]] +; VECTOR_DBG-NEXT: call void @llvm.dbg.value(metadata i16 1, metadata [[META4]], metadata !DIExpression()), !dbg [[DBG14]] +; VECTOR_DBG-NEXT: call void @llvm.dbg.value(metadata i16 1, metadata [[META4]], metadata !DIExpression()), !dbg [[DBG14]] +; VECTOR_DBG-NEXT: call void @llvm.dbg.value(metadata i16 1, metadata [[META4]], metadata !DIExpression()), !dbg [[DBG14]] +; VECTOR_DBG-NEXT: call void @llvm.dbg.value(metadata i16 1, metadata [[META4]], metadata !DIExpression()), !dbg [[DBG14]] +; VECTOR_DBG-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 4 +; VECTOR_DBG-NEXT: call void @unknown() +; 
VECTOR_DBG-NEXT: call void @unknown() +; VECTOR_DBG-NEXT: call void @unknown() +; VECTOR_DBG-NEXT: call void @unknown() +; VECTOR_DBG-NEXT: call void @unknown() +; VECTOR_DBG-NEXT: call void @unknown() +; VECTOR_DBG-NEXT: call void @unknown() +; VECTOR_DBG-NEXT: call void @unknown() +; VECTOR_DBG-NEXT: call void @unknown() +; VECTOR_DBG-NEXT: call void @unknown() +; VECTOR_DBG-NEXT: call void @unknown() +; VECTOR_DBG-NEXT: call void @unknown() +; VECTOR_DBG-NEXT: call void @unknown() +; VECTOR_DBG-NEXT: call void @unknown() +; VECTOR_DBG-NEXT: call void @unknown() +; VECTOR_DBG-NEXT: call void @unknown() +; VECTOR_DBG-NEXT: call void @unknown() +; VECTOR_DBG-NEXT: call void @unknown() +; VECTOR_DBG-NEXT: call void @unknown() +; VECTOR_DBG-NEXT: call void @unknown() +; VECTOR_DBG-NEXT: call void @unknown() +; VECTOR_DBG-NEXT: call void @unknown() +; VECTOR_DBG-NEXT: call void @unknown() +; VECTOR_DBG-NEXT: call void @unknown() +; VECTOR_DBG-NEXT: call void @unknown() +; VECTOR_DBG-NEXT: call void @unknown() +; VECTOR_DBG-NEXT: call void @unknown() +; VECTOR_DBG-NEXT: call void @unknown() +; VECTOR_DBG-NEXT: store <4 x float> [[TMP0]], ptr [[B:%.*]], align 4 +; VECTOR_DBG-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[C:%.*]], align 4 +; VECTOR_DBG-NEXT: store <4 x float> [[TMP1]], ptr [[D:%.*]], align 4 +; VECTOR_DBG-NEXT: ret void +; +; VECTOR_NODBG-LABEL: @test( +; VECTOR_NODBG-NEXT: entry: +; VECTOR_NODBG-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 4 +; VECTOR_NODBG-NEXT: call void @unknown() +; VECTOR_NODBG-NEXT: call void @unknown() +; VECTOR_NODBG-NEXT: call void @unknown() +; VECTOR_NODBG-NEXT: call void @unknown() +; VECTOR_NODBG-NEXT: call void @unknown() +; VECTOR_NODBG-NEXT: call void @unknown() +; VECTOR_NODBG-NEXT: call void @unknown() +; VECTOR_NODBG-NEXT: call void @unknown() +; VECTOR_NODBG-NEXT: call void @unknown() +; VECTOR_NODBG-NEXT: call void @unknown() +; VECTOR_NODBG-NEXT: call void @unknown() +; VECTOR_NODBG-NEXT: call void 
@unknown() +; VECTOR_NODBG-NEXT: call void @unknown() +; VECTOR_NODBG-NEXT: call void @unknown() +; VECTOR_NODBG-NEXT: call void @unknown() +; VECTOR_NODBG-NEXT: call void @unknown() +; VECTOR_NODBG-NEXT: call void @unknown() +; VECTOR_NODBG-NEXT: call void @unknown() +; VECTOR_NODBG-NEXT: call void @unknown() +; VECTOR_NODBG-NEXT: call void @unknown() +; VECTOR_NODBG-NEXT: call void @unknown() +; VECTOR_NODBG-NEXT: call void @unknown() +; VECTOR_NODBG-NEXT: call void @unknown() +; VECTOR_NODBG-NEXT: call void @unknown() +; VECTOR_NODBG-NEXT: call void @unknown() +; VECTOR_NODBG-NEXT: call void @unknown() +; VECTOR_NODBG-NEXT: call void @unknown() +; VECTOR_NODBG-NEXT: call void @unknown() +; VECTOR_NODBG-NEXT: store <4 x float> [[TMP0]], ptr [[B:%.*]], align 4 +; VECTOR_NODBG-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[C:%.*]], align 4 +; VECTOR_NODBG-NEXT: store <4 x float> [[TMP1]], ptr [[D:%.*]], align 4 +; VECTOR_NODBG-NEXT: ret void ; entry: ; Don't vectorize these loads. @@ -86,8 +181,20 @@ call void @unknown() call void @unknown() + ; The dbg.values should not affect vectorization. + call void @llvm.dbg.value(metadata i16 1, metadata !4, metadata !DIExpression()), !dbg !14 + call void @llvm.dbg.value(metadata i16 1, metadata !4, metadata !DIExpression()), !dbg !14 + call void @llvm.dbg.value(metadata i16 1, metadata !4, metadata !DIExpression()), !dbg !14 + call void @llvm.dbg.value(metadata i16 1, metadata !4, metadata !DIExpression()), !dbg !14 + ; Don't vectorize these stores because their operands are too far away. store float %l0, ptr %b + + ; The dbg.values should not affect vectorization. 
+ call void @llvm.dbg.value(metadata i16 1, metadata !4, metadata !DIExpression()), !dbg !14 + call void @llvm.dbg.value(metadata i16 1, metadata !4, metadata !DIExpression()), !dbg !14 + call void @llvm.dbg.value(metadata i16 1, metadata !4, metadata !DIExpression()), !dbg !14 + call void @llvm.dbg.value(metadata i16 1, metadata !4, metadata !DIExpression()), !dbg !14 %b1 = getelementptr inbounds float, ptr %b, i64 1 store float %l1, ptr %b1 %b2 = getelementptr inbounds float, ptr %b, i64 2 @@ -116,3 +223,21 @@ ret void } +; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) +declare void @llvm.dbg.value(metadata, metadata, metadata) #0 + +attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3} + +!0 = distinct !DICompileUnit(language: DW_LANG_C11, file: !1, producer: "clang", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, globals: !2, splitDebugInlining: false, nameTableKind: None) +!1 = !DIFile(filename: "foo2.c", directory: "") +!2 = !{} +!3 = !{i32 2, !"Debug Info Version", i32 3} +!4 = !DILocalVariable(name: "k", scope: !5, file: !1, line: 12, type: !13) +!5 = distinct !DILexicalBlock(scope: !11, file: !1, line: 12, column: 7) +!11 = distinct !DISubprogram(name: "l", scope: !1, file: !1, line: 6, type: !12, scopeLine: 6, flags: DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2) +!12 = !DISubroutineType(types: !2) +!13 = !DIBasicType(name: "int", size: 16, encoding: DW_ATE_signed) +!14 = !DILocation(line: 0, scope: !5)