diff --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h --- a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h +++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h @@ -46,7 +46,10 @@ class LoopVectorizeHints { enum HintKind { HK_WIDTH, - HK_UNROLL, + HK_INTERLEAVE, + HK_UNROLL_COUNT, + HK_UNROLL_DISABLED, + HK_UNROLL_RUNTIME_DISABLED, HK_FORCE, HK_ISVECTORIZED, HK_PREDICATE, @@ -71,6 +74,15 @@ /// Vectorization interleave factor. Hint Interleave; + /// Unroll count hint; a count of 1 is treated as a nounroll request. + Hint UnrollCount; + + /// 1 if unrolling is disabled; getInterleave() then defaults to 1. + Hint UnrollDisabled; + + /// 1 if runtime unrolling is disabled; getInterleave() then defaults to 1. + Hint UnrollRuntimeDisabled; + /// Vectorization forced Hint Force; @@ -111,7 +123,16 @@ ElementCount getWidth() const { return ElementCount::get(Width.Value, isScalable()); } - unsigned getInterleave() const { return Interleave.Value; } + unsigned getInterleave() const { + if (Interleave.Value) + return Interleave.Value; + // Without an explicit count, a nounroll request disables interleaving. + if (1 == UnrollCount.Value || + 1 == UnrollDisabled.Value || + 1 == UnrollRuntimeDisabled.Value) + return 1; + return 0; + } unsigned getIsVectorized() const { return IsVectorized.Value; } unsigned getPredicate() const { return Predicate.Value; } enum ForceKind getForce() const { @@ -153,7 +174,9 @@ /// Find hints specified in the loop metadata and update local values. void getHintsFromMetadata(); - /// Checks string hint with one operand and set value if valid. + /// Checks a string hint with zero or one operand and sets it if valid. + /// \p Arg is the hint MD operand if there is one operand, and null + /// otherwise. void setHint(StringRef Name, Metadata *Arg); /// The loop these hints belong to. 
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp @@ -57,13 +57,17 @@ switch (Kind) { case HK_WIDTH: return isPowerOf2_32(Val) && Val <= VectorizerParams::MaxVectorWidth; - case HK_UNROLL: + case HK_INTERLEAVE: return isPowerOf2_32(Val) && Val <= MaxInterleaveFactor; + case HK_UNROLL_COUNT: + return true; // Any count is valid; 'Val >= 0' is vacuous for unsigned. case HK_FORCE: return (Val <= 1); case HK_ISVECTORIZED: case HK_PREDICATE: case HK_SCALABLE: + case HK_UNROLL_DISABLED: + case HK_UNROLL_RUNTIME_DISABLED: return (Val == 0 || Val == 1); } return false; @@ -73,7 +77,10 @@ bool InterleaveOnlyWhenForced, OptimizationRemarkEmitter &ORE) : Width("vectorize.width", VectorizerParams::VectorizationFactor, HK_WIDTH), - Interleave("interleave.count", InterleaveOnlyWhenForced, HK_UNROLL), + Interleave("interleave.count", InterleaveOnlyWhenForced, HK_INTERLEAVE), + UnrollCount("unroll.count", 0, HK_UNROLL_COUNT), + UnrollDisabled("unroll.disable", 0, HK_UNROLL_DISABLED), + UnrollRuntimeDisabled("unroll.runtime.disable", 0, HK_UNROLL_RUNTIME_DISABLED), Force("vectorize.enable", FK_Undefined, HK_FORCE), IsVectorized("isvectorized", 0, HK_ISVECTORIZED), Predicate("vectorize.predicate.enable", FK_Undefined, HK_PREDICATE), @@ -91,8 +98,8 @@ // consider the loop to have been already vectorized because there's // nothing more that we can do. 
IsVectorized.Value = - getWidth() == ElementCount::getFixed(1) && Interleave.Value == 1; - LLVM_DEBUG(if (InterleaveOnlyWhenForced && Interleave.Value == 1) dbgs() + getWidth() == ElementCount::getFixed(1) && getInterleave() == 1; + LLVM_DEBUG(if (InterleaveOnlyWhenForced && getInterleave() == 1) dbgs() << "LV: Interleaving disabled by the pass manager\n"); } @@ -165,8 +172,8 @@ R << " (Force=" << NV("Force", true); if (Width.Value != 0) R << ", Vector Width=" << NV("VectorWidth", getWidth()); - if (Interleave.Value != 0) - R << ", Interleave Count=" << NV("InterleaveCount", Interleave.Value); + if (getInterleave() != 0) + R << ", Interleave Count=" << NV("InterleaveCount", getInterleave()); R << ")"; } return R; @@ -213,10 +220,12 @@ if (!S) continue; + if (Args.size() > 1) + continue; + // Check if the hint starts with the loop metadata prefix. StringRef Name = S->getString(); - if (Args.size() == 1) - setHint(Name, Args[0]); + setHint(Name, Args.size() == 1 ? Args[0] : nullptr); } } @@ -225,13 +234,25 @@ return; Name = Name.substr(Prefix().size(), StringRef::npos); - const ConstantInt *C = mdconst::dyn_extract(Arg); - if (!C) - return; - unsigned Val = C->getZExtValue(); + // Use value of 1 to indicate 'true' when MD has no operand. For example if + // `unroll.disable` is seen, then UnrollDisabled will have a value of 1. 
+ unsigned Val = 1; + if (Arg) { + const ConstantInt *C = mdconst::dyn_extract(Arg); + if (!C) + return; + Val = C->getZExtValue(); + } - Hint *Hints[] = {&Width, &Interleave, &Force, - &IsVectorized, &Predicate, &Scalable}; + Hint *Hints[] = {&Width, + &Interleave, + &UnrollCount, + &UnrollDisabled, + &UnrollRuntimeDisabled, + &Force, + &IsVectorized, + &Predicate, + &Scalable}; for (auto H : Hints) { if (Name == H->Name) { if (H->validate(Val)) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -9651,7 +9651,7 @@ ? "enabled" : "?")) << " width=" << Hints.getWidth() - << " unroll=" << Hints.getInterleave() << "\n"); + << " interleave=" << Hints.getInterleave() << "\n"); // Function containing loop Function *F = L->getHeader()->getParent(); diff --git a/llvm/test/Transforms/LoopVectorize/X86/no_fpmath.ll b/llvm/test/Transforms/LoopVectorize/X86/no_fpmath.ll --- a/llvm/test/Transforms/LoopVectorize/X86/no_fpmath.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/no_fpmath.ll @@ -35,7 +35,7 @@ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !8 %lftr.wideiv = trunc i64 %indvars.iv.next to i32, !dbg !8 %exitcond = icmp eq i32 %lftr.wideiv, %n, !dbg !8 - br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body, !dbg !8, !llvm.loop !17 + br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body, !dbg !8 } ; Function Attrs: nounwind readonly ssp uwtable @@ -92,8 +92,6 @@ !14 = !{!"Simple C/C++ TBAA"} !15 = !DILocation(line: 6, column: 19, scope: !4) !16 = !DILocation(line: 6, column: 11, scope: !4) -!17 = distinct !{!17, !18} -!18 = !{!"llvm.loop.unroll.disable"} !19 = !DILocation(line: 16, column: 20, scope: !20) !20 = distinct !DISubprogram(name: "cond_sum_loop_hint", scope: !5, file: !5, line: 12, type: !6, isLocal: false, isDefinition: true, scopeLine: 12, flags: 
DIFlagPrototyped, isOptimized: true, unit: !28, retainedNodes: !7) !21 = !DILocation(line: 16, column: 3, scope: !20) @@ -101,7 +99,7 @@ !23 = !DILocation(line: 20, column: 3, scope: !20) !24 = !DILocation(line: 17, column: 19, scope: !20) !25 = !DILocation(line: 17, column: 11, scope: !20) -!26 = distinct !{!26, !27, !18} +!26 = distinct !{!26, !27} !27 = !{!"llvm.loop.vectorize.enable", i1 true} !28 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang", file: !5, diff --git a/llvm/test/Transforms/LoopVectorize/explicit_outer_detection.ll b/llvm/test/Transforms/LoopVectorize/explicit_outer_detection.ll --- a/llvm/test/Transforms/LoopVectorize/explicit_outer_detection.ll +++ b/llvm/test/Transforms/LoopVectorize/explicit_outer_detection.ll @@ -19,7 +19,7 @@ ; Case 1: Annotated outer loop WITH vector width information must be collected. ; CHECK-LABEL: vector_width -; CHECK: LV: Loop hints: force=enabled width=4 unroll=0 +; CHECK: LV: Loop hints: force=enabled width=4 interleave=0 ; CHECK: LV: We can vectorize this outer loop! ; CHECK: LV: Using user VF 4 to build VPlans. ; CHECK-NOT: LV: Loop hints: force=? @@ -71,7 +71,7 @@ ; Case 2: Annotated outer loop WITHOUT vector width information must be collected. ; CHECK-LABEL: case2 -; CHECK: LV: Loop hints: force=enabled width=0 unroll=0 +; CHECK: LV: Loop hints: force=enabled width=0 interleave=0 ; CHECK: LV: We can vectorize this outer loop! ; CHECK: LV: Using VF 1 to build VPlans. diff --git a/llvm/test/Transforms/LoopVectorize/nounroll.ll b/llvm/test/Transforms/LoopVectorize/nounroll.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/nounroll.ll @@ -0,0 +1,122 @@ +; RUN: opt < %s -passes='loop-vectorize' -debug-only=loop-vectorize -S 2>&1 | FileCheck %s + +target datalayout = "e-m:e-i64:64-n32:64-S128-v256:256:256-v512:512:512" + +; CHECK: LV: Checking a loop in "f1" +; CHECK: LV: Loop hints: force=? 
width=0 interleave=1 +define dso_local void @f1(i32 signext %n, i32* %A) { +entry: + %cmp1 = icmp sgt i32 %n, 0 + br i1 %cmp1, label %for.body.preheader, label %for.end + +for.body.preheader: ; preds = %entry + %wide.trip.count = zext i32 %n to i64 + br label %for.body + +for.body: ; preds = %for.body.preheader, %for.body + %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ] + %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv + %0 = trunc i64 %indvars.iv to i32 + store i32 %0, i32* %arrayidx, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp ne i64 %indvars.iv.next, %wide.trip.count + br i1 %exitcond, label %for.body, label %for.end.loopexit, !llvm.loop !1 + +for.end.loopexit: ; preds = %for.body + br label %for.end + +for.end: ; preds = %for.end.loopexit, %entry + ret void +} + +; CHECK: LV: Checking a loop in "f2" +; CHECK: LV: Loop hints: force=? width=0 interleave=1 +define dso_local void @f2(i32 signext %n, i32* %A) { +entry: + %cmp1 = icmp sgt i32 %n, 0 + br i1 %cmp1, label %for.body.preheader, label %for.end + +for.body.preheader: ; preds = %entry + %wide.trip.count = zext i32 %n to i64 + br label %for.body + +for.body: ; preds = %for.body.preheader, %for.body + %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ] + %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv + %0 = trunc i64 %indvars.iv to i32 + store i32 %0, i32* %arrayidx, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp ne i64 %indvars.iv.next, %wide.trip.count + br i1 %exitcond, label %for.body, label %for.end.loopexit, !llvm.loop !3 + +for.end.loopexit: ; preds = %for.body + br label %for.end + +for.end: ; preds = %for.end.loopexit, %entry + ret void +} + +; CHECK: LV: Checking a loop in "f3" +; CHECK: LV: Loop hints: force=? 
width=0 interleave=4 +define dso_local void @f3(i32 signext %n, i32* %A) { +entry: + %cmp1 = icmp sgt i32 %n, 0 + br i1 %cmp1, label %for.body.preheader, label %for.end + +for.body.preheader: ; preds = %entry + %wide.trip.count = zext i32 %n to i64 + br label %for.body + +for.body: ; preds = %for.body.preheader, %for.body + %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ] + %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv + %0 = trunc i64 %indvars.iv to i32 + store i32 %0, i32* %arrayidx, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp ne i64 %indvars.iv.next, %wide.trip.count + br i1 %exitcond, label %for.body, label %for.end.loopexit, !llvm.loop !5 + +for.end.loopexit: ; preds = %for.body + br label %for.end + +for.end: ; preds = %for.end.loopexit, %entry + ret void +} + +; CHECK: LV: Checking a loop in "f4" +; CHECK: LV: Loop hints: force=? width=0 interleave=1 +define dso_local void @f4(i32 signext %n, i32* %A) { +entry: + %cmp1 = icmp sgt i32 %n, 0 + br i1 %cmp1, label %for.body.preheader, label %for.end + +for.body.preheader: ; preds = %entry + %wide.trip.count = zext i32 %n to i64 + br label %for.body + +for.body: ; preds = %for.body.preheader, %for.body + %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ] + %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv + %0 = trunc i64 %indvars.iv to i32 + store i32 %0, i32* %arrayidx, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp ne i64 %indvars.iv.next, %wide.trip.count + br i1 %exitcond, label %for.body, label %for.end.loopexit, !llvm.loop !8 + +for.end.loopexit: ; preds = %for.body + br label %for.end + +for.end: ; preds = %for.end.loopexit, %entry + ret void +} + +!1 = distinct !{!1, !2} +!2 = !{!"llvm.loop.unroll.disable"} +!3 = distinct !{!3, !4} +!4 = !{!"llvm.loop.unroll.runtime.disable"} +!5 = distinct !{!5, !6, !7} +!6 = 
!{!"llvm.loop.unroll.runtime.disable"} +!7 = !{!"llvm.loop.interleave.count", i32 4} +!8 = distinct !{!8, !9, !10} +!9 = !{!"llvm.loop.mustprogress"} +!10 = !{!"llvm.loop.unroll.count", i32 1}