diff --git a/llvm/include/llvm/Transforms/Utils/LoopUtils.h b/llvm/include/llvm/Transforms/Utils/LoopUtils.h --- a/llvm/include/llvm/Transforms/Utils/LoopUtils.h +++ b/llvm/include/llvm/Transforms/Utils/LoopUtils.h @@ -219,14 +219,15 @@ StringRef Name); /// Find named metadata for a loop with an integer value. -llvm::Optional getOptionalIntLoopAttribute(Loop *TheLoop, StringRef Name); +llvm::Optional getOptionalIntLoopAttribute(const Loop *TheLoop, + StringRef Name); /// Find a combination of metadata ("llvm.loop.vectorize.width" and /// "llvm.loop.vectorize.scalable.enable") for a loop and use it to construct a /// ElementCount. If the metadata "llvm.loop.vectorize.width" cannot be found /// then None is returned. Optional -getOptionalElementCountLoopAttribute(Loop *TheLoop); +getOptionalElementCountLoopAttribute(const Loop *TheLoop); /// Create a new loop identifier for a loop created from a loop transformation. /// @@ -295,11 +296,11 @@ /// @{ /// Get the mode for LLVM's supported loop transformations. -TransformationMode hasUnrollTransformation(Loop *L); -TransformationMode hasUnrollAndJamTransformation(Loop *L); -TransformationMode hasVectorizeTransformation(Loop *L); -TransformationMode hasDistributeTransformation(Loop *L); -TransformationMode hasLICMVersioningTransformation(Loop *L); +TransformationMode hasUnrollTransformation(const Loop *L); +TransformationMode hasUnrollAndJamTransformation(const Loop *L); +TransformationMode hasVectorizeTransformation(const Loop *L); +TransformationMode hasDistributeTransformation(const Loop *L); +TransformationMode hasLICMVersioningTransformation(const Loop *L); /// @} /// Set input string into loop metadata by keeping other values intact. diff --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h --- a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h +++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h @@ -46,7 +46,7 @@ class LoopVectorizeHints { enum HintKind { HK_WIDTH, - HK_UNROLL, + HK_INTERLEAVE, HK_FORCE, HK_ISVECTORIZED, HK_PREDICATE, @@ -111,7 +111,15 @@ ElementCount getWidth() const { return ElementCount::get(Width.Value, isScalable()); } - unsigned getInterleave() const { return Interleave.Value; } + unsigned getInterleave() const { + if (Interleave.Value) + return Interleave.Value; + // If interleaving is not explicitly set, assume that if we do not want + // unrolling, we also don't want any interleaving. + if (llvm::hasUnrollTransformation(TheLoop) & TM_Disable) + return 1; + return 0; + } unsigned getIsVectorized() const { return IsVectorized.Value; } unsigned getPredicate() const { return Predicate.Value; } enum ForceKind getForce() const { diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp --- a/llvm/lib/Transforms/Utils/LoopUtils.cpp +++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp @@ -298,7 +298,7 @@ } Optional -llvm::getOptionalElementCountLoopAttribute(Loop *TheLoop) { +llvm::getOptionalElementCountLoopAttribute(const Loop *TheLoop) { Optional Width = getOptionalIntLoopAttribute(TheLoop, "llvm.loop.vectorize.width"); @@ -311,7 +311,7 @@ return None; } -llvm::Optional llvm::getOptionalIntLoopAttribute(Loop *TheLoop, +llvm::Optional llvm::getOptionalIntLoopAttribute(const Loop *TheLoop, StringRef Name) { const MDOperand *AttrMD = findStringMetadataForLoop(TheLoop, Name).getValueOr(nullptr); @@ -418,7 +418,7 @@ return getBooleanLoopAttribute(L, LLVMLoopMustProgress); } -TransformationMode llvm::hasUnrollTransformation(Loop *L) { +TransformationMode llvm::hasUnrollTransformation(const Loop *L) { if (getBooleanLoopAttribute(L, "llvm.loop.unroll.disable")) return TM_SuppressedByUser; @@ -439,7 +439,7 @@ return TM_Unspecified; } -TransformationMode llvm::hasUnrollAndJamTransformation(Loop *L) { +TransformationMode llvm::hasUnrollAndJamTransformation(const Loop *L) { if (getBooleanLoopAttribute(L, "llvm.loop.unroll_and_jam.disable")) return TM_SuppressedByUser; @@ -457,7 +457,7 @@ return TM_Unspecified; } -TransformationMode llvm::hasVectorizeTransformation(Loop *L) { +TransformationMode llvm::hasVectorizeTransformation(const Loop *L) { Optional Enable = getOptionalBoolLoopAttribute(L, "llvm.loop.vectorize.enable"); @@ -493,7 +493,7 @@ return TM_Unspecified; } -TransformationMode llvm::hasDistributeTransformation(Loop *L) { +TransformationMode llvm::hasDistributeTransformation(const Loop *L) { if (getBooleanLoopAttribute(L, "llvm.loop.distribute.enable")) return TM_ForcedByUser; @@ -503,7 +503,7 @@ return TM_Unspecified; } -TransformationMode llvm::hasLICMVersioningTransformation(Loop *L) { +TransformationMode llvm::hasLICMVersioningTransformation(const Loop *L) { if (getBooleanLoopAttribute(L, "llvm.loop.licm_versioning.disable")) return TM_SuppressedByUser; diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp @@ -57,7 +57,7 @@ switch (Kind) { case HK_WIDTH: return isPowerOf2_32(Val) && Val <= VectorizerParams::MaxVectorWidth; - case HK_UNROLL: + case HK_INTERLEAVE: return isPowerOf2_32(Val) && Val <= MaxInterleaveFactor; case HK_FORCE: return (Val <= 1); @@ -73,7 +73,7 @@ bool InterleaveOnlyWhenForced, OptimizationRemarkEmitter &ORE) : Width("vectorize.width", VectorizerParams::VectorizationFactor, HK_WIDTH), - Interleave("interleave.count", InterleaveOnlyWhenForced, HK_UNROLL), + Interleave("interleave.count", InterleaveOnlyWhenForced, HK_INTERLEAVE), Force("vectorize.enable", FK_Undefined, HK_FORCE), IsVectorized("isvectorized", 0, HK_ISVECTORIZED), Predicate("vectorize.predicate.enable", FK_Undefined, HK_PREDICATE), @@ -91,8 +91,8 @@ // consider the loop to have been already vectorized because there's // nothing more that we can do. IsVectorized.Value = - getWidth() == ElementCount::getFixed(1) && Interleave.Value == 1; - LLVM_DEBUG(if (InterleaveOnlyWhenForced && Interleave.Value == 1) dbgs() + getWidth() == ElementCount::getFixed(1) && getInterleave() == 1; + LLVM_DEBUG(if (InterleaveOnlyWhenForced && getInterleave() == 1) dbgs() << "LV: Interleaving disabled by the pass manager\n"); } @@ -165,8 +165,8 @@ R << " (Force=" << NV("Force", true); if (Width.Value != 0) R << ", Vector Width=" << NV("VectorWidth", getWidth()); - if (Interleave.Value != 0) - R << ", Interleave Count=" << NV("InterleaveCount", Interleave.Value); + if (getInterleave() != 0) + R << ", Interleave Count=" << NV("InterleaveCount", getInterleave()); R << ")"; } return R; diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -9735,7 +9735,7 @@ ? "enabled" : "?")) << " width=" << Hints.getWidth() - << " unroll=" << Hints.getInterleave() << "\n"); + << " interleave=" << Hints.getInterleave() << "\n"); // Function containing loop Function *F = L->getHeader()->getParent(); diff --git a/llvm/test/Transforms/LoopVectorize/X86/no_fpmath.ll b/llvm/test/Transforms/LoopVectorize/X86/no_fpmath.ll --- a/llvm/test/Transforms/LoopVectorize/X86/no_fpmath.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/no_fpmath.ll @@ -35,7 +35,7 @@ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !8 %lftr.wideiv = trunc i64 %indvars.iv.next to i32, !dbg !8 %exitcond = icmp eq i32 %lftr.wideiv, %n, !dbg !8 - br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body, !dbg !8, !llvm.loop !17 + br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body, !dbg !8 } ; Function Attrs: nounwind readonly ssp uwtable @@ -92,8 +92,6 @@ !14 = !{!"Simple C/C++ TBAA"} !15 = !DILocation(line: 6, column: 19, scope: !4) !16 = !DILocation(line: 6, column: 11, scope: !4) -!17 = distinct !{!17, !18} -!18 = !{!"llvm.loop.unroll.disable"} !19 = !DILocation(line: 16, column: 20, scope: !20) !20 = distinct !DISubprogram(name: "cond_sum_loop_hint", scope: !5, file: !5, line: 12, type: !6, isLocal: false, isDefinition: true, scopeLine: 12, flags: DIFlagPrototyped, isOptimized: true, unit: !28, retainedNodes: !7) !21 = !DILocation(line: 16, column: 3, scope: !20) @@ -101,7 +99,7 @@ !23 = !DILocation(line: 20, column: 3, scope: !20) !24 = !DILocation(line: 17, column: 19, scope: !20) !25 = !DILocation(line: 17, column: 11, scope: !20) -!26 = distinct !{!26, !27, !18} +!26 = distinct !{!26, !27} !27 = !{!"llvm.loop.vectorize.enable", i1 true} !28 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang", file: !5, diff --git a/llvm/test/Transforms/LoopVectorize/explicit_outer_detection.ll b/llvm/test/Transforms/LoopVectorize/explicit_outer_detection.ll --- a/llvm/test/Transforms/LoopVectorize/explicit_outer_detection.ll +++ b/llvm/test/Transforms/LoopVectorize/explicit_outer_detection.ll @@ -19,7 +19,7 @@ ; Case 1: Annotated outer loop WITH vector width information must be collected. ; CHECK-LABEL: vector_width -; CHECK: LV: Loop hints: force=enabled width=4 unroll=0 +; CHECK: LV: Loop hints: force=enabled width=4 interleave=0 ; CHECK: LV: We can vectorize this outer loop! ; CHECK: LV: Using user VF 4 to build VPlans. ; CHECK-NOT: LV: Loop hints: force=? @@ -71,7 +71,7 @@ ; Case 2: Annotated outer loop WITHOUT vector width information must be collected. ; CHECK-LABEL: case2 -; CHECK: LV: Loop hints: force=enabled width=0 unroll=0 +; CHECK: LV: Loop hints: force=enabled width=0 interleave=0 ; CHECK: LV: We can vectorize this outer loop! ; CHECK: LV: Using VF 1 to build VPlans. diff --git a/llvm/test/Transforms/LoopVectorize/nounroll.ll b/llvm/test/Transforms/LoopVectorize/nounroll.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/nounroll.ll @@ -0,0 +1,93 @@ +; RUN: opt < %s -passes='loop-vectorize' -debug-only=loop-vectorize -S 2>&1 | FileCheck %s + +target datalayout = "e-m:e-i64:64-n32:64-S128-v256:256:256-v512:512:512" + +; CHECK: LV: Checking a loop in "f1" +; CHECK: LV: Loop hints: force=? width=0 interleave=1 +define dso_local void @f1(i32 signext %n, i32* %A) { +entry: + %cmp1 = icmp sgt i32 %n, 0 + br i1 %cmp1, label %for.body.preheader, label %for.end + +for.body.preheader: ; preds = %entry + %wide.trip.count = zext i32 %n to i64 + br label %for.body + +for.body: ; preds = %for.body.preheader, %for.body + %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ] + %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv + %0 = trunc i64 %indvars.iv to i32 + store i32 %0, i32* %arrayidx, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp ne i64 %indvars.iv.next, %wide.trip.count + br i1 %exitcond, label %for.body, label %for.end.loopexit, !llvm.loop !1 + +for.end.loopexit: ; preds = %for.body + br label %for.end + +for.end: ; preds = %for.end.loopexit, %entry + ret void +} + +; CHECK: LV: Checking a loop in "f2" +; CHECK: LV: Loop hints: force=? width=0 interleave=4 +define dso_local void @f2(i32 signext %n, i32* %A) { +entry: + %cmp1 = icmp sgt i32 %n, 0 + br i1 %cmp1, label %for.body.preheader, label %for.end + +for.body.preheader: ; preds = %entry + %wide.trip.count = zext i32 %n to i64 + br label %for.body + +for.body: ; preds = %for.body.preheader, %for.body + %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ] + %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv + %0 = trunc i64 %indvars.iv to i32 + store i32 %0, i32* %arrayidx, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp ne i64 %indvars.iv.next, %wide.trip.count + br i1 %exitcond, label %for.body, label %for.end.loopexit, !llvm.loop !3 + +for.end.loopexit: ; preds = %for.body + br label %for.end + +for.end: ; preds = %for.end.loopexit, %entry + ret void +} + +; CHECK: LV: Checking a loop in "f3" +; CHECK: LV: Loop hints: force=? width=0 interleave=1 +define dso_local void @f3(i32 signext %n, i32* %A) { +entry: + %cmp1 = icmp sgt i32 %n, 0 + br i1 %cmp1, label %for.body.preheader, label %for.end + +for.body.preheader: ; preds = %entry + %wide.trip.count = zext i32 %n to i64 + br label %for.body + +for.body: ; preds = %for.body.preheader, %for.body + %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ] + %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv + %0 = trunc i64 %indvars.iv to i32 + store i32 %0, i32* %arrayidx, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp ne i64 %indvars.iv.next, %wide.trip.count + br i1 %exitcond, label %for.body, label %for.end.loopexit, !llvm.loop !6 + +for.end.loopexit: ; preds = %for.body + br label %for.end + +for.end: ; preds = %for.end.loopexit, %entry + ret void +} + +!1 = distinct !{!1, !2} +!2 = !{!"llvm.loop.unroll.disable"} +!3 = distinct !{!3, !4, !5} +!4 = !{!"llvm.loop.unroll.disable"} +!5 = !{!"llvm.loop.interleave.count", i32 4} +!6 = distinct !{!6, !7, !8} +!7 = !{!"llvm.loop.mustprogress"} +!8 = !{!"llvm.loop.unroll.count", i32 1}