Index: llvm/docs/LangRef.rst =================================================================== --- llvm/docs/LangRef.rst +++ llvm/docs/LangRef.rst @@ -5930,18 +5930,23 @@ '``llvm.loop.vectorize.width``' Metadata ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -This metadata sets the target width of the vectorizer. The first -operand is the string ``llvm.loop.vectorize.width`` and the second -operand is an integer specifying the width. For example: +The vector width is an ElementCount tuple, where the first value specifies the +minimum vector width and the second value indicates whether the vectorization +factor is scalable or not. One example of this is shown below: .. code-block:: llvm - !0 = !{!"llvm.loop.vectorize.width", i32 4} + !0 = !{!"llvm.loop.vectorize.width", !1} + !1 = !{i32 4, i1 true} -Note that setting ``llvm.loop.vectorize.width`` to 1 disables -vectorization of the loop. If ``llvm.loop.vectorize.width`` is set to -0 or if the loop does not have this metadata the width will be -determined automatically. +which indicates the loop-vectorizer should use vector-length agnostic +vectorization with a minimum vector width of 4. + +For fixed-width vectorization factors, a shorthand ``i32`` operand for +``llvm.loop.vectorize.width`` is also supported: + +.. 
code-block:: llvm + !0 = !{!"llvm.loop.vectorize.width", i32 4} '``llvm.loop.vectorize.followup_vectorized``' Metadata ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Index: llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h =================================================================== --- llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h +++ llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h @@ -29,6 +29,7 @@ #include "llvm/ADT/MapVector.h" #include "llvm/Analysis/LoopAccessAnalysis.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" +#include "llvm/Support/TypeSize.h" #include "llvm/Transforms/Utils/LoopUtils.h" namespace llvm { @@ -49,13 +50,21 @@ /// Hint - associates name and validation with the hint value. struct Hint { const char *Name; - unsigned Value; // This may have to change for non-numeric values. + union { + unsigned U32; // Used for boolean and integer hint values. + ElementCount EC; // Used for the vectorization width. + } Value; HintKind Kind; Hint(const char *Name, unsigned Value, HintKind Kind) - : Name(Name), Value(Value), Kind(Kind) {} + : Name(Name), Value({Value}), Kind(Kind) {} - bool validate(unsigned Val); + Hint(const char *Name, ElementCount EC) + : Name(Name), Value{0}, Kind(HK_WIDTH) { + Value.EC = EC; + } + + bool validateAndSet(const Metadata *Arg); }; /// Vectorization width. @@ -98,15 +107,15 @@ /// Dumps all the hint information. 
void emitRemarkWithHints() const; - unsigned getWidth() const { return Width.Value; } - unsigned getInterleave() const { return Interleave.Value; } - unsigned getIsVectorized() const { return IsVectorized.Value; } - unsigned getPredicate() const { return Predicate.Value; } + ElementCount getWidth() const { return Width.Value.EC; } + unsigned getInterleave() const { return Interleave.Value.U32; } + unsigned getIsVectorized() const { return IsVectorized.Value.U32; } + unsigned getPredicate() const { return Predicate.Value.U32; } enum ForceKind getForce() const { - if ((ForceKind)Force.Value == FK_Undefined && + if ((ForceKind)Force.Value.U32 == FK_Undefined && hasDisableAllTransformsHint(TheLoop)) return FK_Disabled; - return (ForceKind)Force.Value; + return (ForceKind)Force.Value.U32; } /// If hints are provided that force vectorization, use the AlwaysPrint @@ -119,7 +128,9 @@ // enabled by default because can be unsafe or inefficient. For example, // reordering floating-point operations will change the way round-off // error accumulates in the loop. - return getForce() == LoopVectorizeHints::FK_Enabled || getWidth() > 1; + ElementCount EC = getWidth(); + return getForce() == LoopVectorizeHints::FK_Enabled || + EC.getKnownMinValue() > 1; } bool isPotentiallyUnsafe() const { @@ -138,7 +149,7 @@ void getHintsFromMetadata(); /// Checks string hint with one operand and set value if valid. - void setHint(StringRef Name, Metadata *Arg); + void setHint(StringRef Name, const Metadata *Arg); /// The loop these hints belong to. 
const Loop *TheLoop; Index: llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp =================================================================== --- llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp +++ llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp @@ -56,17 +56,46 @@ namespace llvm { -bool LoopVectorizeHints::Hint::validate(unsigned Val) { +bool LoopVectorizeHints::Hint::validateAndSet(const Metadata *Arg) { + unsigned IntVal; + bool IsScalable = false; + + if (const ConstantInt *C = mdconst::dyn_extract<ConstantInt>(Arg)) + IntVal = C->getZExtValue(); + else if (const MDNode *MD = dyn_cast<MDNode>(Arg)) { + if (Kind != HK_WIDTH || MD->getNumOperands() != 2) + return false; + const ConstantInt *C0 = + mdconst::dyn_extract<ConstantInt>(MD->getOperand(0)); + const ConstantInt *C1 = + mdconst::dyn_extract<ConstantInt>(MD->getOperand(1)); + if (!C0 || !C1) + return false; + IntVal = C0->getZExtValue(); + IsScalable = C1->getZExtValue(); + } else + return false; + + auto ConditionallySetIntValue = [this](unsigned Val, bool Cond) { + if (Cond) + this->Value.U32 = Val; + return Cond; + }; + switch (Kind) { case HK_WIDTH: - return isPowerOf2_32(Val) && Val <= VectorizerParams::MaxVectorWidth; + if (!isPowerOf2_32(IntVal) || IntVal > VectorizerParams::MaxVectorWidth) + return false; + Value.EC = ElementCount::get(IntVal, IsScalable); + return true; case HK_UNROLL: - return isPowerOf2_32(Val) && Val <= MaxInterleaveFactor; + return ConditionallySetIntValue(IntVal, isPowerOf2_32(IntVal) && + IntVal <= MaxInterleaveFactor); case HK_FORCE: - return (Val <= 1); + return ConditionallySetIntValue(IntVal, IntVal <= 1); case HK_ISVECTORIZED: case HK_PREDICATE: - return (Val == 0 || Val == 1); + return ConditionallySetIntValue(IntVal, IntVal == 0 || IntVal == 1); } return false; } @@ -74,25 +103,27 @@ LoopVectorizeHints::LoopVectorizeHints(const Loop *L, bool InterleaveOnlyWhenForced, OptimizationRemarkEmitter &ORE) - : Width("vectorize.width", VectorizerParams::VectorizationFactor, HK_WIDTH), 
+ : Width("vectorize.width", + ElementCount::getFixed(VectorizerParams::VectorizationFactor)), Interleave("interleave.count", InterleaveOnlyWhenForced, HK_UNROLL), Force("vectorize.enable", FK_Undefined, HK_FORCE), IsVectorized("isvectorized", 0, HK_ISVECTORIZED), - Predicate("vectorize.predicate.enable", FK_Undefined, HK_PREDICATE), TheLoop(L), - ORE(ORE) { + Predicate("vectorize.predicate.enable", FK_Undefined, HK_PREDICATE), + TheLoop(L), ORE(ORE) { // Populate values with existing loop metadata. getHintsFromMetadata(); // force-vector-interleave overrides DisableInterleaving. if (VectorizerParams::isInterleaveForced()) - Interleave.Value = VectorizerParams::VectorizationInterleave; + Interleave.Value.U32 = VectorizerParams::VectorizationInterleave; - if (IsVectorized.Value != 1) + if (IsVectorized.Value.U32 != 1) // If the vectorization width and interleaving count are both 1 then // consider the loop to have been already vectorized because there's // nothing more that we can do. - IsVectorized.Value = Width.Value == 1 && Interleave.Value == 1; - LLVM_DEBUG(if (InterleaveOnlyWhenForced && Interleave.Value == 1) dbgs() + IsVectorized.Value.U32 = Width.Value.EC == ElementCount::getFixed(1) && + Interleave.Value.U32 == 1; + LLVM_DEBUG(if (InterleaveOnlyWhenForced && Interleave.Value.U32 == 1) dbgs() << "LV: Interleaving disabled by the pass manager\n"); } @@ -112,7 +143,7 @@ TheLoop->setLoopID(NewLoopID); // Update internal cache. 
- IsVectorized.Value = 1; + IsVectorized.Value.U32 = 1; } bool LoopVectorizeHints::allowVectorization( @@ -152,7 +183,7 @@ using namespace ore; ORE.emit([&]() { - if (Force.Value == LoopVectorizeHints::FK_Disabled) + if (Force.Value.U32 == LoopVectorizeHints::FK_Disabled) return OptimizationRemarkMissed(LV_NAME, "MissedExplicitlyDisabled", TheLoop->getStartLoc(), TheLoop->getHeader()) @@ -161,12 +192,13 @@ OptimizationRemarkMissed R(LV_NAME, "MissedDetails", TheLoop->getStartLoc(), TheLoop->getHeader()); R << "loop not vectorized"; - if (Force.Value == LoopVectorizeHints::FK_Enabled) { + if (Force.Value.U32 == LoopVectorizeHints::FK_Enabled) { R << " (Force=" << NV("Force", true); - if (Width.Value != 0) - R << ", Vector Width=" << NV("VectorWidth", Width.Value); - if (Interleave.Value != 0) - R << ", Interleave Count=" << NV("InterleaveCount", Interleave.Value); + if (Width.Value.EC.isNonZero()) + R << ", Vector Width=" << NV("VectorWidth", Width.Value.EC); + if (Interleave.Value.U32 != 0) + R << ", Interleave Count=" + << NV("InterleaveCount", Interleave.Value.U32); R << ")"; } return R; @@ -175,11 +207,11 @@ } const char *LoopVectorizeHints::vectorizeAnalysisPassName() const { - if (getWidth() == 1) + if (getWidth() == ElementCount::getFixed(1)) return LV_NAME; if (getForce() == LoopVectorizeHints::FK_Disabled) return LV_NAME; - if (getForce() == LoopVectorizeHints::FK_Undefined && getWidth() == 0) + if (getForce() == LoopVectorizeHints::FK_Undefined && getWidth().isZero()) return LV_NAME; return OptimizationRemarkAnalysis::AlwaysPrint; } @@ -220,22 +252,19 @@ } } -void LoopVectorizeHints::setHint(StringRef Name, Metadata *Arg) { +void LoopVectorizeHints::setHint(StringRef Name, const Metadata *Arg) { if (!Name.startswith(Prefix())) return; Name = Name.substr(Prefix().size(), StringRef::npos); const ConstantInt *C = mdconst::dyn_extract<ConstantInt>(Arg); - if (!C) + if (!C && Name != Width.Name) return; - unsigned Val = C->getZExtValue(); Hint *Hints[] = {&Width, 
&Interleave, &Force, &IsVectorized, &Predicate}; for (auto H : Hints) { if (Name == H->Name) { - if (H->validate(Val)) - H->Value = Val; - else + if (!H->validateAndSet(Arg)) LLVM_DEBUG(dbgs() << "LV: ignoring invalid hint '" << Name << "'\n"); break; } Index: llvm/lib/Transforms/Vectorize/LoopVectorize.cpp =================================================================== --- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -8260,11 +8260,10 @@ LoopVectorizationPlanner LVP(L, LI, TLI, TTI, LVL, CM, IAI, PSE); // Get user vectorization factor. - const unsigned UserVF = Hints.getWidth(); + const ElementCount UserVF = Hints.getWidth(); // Plan how to best vectorize, return the best VF and its cost. - const VectorizationFactor VF = - LVP.planInVPlanNativePath(ElementCount::getFixed(UserVF)); + const VectorizationFactor VF = LVP.planInVPlanNativePath(UserVF); // If we are stress testing VPlan builds, do not attempt to generate vector // code. Masked vector code generation support will follow soon. @@ -8426,12 +8425,11 @@ LoopVectorizationPlanner LVP(L, LI, TLI, TTI, &LVL, CM, IAI, PSE); // Get user vectorization factor and interleave count. - unsigned UserVF = Hints.getWidth(); + ElementCount UserVF = Hints.getWidth(); unsigned UserIC = Hints.getInterleave(); // Plan how to best vectorize, return the best VF and its cost. 
- Optional<VectorizationFactor> MaybeVF = - LVP.plan(ElementCount::getFixed(UserVF), UserIC); + Optional<VectorizationFactor> MaybeVF = LVP.plan(UserVF, UserIC); VectorizationFactor VF = VectorizationFactor::Disabled(); unsigned IC = 1; Index: llvm/test/Transforms/LoopVectorize/metadata-width.ll =================================================================== --- llvm/test/Transforms/LoopVectorize/metadata-width.ll +++ llvm/test/Transforms/LoopVectorize/metadata-width.ll @@ -24,7 +24,32 @@ ret void } +; CHECK-LABEL: @test2( +; CHECK: store <8 x i32> +; CHECK: ret void +define void @test2(i32* nocapture %a, i32 %n) #0 { +entry: + %cmp4 = icmp sgt i32 %n, 0 + br i1 %cmp4, label %for.body, label %for.end + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv + %0 = trunc i64 %indvars.iv to i32 + store i32 %0, i32* %arrayidx, align 4 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !2 + +for.end: ; preds = %for.body, %entry + ret void +} + attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } !0 = !{!0, !1} !1 = !{!"llvm.loop.vectorize.width", i32 8} +!2 = !{!2, !3} +!3 = !{!"llvm.loop.vectorize.width", !4} +!4 = !{i32 8, i1 false}