Index: llvm/docs/LangRef.rst =================================================================== --- llvm/docs/LangRef.rst +++ llvm/docs/LangRef.rst @@ -5874,8 +5874,11 @@ '``llvm.loop.vectorize.width``' Metadata ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -This metadata sets the target width of the vectorizer. The first -operand is the string ``llvm.loop.vectorize.width`` and the second +This metadata sets the target width of the vectorizer. We support two +formats for specifying the width - either as a simple integer or as an +ElementCount that also indicates whether the vectors should be fixed +width or scalable. In both formats the first operand is the string +``llvm.loop.vectorize.width``. When using the first format the second operand is an integer specifying the width. For example: .. code-block:: llvm @@ -5887,6 +5890,24 @@ 0 or if the loop does not have this metadata the width will be determined automatically. +In the second format the metadata is written as follows: + +.. code-block:: llvm + + !0 = !{!"llvm.loop.vectorize.width", !1} + !1 = !{i32 4, i32 1} + +where ``i32 4`` specifies the vector width and ``i32 1`` indicates the +vectors should be scalable. When vectors are marked as scalable then +we may still vectorize even with a vector width of 1. + +For fixed-width vectors you can also write: + +.. 
code-block:: llvm + + !1 = !{i32 4, i32 0} + + '``llvm.loop.vectorize.followup_vectorized``' Metadata ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Index: llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h =================================================================== --- llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h +++ llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h @@ -29,6 +29,7 @@ #include "llvm/ADT/MapVector.h" #include "llvm/Analysis/LoopAccessAnalysis.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" +#include "llvm/Support/TypeSize.h" #include "llvm/Transforms/Utils/LoopUtils.h" namespace llvm { @@ -49,13 +50,20 @@ /// Hint - associates name and validation with the hint value. struct Hint { const char *Name; - unsigned Value; // This may have to change for non-numeric values. + union { + unsigned U32; // Used for boolean and integer hint values. + ElementCount EC; // Used for the vectorization width. + } Value; HintKind Kind; Hint(const char *Name, unsigned Value, HintKind Kind) - : Name(Name), Value(Value), Kind(Kind) {} + : Name(Name), Value({Value}), Kind(Kind) {} - bool validate(unsigned Val); + /* Width-hint constructor: Kind is fixed to HK_WIDTH and the value is stored in Value.EC. */ Hint(const char *Name, ElementCount EC) : Name(Name), Kind(HK_WIDTH) { + Value.EC = EC; + } + + /* Replaces validate(unsigned): takes the raw metadata operand rather than an already-extracted integer. */ bool validateAndSet(const Metadata *Arg); }; /// Vectorization width. @@ -98,15 +106,15 @@ /// Dumps all the hint information. 
void emitRemarkWithHints() const; - unsigned getWidth() const { return Width.Value; } - unsigned getInterleave() const { return Interleave.Value; } - unsigned getIsVectorized() const { return IsVectorized.Value; } - unsigned getPredicate() const { return Predicate.Value; } + /* Width hint, read from the EC member of the hint's value union. */ ElementCount getWidth() const { return Width.Value.EC; } + unsigned getInterleave() const { return Interleave.Value.U32; } + unsigned getIsVectorized() const { return IsVectorized.Value.U32; } + unsigned getPredicate() const { return Predicate.Value.U32; } enum ForceKind getForce() const { - if ((ForceKind)Force.Value == FK_Undefined && + if ((ForceKind)Force.Value.U32 == FK_Undefined && hasDisableAllTransformsHint(TheLoop)) return FK_Disabled; - return (ForceKind)Force.Value; + return (ForceKind)Force.Value.U32; } /// If hints are provided that force vectorization, use the AlwaysPrint @@ -119,7 +127,9 @@ // enabled by default because can be unsafe or inefficient. For example, // reordering floating-point operations will change the way round-off // error accumulates in the loop. - return getForce() == LoopVectorizeHints::FK_Enabled || getWidth() > 1; + /* Only the known minimum element count of the width hint is compared against 1 below. */ ElementCount EC = getWidth(); + return getForce() == LoopVectorizeHints::FK_Enabled || + EC.getKnownMinValue() > 1; } bool isPotentiallyUnsafe() const { @@ -138,7 +148,7 @@ void getHintsFromMetadata(); /// Checks string hint with one operand and set value if valid. - void setHint(StringRef Name, Metadata *Arg); + void setHint(StringRef Name, const Metadata *Arg); /// The loop these hints belong to. 
const Loop *TheLoop; Index: llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp =================================================================== --- llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp +++ llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp @@ -56,17 +56,56 @@ namespace llvm { -bool LoopVectorizeHints::Hint::validate(unsigned Val) { +/* Accepts either a single integer operand or, for HK_WIDTH only, a two-operand node {width, scalable flag}; stores the value and returns true only when it passes the per-kind check, otherwise returns false and leaves Value untouched. */ bool LoopVectorizeHints::Hint::validateAndSet(const Metadata *Arg) { + unsigned IntVal; + unsigned IsScalable = 0; + + if (const ConstantInt *C = mdconst::dyn_extract<ConstantInt>(Arg)) + IntVal = C->getZExtValue(); + else if (const MDNode *MD = dyn_cast<MDNode>(Arg)) { + if (Kind != HK_WIDTH || MD->getNumOperands() != 2) + return false; + const ConstantInt *C0 = + mdconst::dyn_extract<ConstantInt>(MD->getOperand(0)); + const ConstantInt *C1 = + mdconst::dyn_extract<ConstantInt>(MD->getOperand(1)); + if (!C0 || !C1) + return false; + IntVal = C0->getZExtValue(); + IsScalable = C1->getZExtValue(); + } else + return false; + switch (Kind) { - case HK_WIDTH: - return isPowerOf2_32(Val) && Val <= VectorizerParams::MaxVectorWidth; + case HK_WIDTH: { + if (!isPowerOf2_32(IntVal) || IntVal > VectorizerParams::MaxVectorWidth) + return false; + + /* The scalable flag must be exactly 0 or 1. */ if (IsScalable > 1) + return false; + + Value.EC = ElementCount::get(IntVal, IsScalable); + return true; + } case HK_UNROLL: - return isPowerOf2_32(Val) && Val <= MaxInterleaveFactor; + if (isPowerOf2_32(IntVal) && IntVal <= MaxInterleaveFactor) { + Value.U32 = IntVal; + return true; + } + break; case HK_FORCE: - return (Val <= 1); + if (IntVal <= 1) { + Value.U32 = IntVal; + return true; + } + break; case HK_ISVECTORIZED: case HK_PREDICATE: - return (Val == 0 || Val == 1); + if (IntVal == 0 || IntVal == 1) { + Value.U32 = IntVal; + return true; + } + break; } return false; } @@ -74,25 +113,26 @@ LoopVectorizeHints::LoopVectorizeHints(const Loop *L, bool InterleaveOnlyWhenForced, OptimizationRemarkEmitter &ORE) - : Width("vectorize.width", VectorizerParams::VectorizationFactor, HK_WIDTH), + : 
Width("vectorize.width", + ElementCount::getFixed(VectorizerParams::VectorizationFactor)), Interleave("interleave.count", InterleaveOnlyWhenForced, HK_UNROLL), Force("vectorize.enable", FK_Undefined, HK_FORCE), IsVectorized("isvectorized", 0, HK_ISVECTORIZED), - Predicate("vectorize.predicate.enable", FK_Undefined, HK_PREDICATE), TheLoop(L), - ORE(ORE) { + Predicate("vectorize.predicate.enable", FK_Undefined, HK_PREDICATE), + TheLoop(L), ORE(ORE) { // Populate values with existing loop metadata. getHintsFromMetadata(); // force-vector-interleave overrides DisableInterleaving. if (VectorizerParams::isInterleaveForced()) - Interleave.Value = VectorizerParams::VectorizationInterleave; + Interleave.Value.U32 = VectorizerParams::VectorizationInterleave; - if (IsVectorized.Value != 1) + if (IsVectorized.Value.U32 != 1) // If the vectorization width and interleaving count are both 1 then // consider the loop to have been already vectorized because there's // nothing more that we can do. - IsVectorized.Value = Width.Value == 1 && Interleave.Value == 1; - LLVM_DEBUG(if (InterleaveOnlyWhenForced && Interleave.Value == 1) dbgs() + /* NOTE(review): this compares an ElementCount with the integer 1 - confirm ElementCount supports that comparison, and whether a scalable width of 1 should be treated as "already vectorized" here. */ IsVectorized.Value.U32 = Width.Value.EC == 1 && Interleave.Value.U32 == 1; + LLVM_DEBUG(if (InterleaveOnlyWhenForced && Interleave.Value.U32 == 1) dbgs() << "LV: Interleaving disabled by the pass manager\n"); } @@ -112,7 +152,7 @@ TheLoop->setLoopID(NewLoopID); // Update internal cache. 
- IsVectorized.Value = 1; + IsVectorized.Value.U32 = 1; } bool LoopVectorizeHints::allowVectorization( @@ -152,7 +192,7 @@ using namespace ore; ORE.emit([&]() { - if (Force.Value == LoopVectorizeHints::FK_Disabled) + if (Force.Value.U32 == LoopVectorizeHints::FK_Disabled) return OptimizationRemarkMissed(LV_NAME, "MissedExplicitlyDisabled", TheLoop->getStartLoc(), TheLoop->getHeader()) @@ -161,12 +201,13 @@ OptimizationRemarkMissed R(LV_NAME, "MissedDetails", TheLoop->getStartLoc(), TheLoop->getHeader()); R << "loop not vectorized"; - if (Force.Value == LoopVectorizeHints::FK_Enabled) { + if (Force.Value.U32 == LoopVectorizeHints::FK_Enabled) { R << " (Force=" << NV("Force", true); - if (Width.Value != 0) - R << ", Vector Width=" << NV("VectorWidth", Width.Value); - if (Interleave.Value != 0) - R << ", Interleave Count=" << NV("InterleaveCount", Interleave.Value); + if (Width.Value.EC.isNonZero()) + R << ", Vector Width=" << NV("VectorWidth", Width.Value.EC); + if (Interleave.Value.U32 != 0) + R << ", Interleave Count=" + << NV("InterleaveCount", Interleave.Value.U32); R << ")"; } return R; @@ -179,7 +220,7 @@ return LV_NAME; if (getForce() == LoopVectorizeHints::FK_Disabled) return LV_NAME; - if (getForce() == LoopVectorizeHints::FK_Undefined && getWidth() == 0) + if (getForce() == LoopVectorizeHints::FK_Undefined && getWidth().isZero()) return LV_NAME; return OptimizationRemarkAnalysis::AlwaysPrint; } @@ -220,22 +261,19 @@ } } -void LoopVectorizeHints::setHint(StringRef Name, Metadata *Arg) { +void LoopVectorizeHints::setHint(StringRef Name, const Metadata *Arg) { if (!Name.startswith(Prefix())) return; Name = Name.substr(Prefix().size(), StringRef::npos); const ConstantInt *C = mdconst::dyn_extract<ConstantInt>(Arg); - if (!C) + /* A non-integer operand is let through only for the width hint; validateAndSet performs the actual checking. */ if (!C && Name != Width.Name) return; - unsigned Val = C->getZExtValue(); Hint *Hints[] = {&Width, &Interleave, &Force, &IsVectorized, &Predicate}; for (auto H : Hints) { if (Name == H->Name) { - if (H->validate(Val)) - H->Value = Val; - else + if 
(!H->validateAndSet(Arg)) LLVM_DEBUG(dbgs() << "LV: ignoring invalid hint '" << Name << "'\n"); break; } Index: llvm/lib/Transforms/Vectorize/LoopVectorize.cpp =================================================================== --- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -8201,11 +8201,10 @@ LoopVectorizationPlanner LVP(L, LI, TLI, TTI, LVL, CM, IAI, PSE); // Get user vectorization factor. - const unsigned UserVF = Hints.getWidth(); + const ElementCount UserVF = Hints.getWidth(); // Plan how to best vectorize, return the best VF and its cost. - const VectorizationFactor VF = - LVP.planInVPlanNativePath(ElementCount::getFixed(UserVF)); + /* UserVF is handed to the planner as-is; it is no longer wrapped with ElementCount::getFixed(). */ const VectorizationFactor VF = LVP.planInVPlanNativePath(UserVF); // If we are stress testing VPlan builds, do not attempt to generate vector // code. Masked vector code generation support will follow soon. @@ -8367,12 +8366,11 @@ LoopVectorizationPlanner LVP(L, LI, TLI, TTI, &LVL, CM, IAI, PSE); // Get user vectorization factor and interleave count. - unsigned UserVF = Hints.getWidth(); + ElementCount UserVF = Hints.getWidth(); unsigned UserIC = Hints.getInterleave(); // Plan how to best vectorize, return the best VF and its cost. 
- Optional<VectorizationFactor> MaybeVF = - LVP.plan(ElementCount::getFixed(UserVF), UserIC); + /* UserVF is already an ElementCount, so it is passed to plan() without a getFixed() wrapper. */ Optional<VectorizationFactor> MaybeVF = LVP.plan(UserVF, UserIC); VectorizationFactor VF = VectorizationFactor::Disabled(); unsigned IC = 1; Index: llvm/test/Transforms/LoopVectorize/metadata-width.ll =================================================================== --- llvm/test/Transforms/LoopVectorize/metadata-width.ll +++ llvm/test/Transforms/LoopVectorize/metadata-width.ll @@ -24,7 +24,32 @@ ret void } +; CHECK-LABEL: @test2( +; CHECK: store <8 x i32> +; CHECK: ret void +define void @test2(i32* nocapture %a, i32 %n) #0 { +entry: + %cmp4 = icmp sgt i32 %n, 0 + br i1 %cmp4, label %for.body, label %for.end + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv + %0 = trunc i64 %indvars.iv to i32 + store i32 %0, i32* %arrayidx, align 4 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !2 + +for.end: ; preds = %for.body, %entry + ret void +} + attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } !0 = !{!0, !1} !1 = !{!"llvm.loop.vectorize.width", i32 8} +!2 = !{!2, !3} +!3 = !{!"llvm.loop.vectorize.width", !4} +!4 = !{i32 8, i32 0}