Index: clang/docs/LanguageExtensions.rst =================================================================== --- clang/docs/LanguageExtensions.rst +++ clang/docs/LanguageExtensions.rst @@ -3028,8 +3028,15 @@ ... } -The vector width is specified by ``vectorize_width(_value_)`` and the interleave -count is specified by ``interleave_count(_value_)``, where +The vector width is specified by +``vectorize_width(_value_[, fixed|scalable])``, where _value_ is a positive +integer and the type of vectorization can be specified with an optional +second parameter. In this case 'fixed' is the default and refers to fixed width +vectorization, whereas 'scalable' indicates the compiler should use scalable +vectors instead. The 'scalable' option is currently experimental and is only +intended to work for targets that support scalable vectors. + +The interleave count is specified by ``interleave_count(_value_)``, where _value_ is a positive integer. This is useful for specifying the optimal width/count of the set of target architectures supported by your application. 
Index: clang/include/clang/Basic/Attr.td =================================================================== --- clang/include/clang/Basic/Attr.td +++ clang/include/clang/Basic/Attr.td @@ -3299,8 +3299,8 @@ "PipelineDisabled", "PipelineInitiationInterval", "Distribute", "VectorizePredicate"]>, EnumArgument<"State", "LoopHintState", - ["enable", "disable", "numeric", "assume_safety", "full"], - ["Enable", "Disable", "Numeric", "AssumeSafety", "Full"]>, + ["enable", "disable", "numeric", "scalable_numeric", "assume_safety", "full"], + ["Enable", "Disable", "Numeric", "ScalableNumeric", "AssumeSafety", "Full"]>, ExprArgument<"Value">]; let AdditionalMembers = [{ Index: clang/include/clang/Basic/DiagnosticParseKinds.td =================================================================== --- clang/include/clang/Basic/DiagnosticParseKinds.td +++ clang/include/clang/Basic/DiagnosticParseKinds.td @@ -1386,6 +1386,9 @@ "%select{invalid|missing}0 option%select{ %1|}0; expected vectorize, " "vectorize_width, interleave, interleave_count, unroll, unroll_count, " "pipeline, pipeline_initiation_interval, vectorize_predicate, or distribute">; +def err_pragma_loop_invalid_vectorize_option : Error< + "vectorize_width loop hint malformed; use 'vectorize_width(X, scalable)' or " + "'vectorize_width(X, fixed)' where X is an integer">; def err_pragma_fp_invalid_option : Error< "%select{invalid|missing}0 option%select{ %1|}0; expected 'contract', 'reassociate' or 'exceptions'">; Index: clang/include/clang/Basic/DiagnosticSemaKinds.td =================================================================== --- clang/include/clang/Basic/DiagnosticSemaKinds.td +++ clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -935,6 +935,9 @@ def err_pragma_attr_attr_no_push : Error< "'#pragma clang attribute' attribute with no matching " "'#pragma clang attribute push'">; +def warn_pragma_attribute_scalable_unused : Warning< + "ignoring scalable vectorize_width flag due to lack of target support">, + 
InGroup; /// Objective-C parser diagnostics def err_duplicate_class_def : Error< Index: clang/include/clang/Basic/TargetInfo.h =================================================================== --- clang/include/clang/Basic/TargetInfo.h +++ clang/include/clang/Basic/TargetInfo.h @@ -1237,6 +1237,8 @@ /// Identify whether this target supports IFuncs. bool supportsIFunc() const { return getTriple().isOSBinFormatELF(); } + virtual bool supportsScalableVectors() const { return false; } + // Validate the contents of the __builtin_cpu_supports(const char*) // argument. virtual bool validateCpuSupports(StringRef Name) const { return false; } Index: clang/lib/Basic/Targets/AArch64.h =================================================================== --- clang/lib/Basic/Targets/AArch64.h +++ clang/lib/Basic/Targets/AArch64.h @@ -130,6 +130,8 @@ bool hasInt128Type() const override; bool hasExtIntType() const override { return true; } + + bool supportsScalableVectors() const override { return FPU & SveMode; } }; class LLVM_LIBRARY_VISIBILITY AArch64leTargetInfo : public AArch64TargetInfo { Index: clang/lib/CodeGen/CGLoopInfo.h =================================================================== --- clang/lib/CodeGen/CGLoopInfo.h +++ clang/lib/CodeGen/CGLoopInfo.h @@ -19,6 +19,7 @@ #include "llvm/IR/DebugLoc.h" #include "llvm/IR/Value.h" #include "llvm/Support/Compiler.h" +#include "llvm/Support/TypeSize.h" namespace llvm { class BasicBlock; @@ -56,7 +57,7 @@ LVEnableState VectorizePredicateEnable; /// Value for llvm.loop.vectorize.width metadata. - unsigned VectorizeWidth; + llvm::ElementCount VectorizeWidth; /// Value for llvm.loop.interleave.count metadata. unsigned InterleaveCount; @@ -253,7 +254,9 @@ } /// Set the vectorize width for the next loop pushed. 
- void setVectorizeWidth(unsigned W) { StagedAttrs.VectorizeWidth = W; } + void setVectorizeWidth(unsigned W, bool IsScalable = false) { + StagedAttrs.VectorizeWidth = llvm::ElementCount::get(W, IsScalable); + } /// Set the interleave count for the next loop pushed. void setInterleaveCount(unsigned C) { StagedAttrs.InterleaveCount = C; } Index: clang/lib/CodeGen/CGLoopInfo.cpp =================================================================== --- clang/lib/CodeGen/CGLoopInfo.cpp +++ clang/lib/CodeGen/CGLoopInfo.cpp @@ -217,7 +217,7 @@ Enabled = false; else if (Attrs.VectorizeEnable != LoopAttributes::Unspecified || Attrs.VectorizePredicateEnable != LoopAttributes::Unspecified || - Attrs.InterleaveCount != 0 || Attrs.VectorizeWidth != 0) + Attrs.InterleaveCount != 0 || Attrs.VectorizeWidth.isNonZero()) Enabled = true; if (Enabled != true) { @@ -253,7 +253,7 @@ bool IsVectorPredicateEnabled = false; if (Attrs.VectorizePredicateEnable != LoopAttributes::Unspecified && Attrs.VectorizeEnable != LoopAttributes::Disable && - Attrs.VectorizeWidth < 1) { + Attrs.VectorizeWidth.getKnownMinValue() < 1) { IsVectorPredicateEnabled = (Attrs.VectorizePredicateEnable == LoopAttributes::Enable); @@ -266,12 +266,25 @@ } // Setting vectorize.width - if (Attrs.VectorizeWidth > 0) { - Metadata *Vals[] = { - MDString::get(Ctx, "llvm.loop.vectorize.width"), - ConstantAsMetadata::get(ConstantInt::get(llvm::Type::getInt32Ty(Ctx), - Attrs.VectorizeWidth))}; - Args.push_back(MDNode::get(Ctx, Vals)); + if (Attrs.VectorizeWidth.getKnownMinValue() > 0) { + if (Attrs.VectorizeWidth.isScalable()) { + Metadata *ECArgs[] = { + ConstantAsMetadata::get( + ConstantInt::get(llvm::Type::getInt32Ty(Ctx), + Attrs.VectorizeWidth.getKnownMinValue())), + ConstantAsMetadata::get(ConstantInt::get( + llvm::Type::getInt1Ty(Ctx), Attrs.VectorizeWidth.isScalable()))}; + MDNode *EC = MDNode::get(Ctx, ECArgs); + Metadata *Vals[] = {MDString::get(Ctx, "llvm.loop.vectorize.width"), EC}; + 
Args.push_back(MDNode::get(Ctx, Vals)); + } else { + Metadata *Vals[] = {MDString::get(Ctx, "llvm.loop.vectorize.width"), + ConstantAsMetadata::get(ConstantInt::get( + llvm::Type::getInt32Ty(Ctx), + Attrs.VectorizeWidth.getKnownMinValue()))}; + + Args.push_back(MDNode::get(Ctx, Vals)); + } } // Setting interleave.count @@ -288,8 +301,9 @@ // 2) it is implied when vectorize.predicate is set, or // 3) it is implied when vectorize.width is set. if (Attrs.VectorizeEnable != LoopAttributes::Unspecified || - IsVectorPredicateEnabled || - Attrs.VectorizeWidth > 1 ) { + IsVectorPredicateEnabled || Attrs.VectorizeWidth.getKnownMinValue() > 1 || + (Attrs.VectorizeWidth.getKnownMinValue() == 1 && + Attrs.VectorizeWidth.isScalable())) { bool AttrVal = Attrs.VectorizeEnable != LoopAttributes::Disable; Args.push_back( MDNode::get(Ctx, {MDString::get(Ctx, "llvm.loop.vectorize.enable"), @@ -428,14 +442,15 @@ : IsParallel(IsParallel), VectorizeEnable(LoopAttributes::Unspecified), UnrollEnable(LoopAttributes::Unspecified), UnrollAndJamEnable(LoopAttributes::Unspecified), - VectorizePredicateEnable(LoopAttributes::Unspecified), VectorizeWidth(0), - InterleaveCount(0), UnrollCount(0), UnrollAndJamCount(0), + VectorizePredicateEnable(LoopAttributes::Unspecified), + VectorizeWidth(ElementCount::getFixed(0)), InterleaveCount(0), + UnrollCount(0), UnrollAndJamCount(0), DistributeEnable(LoopAttributes::Unspecified), PipelineDisabled(false), PipelineInitiationInterval(0) {} void LoopAttributes::clear() { IsParallel = false; - VectorizeWidth = 0; + VectorizeWidth = ElementCount::getFixed(0); InterleaveCount = 0; UnrollCount = 0; UnrollAndJamCount = 0; @@ -460,7 +475,7 @@ AccGroup = MDNode::getDistinct(Ctx, {}); } - if (!Attrs.IsParallel && Attrs.VectorizeWidth == 0 && + if (!Attrs.IsParallel && Attrs.VectorizeWidth.isZero() && Attrs.InterleaveCount == 0 && Attrs.UnrollCount == 0 && Attrs.UnrollAndJamCount == 0 && !Attrs.PipelineDisabled && Attrs.PipelineInitiationInterval == 0 && @@ -538,7 
+553,7 @@ SmallVector BeforeLoopProperties; if (BeforeJam.VectorizeEnable != LoopAttributes::Unspecified || BeforeJam.VectorizePredicateEnable != LoopAttributes::Unspecified || - BeforeJam.InterleaveCount != 0 || BeforeJam.VectorizeWidth != 0) + BeforeJam.InterleaveCount != 0 || BeforeJam.VectorizeWidth.isNonZero()) BeforeLoopProperties.push_back( MDNode::get(Ctx, MDString::get(Ctx, "llvm.loop.isvectorized"))); @@ -717,6 +732,16 @@ break; } break; + case LoopHintAttr::ScalableNumeric: + switch (Option) { + case LoopHintAttr::VectorizeWidth: + setVectorizeWidth(ValueInt, true); + break; + default: + llvm_unreachable("Options cannot be used with 'scalable' hint."); + break; + } + break; case LoopHintAttr::Numeric: switch (Option) { case LoopHintAttr::VectorizeWidth: Index: clang/lib/Parse/ParsePragma.cpp =================================================================== --- clang/lib/Parse/ParsePragma.cpp +++ clang/lib/Parse/ParsePragma.cpp @@ -1195,6 +1195,26 @@ ExprResult R = ParseConstantExpression(); + if (OptionInfo && OptionInfo->getName() == "vectorize_width" && + Tok.is(tok::comma)) { + PP.Lex(Tok); // , + + SourceLocation StateLoc = Tok.getLocation(); + IdentifierInfo *StateInfo = Tok.getIdentifierInfo(); + // getIdentifierInfo() returns null for non-identifier tokens (e.g. a + // literal); use an empty name so the malformed-hint diagnostic fires. + StringRef IsScalableStr = StateInfo ? StateInfo->getName() : ""; + + if (IsScalableStr != "scalable" && IsScalableStr != "fixed") { + Diag(Tok.getLocation(), diag::err_pragma_loop_invalid_vectorize_option); + return false; + } + PP.Lex(Tok); // Identifier + + Hint.StateLoc = + IdentifierLoc::create(Actions.Context, StateLoc, StateInfo); + } + // Tokens following an error in an ill-formed constant expression will // remain in the token stream and must be removed. 
if (Tok.isNot(tok::eof)) { Index: clang/lib/Sema/SemaStmtAttr.cpp =================================================================== --- clang/lib/Sema/SemaStmtAttr.cpp +++ clang/lib/Sema/SemaStmtAttr.cpp @@ -14,6 +14,7 @@ #include "clang/Sema/SemaInternal.h" #include "clang/AST/ASTContext.h" #include "clang/Basic/SourceManager.h" +#include "clang/Basic/TargetInfo.h" #include "clang/Sema/DelayedDiagnostic.h" #include "clang/Sema/Lookup.h" #include "clang/Sema/ScopeInfo.h" @@ -139,10 +140,21 @@ LoopHintAttr::PipelineInitiationInterval) .Case("distribute", LoopHintAttr::Distribute) .Default(LoopHintAttr::Vectorize); - if (Option == LoopHintAttr::VectorizeWidth || - Option == LoopHintAttr::InterleaveCount || - Option == LoopHintAttr::UnrollCount || - Option == LoopHintAttr::PipelineInitiationInterval) { + if (Option == LoopHintAttr::VectorizeWidth) { + assert(ValueExpr && "Attribute must have a valid value expression."); + if (S.CheckLoopHintExpr(ValueExpr, St->getBeginLoc())) + return nullptr; + if (StateLoc && StateLoc->Ident && StateLoc->Ident->isStr("scalable")) { + if (!S.Context.getTargetInfo().supportsScalableVectors()) { + S.Diag(St->getBeginLoc(), diag::warn_pragma_attribute_scalable_unused); + State = LoopHintAttr::Numeric; + } else + State = LoopHintAttr::ScalableNumeric; + } else + State = LoopHintAttr::Numeric; + } else if (Option == LoopHintAttr::InterleaveCount || + Option == LoopHintAttr::UnrollCount || + Option == LoopHintAttr::PipelineInitiationInterval) { assert(ValueExpr && "Attribute must have a valid value expression."); if (S.CheckLoopHintExpr(ValueExpr, St->getBeginLoc())) return nullptr; Index: clang/test/CodeGenCXX/pragma-loop.cpp =================================================================== --- clang/test/CodeGenCXX/pragma-loop.cpp +++ clang/test/CodeGenCXX/pragma-loop.cpp @@ -1,4 +1,5 @@ -// RUN: %clang_cc1 -triple x86_64-apple-darwin -std=c++11 -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple x86_64-apple-darwin 
-std=c++11 -emit-llvm -o - %s 2>%t | FileCheck %s +// RUN: FileCheck --check-prefix=CHECK-SCALABLE %s < %t // Verify while loop is recognized after sequence of pragma clang loop directives. void while_test(int *List, int Length) { @@ -158,6 +159,26 @@ for_template_constant_expression_test(List, Length); } +// Verify for loop is performing fixed width vectorization +void for_test_fixed(int *List, int Length) { +#pragma clang loop vectorize_width(16, fixed) interleave_count(4) unroll(disable) distribute(disable) + for (int i = 0; i < Length; i++) { + // CHECK: br label {{.*}}, !llvm.loop ![[LOOP_15:.*]] + List[i] = i * 2; + } +} + +// Verify for loop rejects scalable vectorization due to lack of target support +// CHECK-SCALABLE: ignoring scalable vectorize_width flag due to lack of target support +void for_test_scalable(int *List, int Length) { +#pragma clang loop vectorize_width(16, scalable) interleave_count(4) unroll(disable) distribute(disable) + for (int i = 0; i < Length; i++) { + // CHECK: br label {{.*}}, !llvm.loop ![[LOOP_16:.*]] + // CHECK-SVE: br label {{.*}}, !llvm.loop ![[LOOP_16_SVE:.*]] + List[i] = i * 2; + } +} + // CHECK: ![[LOOP_1]] = distinct !{![[LOOP_1]], ![[UNROLL_FULL:.*]]} // CHECK: ![[UNROLL_FULL]] = !{!"llvm.loop.unroll.full"} @@ -215,3 +236,8 @@ // CHECK: ![[LOOP_14]] = distinct !{![[LOOP_14]], ![[WIDTH_10:.*]], ![[VECTORIZE_ENABLE]]} // CHECK: ![[WIDTH_10]] = !{!"llvm.loop.vectorize.width", i32 10} + +// CHECK: ![[LOOP_15]] = distinct !{![[LOOP_15]], ![[UNROLL_DISABLE:.*]], ![[DISTRIBUTE_DISABLE:.*]], ![[WIDTH_16_FIXED:.*]], ![[INTERLEAVE_4:.*]], ![[VECTORIZE_ENABLE:.*]]} +// CHECK: ![[WIDTH_16_FIXED]] = !{!"llvm.loop.vectorize.width", i32 16} + +// CHECK: ![[LOOP_16]] = distinct !{![[LOOP_16]], ![[UNROLL_DISABLE:.*]], ![[DISTRIBUTE_DISABLE:.*]], ![[WIDTH_16_FIXED:.*]], ![[INTERLEAVE_4:.*]], ![[VECTORIZE_ENABLE:.*]]} Index: clang/test/CodeGenCXX/pragma-scalable-loop.cpp =================================================================== 
--- /dev/null +++ clang/test/CodeGenCXX/pragma-scalable-loop.cpp @@ -0,0 +1,18 @@ +// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +sve -std=c++11 -emit-llvm -o - %s | FileCheck %s + +// Verify for loop is performing scalable vectorization +void for_test_scalable(int *List, int Length) { +#pragma clang loop vectorize_width(16, scalable) interleave_count(4) unroll(disable) distribute(disable) + for (int i = 0; i < Length; i++) { + // CHECK: br label {{.*}}, !llvm.loop ![[LOOP_1:.*]] + List[i] = i * 2; + } +} + +// CHECK: ![[LOOP_1]] = distinct !{![[LOOP_1]], ![[UNROLL_DISABLE:.*]], ![[DISTRIBUTE_DISABLE:.*]], ![[WIDTH_16_SCALABLE:.*]], ![[INTERLEAVE_4:.*]], ![[VECTORIZE_ENABLE:.*]]} +// CHECK: ![[UNROLL_DISABLE]] = !{!"llvm.loop.unroll.disable"} +// CHECK: ![[DISTRIBUTE_DISABLE]] = !{!"llvm.loop.distribute.enable", i1 false} +// CHECK: ![[WIDTH_16_SCALABLE]] = !{!"llvm.loop.vectorize.width", ![[ELEMENT_COUNT_16_SCALABLE:.*]]} +// CHECK: ![[ELEMENT_COUNT_16_SCALABLE]] = !{i32 16, i1 true} +// CHECK: ![[INTERLEAVE_4]] = !{!"llvm.loop.interleave.count", i32 4} +// CHECK: ![[VECTORIZE_ENABLE]] = !{!"llvm.loop.vectorize.enable", i1 true}