Index: cfe/trunk/docs/LanguageExtensions.rst =================================================================== --- cfe/trunk/docs/LanguageExtensions.rst +++ cfe/trunk/docs/LanguageExtensions.rst @@ -2946,12 +2946,12 @@ The ``#pragma clang loop`` directive is used to specify hints for optimizing the subsequent for, while, do-while, or c++11 range-based for loop. The directive -provides options for vectorization, interleaving, unrolling and +provides options for vectorization, interleaving, predication, unrolling and distribution. Loop hints can be specified before any loop and will be ignored if the optimization is not safe to apply. -Vectorization and Interleaving ------------------------------- +Vectorization, Interleaving, and Predication +-------------------------------------------- A vectorized loop performs multiple iterations of the original loop in parallel using vector instructions. The instruction set of the target @@ -2994,6 +2994,21 @@ Specifying a width/count of 1 disables the optimization, and is equivalent to ``vectorize(disable)`` or ``interleave(disable)``. +Vector predication is enabled by ``vectorize_predicate(enable)``, for example: + +.. code-block:: c++ + + #pragma clang loop vectorize(enable) + #pragma clang loop vectorize_predicate(enable) + for(...) { + ... + } + +This predicates (masks) all instructions in the loop, which allows the scalar +remainder loop (the tail) to be folded into the main vectorized loop. This +might be more efficient when vector predication is efficiently supported by the +target platform. + Loop Unrolling -------------- Index: cfe/trunk/include/clang/Basic/Attr.td =================================================================== --- cfe/trunk/include/clang/Basic/Attr.td +++ cfe/trunk/include/clang/Basic/Attr.td @@ -2982,10 +2982,12 @@ let Args = [EnumArgument<"Option", "OptionType", ["vectorize", "vectorize_width", "interleave", "interleave_count", "unroll", "unroll_count", "unroll_and_jam", "unroll_and_jam_count", - "pipeline", "pipeline_initiation_interval", "distribute"], + "pipeline", "pipeline_initiation_interval", "distribute", + "vectorize_predicate"], ["Vectorize", "VectorizeWidth", "Interleave", "InterleaveCount", "Unroll", "UnrollCount", "UnrollAndJam", "UnrollAndJamCount", - "PipelineDisabled", "PipelineInitiationInterval", "Distribute"]>, + "PipelineDisabled", "PipelineInitiationInterval", "Distribute", + "VectorizePredicate"]>, EnumArgument<"State", "LoopHintState", ["enable", "disable", "numeric", "assume_safety", "full"], ["Enable", "Disable", "Numeric", "AssumeSafety", "Full"]>, @@ -3005,6 +3007,7 @@ case PipelineDisabled: return "pipeline"; case PipelineInitiationInterval: return "pipeline_initiation_interval"; case Distribute: return "distribute"; + case VectorizePredicate: return "vectorize_predicate"; } llvm_unreachable("Unhandled LoopHint option."); } Index: cfe/trunk/include/clang/Basic/AttrDocs.td =================================================================== --- cfe/trunk/include/clang/Basic/AttrDocs.td +++ cfe/trunk/include/clang/Basic/AttrDocs.td @@ -2716,9 +2716,10 @@ let Content = [{ The ``#pragma clang loop`` directive allows loop optimization hints to be specified for the subsequent loop. The directive allows pipelining to be -disabled, or vectorization, interleaving, and unrolling to be enabled or disabled. -Vector width, interleave count, unrolling count, and the initiation interval -for pipelining can be explicitly specified. See `language extensions +disabled, or vectorization, vector predication, interleaving, and unrolling to +be enabled or disabled. Vector width, vector predication, interleave count, +unrolling count, and the initiation interval for pipelining can be explicitly +specified. See `language extensions `_ for details. }]; @@ -4201,4 +4202,4 @@ not initialized on device side. It has internal linkage and is initialized by the initializer on host side. }]; -} \ No newline at end of file +} Index: cfe/trunk/include/clang/Basic/DiagnosticParseKinds.td =================================================================== --- cfe/trunk/include/clang/Basic/DiagnosticParseKinds.td +++ cfe/trunk/include/clang/Basic/DiagnosticParseKinds.td @@ -1210,7 +1210,7 @@ def err_pragma_loop_invalid_option : Error< "%select{invalid|missing}0 option%select{ %1|}0; expected vectorize, " "vectorize_width, interleave, interleave_count, unroll, unroll_count, " - "pipeline, pipeline_initiation_interval, or distribute">; + "pipeline, pipeline_initiation_interval, vectorize_predicate, or distribute">; def err_pragma_fp_invalid_option : Error< "%select{invalid|missing}0 option%select{ %1|}0; expected contract">; Index: cfe/trunk/lib/CodeGen/CGLoopInfo.h =================================================================== --- cfe/trunk/lib/CodeGen/CGLoopInfo.h +++ cfe/trunk/lib/CodeGen/CGLoopInfo.h @@ -51,6 +51,9 @@ /// Value for llvm.loop.unroll_and_jam.* metadata (enable, disable, or full). LVEnableState UnrollAndJamEnable; + /// Value for llvm.loop.vectorize.predicate metadata + LVEnableState VectorizePredicateEnable; + /// Value for llvm.loop.vectorize.width metadata. unsigned VectorizeWidth; @@ -237,6 +240,11 @@ StagedAttrs.UnrollEnable = State; } + /// Set the next pushed vectorize predicate state. + void setVectorizePredicateState(const LoopAttributes::LVEnableState &State) { + StagedAttrs.VectorizePredicateEnable = State; + } + /// Set the next pushed loop unroll_and_jam state. void setUnrollAndJamState(const LoopAttributes::LVEnableState &State) { StagedAttrs.UnrollAndJamEnable = State; Index: cfe/trunk/lib/CodeGen/CGLoopInfo.cpp =================================================================== --- cfe/trunk/lib/CodeGen/CGLoopInfo.cpp +++ cfe/trunk/lib/CodeGen/CGLoopInfo.cpp @@ -218,6 +218,7 @@ if (Attrs.VectorizeEnable == LoopAttributes::Disable) Enabled = false; else if (Attrs.VectorizeEnable != LoopAttributes::Unspecified || + Attrs.VectorizePredicateEnable != LoopAttributes::Unspecified || Attrs.InterleaveCount != 0 || Attrs.VectorizeWidth != 0) Enabled = true; @@ -251,6 +252,16 @@ Args.push_back(TempNode.get()); Args.append(LoopProperties.begin(), LoopProperties.end()); + // Setting vectorize.predicate + if (Attrs.VectorizePredicateEnable != LoopAttributes::Unspecified) { + Metadata *Vals[] = { + MDString::get(Ctx, "llvm.loop.vectorize.predicate.enable"), + ConstantAsMetadata::get(ConstantInt::get( + llvm::Type::getInt1Ty(Ctx), + (Attrs.VectorizePredicateEnable == LoopAttributes::Enable)))}; + Args.push_back(MDNode::get(Ctx, Vals)); + } + // Setting vectorize.width if (Attrs.VectorizeWidth > 0) { Metadata *Vals[] = { @@ -411,7 +422,8 @@ LoopAttributes::LoopAttributes(bool IsParallel) : IsParallel(IsParallel), VectorizeEnable(LoopAttributes::Unspecified), UnrollEnable(LoopAttributes::Unspecified), - UnrollAndJamEnable(LoopAttributes::Unspecified), VectorizeWidth(0), + UnrollAndJamEnable(LoopAttributes::Unspecified), + VectorizePredicateEnable(LoopAttributes::Unspecified), VectorizeWidth(0), InterleaveCount(0), UnrollCount(0), UnrollAndJamCount(0), DistributeEnable(LoopAttributes::Unspecified), PipelineDisabled(false), PipelineInitiationInterval(0) {} @@ -425,6 +437,7 @@ VectorizeEnable = LoopAttributes::Unspecified; UnrollEnable = LoopAttributes::Unspecified; UnrollAndJamEnable = LoopAttributes::Unspecified; + VectorizePredicateEnable = LoopAttributes::Unspecified; DistributeEnable = LoopAttributes::Unspecified; PipelineDisabled = false; PipelineInitiationInterval = 0; @@ -446,6 +459,7 @@ Attrs.InterleaveCount == 0 && Attrs.UnrollCount == 0 && Attrs.UnrollAndJamCount == 0 && !Attrs.PipelineDisabled && Attrs.PipelineInitiationInterval == 0 && + Attrs.VectorizePredicateEnable == LoopAttributes::Unspecified && Attrs.VectorizeEnable == LoopAttributes::Unspecified && Attrs.UnrollEnable == LoopAttributes::Unspecified && Attrs.UnrollAndJamEnable == LoopAttributes::Unspecified && @@ -480,6 +494,7 @@ BeforeJam.InterleaveCount = Attrs.InterleaveCount; BeforeJam.VectorizeEnable = Attrs.VectorizeEnable; BeforeJam.DistributeEnable = Attrs.DistributeEnable; + BeforeJam.VectorizePredicateEnable = Attrs.VectorizePredicateEnable; switch (Attrs.UnrollEnable) { case LoopAttributes::Unspecified: @@ -495,6 +510,7 @@ break; } + AfterJam.VectorizePredicateEnable = Attrs.VectorizePredicateEnable; AfterJam.UnrollCount = Attrs.UnrollCount; AfterJam.PipelineDisabled = Attrs.PipelineDisabled; AfterJam.PipelineInitiationInterval = Attrs.PipelineInitiationInterval; @@ -516,6 +532,7 @@ // add it manually. SmallVector BeforeLoopProperties; if (BeforeJam.VectorizeEnable != LoopAttributes::Unspecified || + BeforeJam.VectorizePredicateEnable != LoopAttributes::Unspecified || BeforeJam.InterleaveCount != 0 || BeforeJam.VectorizeWidth != 0) BeforeLoopProperties.push_back( MDNode::get(Ctx, MDString::get(Ctx, "llvm.loop.isvectorized"))); @@ -603,6 +620,9 @@ case LoopHintAttr::UnrollAndJam: setUnrollAndJamState(LoopAttributes::Disable); break; + case LoopHintAttr::VectorizePredicate: + setVectorizePredicateState(LoopAttributes::Disable); + break; case LoopHintAttr::Distribute: setDistributeState(false); break; @@ -630,6 +650,9 @@ case LoopHintAttr::UnrollAndJam: setUnrollAndJamState(LoopAttributes::Enable); break; + case LoopHintAttr::VectorizePredicate: + setVectorizePredicateState(LoopAttributes::Enable); + break; case LoopHintAttr::Distribute: setDistributeState(true); break; @@ -653,6 +676,7 @@ break; case LoopHintAttr::Unroll: case LoopHintAttr::UnrollAndJam: + case LoopHintAttr::VectorizePredicate: case LoopHintAttr::UnrollCount: case LoopHintAttr::UnrollAndJamCount: case LoopHintAttr::VectorizeWidth: @@ -681,6 +705,7 @@ case LoopHintAttr::Distribute: case LoopHintAttr::PipelineDisabled: case LoopHintAttr::PipelineInitiationInterval: + case LoopHintAttr::VectorizePredicate: llvm_unreachable("Options cannot be used with 'full' hint."); break; } @@ -704,6 +729,7 @@ break; case LoopHintAttr::Unroll: case LoopHintAttr::UnrollAndJam: + case LoopHintAttr::VectorizePredicate: case LoopHintAttr::Vectorize: case LoopHintAttr::Interleave: case LoopHintAttr::Distribute: Index: cfe/trunk/lib/Parse/ParsePragma.cpp =================================================================== --- cfe/trunk/lib/Parse/ParsePragma.cpp +++ cfe/trunk/lib/Parse/ParsePragma.cpp @@ -1071,6 +1071,7 @@ StateOption = llvm::StringSwitch(OptionInfo->getName()) .Case("vectorize", true) .Case("interleave", true) + .Case("vectorize_predicate", true) .Default(false) || OptionUnroll || OptionUnrollAndJam || OptionDistribute || OptionPipelineDisabled; @@ -2824,6 +2825,7 @@ /// 'vectorize' '(' loop-hint-keyword ')' /// 'interleave' '(' loop-hint-keyword ')' /// 'unroll' '(' unroll-hint-keyword ')' +/// 'vectorize_predicate' '(' loop-hint-keyword ')' /// 'vectorize_width' '(' loop-hint-value ')' /// 'interleave_count' '(' loop-hint-value ')' /// 'unroll_count' '(' loop-hint-value ')' @@ -2885,6 +2887,7 @@ .Case("interleave", true) .Case("unroll", true) .Case("distribute", true) + .Case("vectorize_predicate", true) .Case("vectorize_width", true) .Case("interleave_count", true) .Case("unroll_count", true) Index: cfe/trunk/lib/Sema/SemaStmtAttr.cpp =================================================================== --- cfe/trunk/lib/Sema/SemaStmtAttr.cpp +++ cfe/trunk/lib/Sema/SemaStmtAttr.cpp @@ -133,6 +133,7 @@ .Case("vectorize", LoopHintAttr::Vectorize) .Case("vectorize_width", LoopHintAttr::VectorizeWidth) .Case("interleave", LoopHintAttr::Interleave) + .Case("vectorize_predicate", LoopHintAttr::VectorizePredicate) .Case("interleave_count", LoopHintAttr::InterleaveCount) .Case("unroll", LoopHintAttr::Unroll) .Case("unroll_count", LoopHintAttr::UnrollCount) @@ -151,6 +152,7 @@ State = LoopHintAttr::Numeric; } else if (Option == LoopHintAttr::Vectorize || Option == LoopHintAttr::Interleave || + Option == LoopHintAttr::VectorizePredicate || Option == LoopHintAttr::Unroll || Option == LoopHintAttr::Distribute || Option == LoopHintAttr::PipelineDisabled) { @@ -189,7 +191,8 @@ const LoopHintAttr *StateAttr; const LoopHintAttr *NumericAttr; } HintAttrs[] = {{nullptr, nullptr}, {nullptr, nullptr}, {nullptr, nullptr}, - {nullptr, nullptr}, {nullptr, nullptr}, {nullptr, nullptr}}; + {nullptr, nullptr}, {nullptr, nullptr}, {nullptr, nullptr}, + {nullptr, nullptr}}; for (const auto *I : Attrs) { const LoopHintAttr *LH = dyn_cast(I); @@ -205,7 +208,8 @@ Unroll, UnrollAndJam, Distribute, - Pipeline + Pipeline, + VectorizePredicate } Category; switch (Option) { case LoopHintAttr::Vectorize: @@ -232,6 +236,9 @@ case LoopHintAttr::PipelineInitiationInterval: Category = Pipeline; break; + case LoopHintAttr::VectorizePredicate: + Category = VectorizePredicate; + break; }; assert(Category < sizeof(HintAttrs) / sizeof(HintAttrs[0])); @@ -240,6 +247,7 @@ if (Option == LoopHintAttr::Vectorize || Option == LoopHintAttr::Interleave || Option == LoopHintAttr::Unroll || Option == LoopHintAttr::UnrollAndJam || + Option == LoopHintAttr::VectorizePredicate || Option == LoopHintAttr::PipelineDisabled || Option == LoopHintAttr::Distribute) { // Enable|Disable|AssumeSafety hint. For example, vectorize(enable). Index: cfe/trunk/test/AST/ast-print-pragmas.cpp =================================================================== --- cfe/trunk/test/AST/ast-print-pragmas.cpp +++ cfe/trunk/test/AST/ast-print-pragmas.cpp @@ -17,10 +17,12 @@ // CHECK: #pragma clang loop distribute(disable) // CHECK-NEXT: #pragma clang loop vectorize(enable) // CHECK-NEXT: #pragma clang loop interleave(disable) +// CHECK-NEXT: #pragma clang loop vectorize_predicate(disable) #pragma clang loop distribute(disable) #pragma clang loop vectorize(enable) #pragma clang loop interleave(disable) +#pragma clang loop vectorize_predicate(disable) // CHECK-NEXT: while (i - 1 < Length) while (i - 1 < Length) { List[i] = i * 2; @@ -30,10 +32,12 @@ // CHECK: #pragma clang loop distribute(enable) // CHECK-NEXT: #pragma clang loop vectorize(disable) // CHECK-NEXT: #pragma clang loop interleave(enable) +// CHECK-NEXT: #pragma clang loop vectorize_predicate(enable) #pragma clang loop distribute(enable) #pragma clang loop vectorize(disable) #pragma clang loop interleave(enable) +#pragma clang loop vectorize_predicate(enable) // CHECK-NEXT: while (i - 2 < Length) while (i - 2 < Length) { List[i] = i * 2; Index: cfe/trunk/test/CodeGenCXX/pragma-loop-predicate.cpp =================================================================== --- cfe/trunk/test/CodeGenCXX/pragma-loop-predicate.cpp +++ cfe/trunk/test/CodeGenCXX/pragma-loop-predicate.cpp @@ -0,0 +1,35 @@ +// RUN: %clang_cc1 -triple x86_64-apple-darwin -std=c++11 -emit-llvm -o - %s | FileCheck %s + +void test0(int *List, int Length) { +// CHECK-LABEL: @{{.*}}test0{{.*}}( +// CHECK: br label {{.*}}, !llvm.loop ![[LOOP0:.*]] + + #pragma clang loop vectorize(enable) + for (int i = 0; i < Length; i++) + List[i] = i * 2; +} + +void test1(int *List, int Length) { +// CHECK-LABEL: @{{.*}}test1{{.*}}( +// CHECK: br label {{.*}}, !llvm.loop ![[LOOP1:.*]] + + #pragma clang loop vectorize(enable) vectorize_predicate(enable) + for (int i = 0; i < Length; i++) + List[i] = i * 2; +} + +void test2(int *List, int Length) { +// CHECK-LABEL: @{{.*}}test2{{.*}}( +// CHECK: br label {{.*}}, !llvm.loop ![[LOOP2:.*]] + + #pragma clang loop vectorize(enable) vectorize_predicate(disable) + for (int i = 0; i < Length; i++) + List[i] = i * 2; +} + +// CHECK: ![[LOOP0]] = distinct !{![[LOOP0]], !3} +// CHECK-NEXT: !3 = !{!"llvm.loop.vectorize.enable", i1 true} +// CHECK-NEXT: ![[LOOP1]] = distinct !{![[LOOP1]], !5, !3} +// CHECK-NEXT: !5 = !{!"llvm.loop.vectorize.predicate.enable", i1 true} +// CHECK-NEXT: ![[LOOP2]] = distinct !{![[LOOP2]], !7, !3} +// CHECK-NEXT: !7 = !{!"llvm.loop.vectorize.predicate.enable", i1 false} Index: cfe/trunk/test/Parser/pragma-loop.cpp =================================================================== --- cfe/trunk/test/Parser/pragma-loop.cpp +++ cfe/trunk/test/Parser/pragma-loop.cpp @@ -81,6 +81,7 @@ #pragma clang loop vectorize(enable) #pragma clang loop interleave(enable) +#pragma clang loop vectorize_predicate(enable) #pragma clang loop unroll(full) while (i + 1 < Length) { List[i] = i; @@ -95,6 +96,7 @@ #pragma clang loop vectorize(disable) #pragma clang loop interleave(disable) +#pragma clang loop vectorize_predicate(disable) #pragma clang loop unroll(disable) while (i - 1 < Length) { List[i] = i; @@ -111,7 +113,7 @@ } int VList[Length]; -#pragma clang loop vectorize(disable) interleave(disable) unroll(disable) +#pragma clang loop vectorize(disable) interleave(disable) unroll(disable) vectorize_predicate(disable) for (int j : VList) { VList[j] = List[j]; } @@ -130,11 +132,13 @@ /* expected-error {{expected '('}} */ #pragma clang loop vectorize /* expected-error {{expected '('}} */ #pragma clang loop interleave +/* expected-error {{expected '('}} */ #pragma clang loop vectorize_predicate /* expected-error {{expected '('}} */ #pragma clang loop unroll /* expected-error {{expected '('}} */ #pragma clang loop distribute /* expected-error {{expected ')'}} */ #pragma clang loop vectorize(enable /* expected-error {{expected ')'}} */ #pragma clang loop interleave(enable +/* expected-error {{expected ')'}} */ #pragma clang loop vectorize_predicate(enable /* expected-error {{expected ')'}} */ #pragma clang loop unroll(full /* expected-error {{expected ')'}} */ #pragma clang loop distribute(enable @@ -147,7 +151,7 @@ /* expected-error {{missing argument; expected 'enable', 'full' or 'disable'}} */ #pragma clang loop unroll() /* expected-error {{missing argument; expected 'enable' or 'disable'}} */ #pragma clang loop distribute() -/* expected-error {{missing option; expected vectorize, vectorize_width, interleave, interleave_count, unroll, unroll_count, pipeline, pipeline_initiation_interval, or distribute}} */ #pragma clang loop +/* expected-error {{missing option; expected vectorize, vectorize_width, interleave, interleave_count, unroll, unroll_count, pipeline, pipeline_initiation_interval, vectorize_predicate, or distribute}} */ #pragma clang loop /* expected-error {{invalid option 'badkeyword'}} */ #pragma clang loop badkeyword /* expected-error {{invalid option 'badkeyword'}} */ #pragma clang loop badkeyword(enable) /* expected-error {{invalid option 'badkeyword'}} */ #pragma clang loop vectorize(enable) badkeyword(4) @@ -245,6 +249,9 @@ /* expected-error {{duplicate directives 'vectorize(enable)' and 'vectorize(disable)'}} */ #pragma clang loop vectorize(disable) #pragma clang loop interleave(enable) /* expected-error {{duplicate directives 'interleave(enable)' and 'interleave(disable)'}} */ #pragma clang loop interleave(disable) +#pragma clang loop vectorize_predicate(enable) +/* expected-error@+1 {{duplicate directives 'vectorize_predicate(enable)' and 'vectorize_predicate(disable)'}} */ +#pragma clang loop vectorize_predicate(disable) #pragma clang loop unroll(full) /* expected-error {{duplicate directives 'unroll(full)' and 'unroll(disable)'}} */ #pragma clang loop unroll(disable) #pragma clang loop distribute(enable) @@ -281,3 +288,7 @@ #pragma clang loop interleave(enable) /* expected-error {{expected statement}} */ } + +void foo(void) { +#pragma clang loop vectorize_predicate(enable) +/* expected-error {{expected statement}} */ } Index: cfe/trunk/test/Parser/pragma-unroll-and-jam.cpp =================================================================== --- cfe/trunk/test/Parser/pragma-unroll-and-jam.cpp +++ cfe/trunk/test/Parser/pragma-unroll-and-jam.cpp @@ -67,7 +67,7 @@ } // pragma clang unroll_and_jam is disabled for the moment -/* expected-error {{invalid option 'unroll_and_jam'; expected vectorize, vectorize_width, interleave, interleave_count, unroll, unroll_count, pipeline, pipeline_initiation_interval, or distribute}} */ #pragma clang loop unroll_and_jam(4) +/* expected-error {{invalid option 'unroll_and_jam'; expected vectorize, vectorize_width, interleave, interleave_count, unroll, unroll_count, pipeline, pipeline_initiation_interval, vectorize_predicate, or distribute}} */ #pragma clang loop unroll_and_jam(4) for (int i = 0; i < Length; i++) { for (int j = 0; j < Length; j++) { List[i * Length + j] = Value; Index: llvm/trunk/docs/LangRef.rst =================================================================== --- llvm/trunk/docs/LangRef.rst +++ llvm/trunk/docs/LangRef.rst @@ -5423,6 +5423,21 @@ !0 = !{!"llvm.loop.vectorize.enable", i1 0} !1 = !{!"llvm.loop.vectorize.enable", i1 1} +'``llvm.loop.vectorize.predicate.enable``' Metadata +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +This metadata selectively enables or disables creating predicated instructions +for the loop, which can enable folding of the scalar epilogue loop into the +main loop. The first operand is the string +``llvm.loop.vectorize.predicate.enable`` and the second operand is a bit. If +the bit operand value is 1 vectorization is enabled. A value of 0 disables +vectorization: + +.. code-block:: llvm + + !0 = !{!"llvm.loop.vectorize.predicate.enable", i1 0} + !1 = !{!"llvm.loop.vectorize.predicate.enable", i1 1} + '``llvm.loop.vectorize.width``' Metadata ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^