Index: clang/docs/LanguageExtensions.rst =================================================================== --- clang/docs/LanguageExtensions.rst +++ clang/docs/LanguageExtensions.rst @@ -3061,7 +3061,7 @@ The ``#pragma clang loop`` directive is used to specify hints for optimizing the subsequent for, while, do-while, or c++11 range-based for loop. The directive -provides options for vectorization, interleaving, predication, unrolling and +provides options for vectorization, interleaving, predication, aligning, unrolling and distribution. Loop hints can be specified before any loop and will be ignored if the optimization is not safe to apply. @@ -3124,6 +3124,17 @@ might be more efficient when vector predication is efficiently supported by the target platform. +Vector aligning is enabled by ``aligned(enable)``, for example: + +.. code-block:: c++ + #pragma clang loop aligned(enable) + for(...) { + ... + } + +This tells the compiler to assume that all array references inside the loop are aligned. Aligned accesses can reduce fetch time and improve performance. 
+ + Loop Unrolling -------------- Index: clang/include/clang/Basic/Attr.td =================================================================== --- clang/include/clang/Basic/Attr.td +++ clang/include/clang/Basic/Attr.td @@ -3010,11 +3010,11 @@ ["vectorize", "vectorize_width", "interleave", "interleave_count", "unroll", "unroll_count", "unroll_and_jam", "unroll_and_jam_count", "pipeline", "pipeline_initiation_interval", "distribute", - "vectorize_predicate"], + "vectorize_predicate", "aligned"], ["Vectorize", "VectorizeWidth", "Interleave", "InterleaveCount", "Unroll", "UnrollCount", "UnrollAndJam", "UnrollAndJamCount", "PipelineDisabled", "PipelineInitiationInterval", "Distribute", - "VectorizePredicate"]>, + "VectorizePredicate", "Aligned"]>, EnumArgument<"State", "LoopHintState", ["enable", "disable", "numeric", "assume_safety", "full"], ["Enable", "Disable", "Numeric", "AssumeSafety", "Full"]>, @@ -3035,6 +3035,7 @@ case PipelineInitiationInterval: return "pipeline_initiation_interval"; case Distribute: return "distribute"; case VectorizePredicate: return "vectorize_predicate"; + case Aligned: return "aligned"; } llvm_unreachable("Unhandled LoopHint option."); } Index: clang/include/clang/Basic/AttrDocs.td =================================================================== --- clang/include/clang/Basic/AttrDocs.td +++ clang/include/clang/Basic/AttrDocs.td @@ -2755,7 +2755,7 @@ let Content = [{ The ``#pragma clang loop`` directive allows loop optimization hints to be specified for the subsequent loop. The directive allows pipelining to be -disabled, or vectorization, vector predication, interleaving, and unrolling to +disabled, or vectorization, vector predication, aligned(array references), interleaving, and unrolling to be enabled or disabled. Vector width, vector predication, interleave count, unrolling count, and the initiation interval for pipelining can be explicitly specified. 
See `language extensions Index: clang/include/clang/Basic/DiagnosticParseKinds.td =================================================================== --- clang/include/clang/Basic/DiagnosticParseKinds.td +++ clang/include/clang/Basic/DiagnosticParseKinds.td @@ -1210,7 +1210,7 @@ def err_pragma_loop_invalid_option : Error< "%select{invalid|missing}0 option%select{ %1|}0; expected vectorize, " "vectorize_width, interleave, interleave_count, unroll, unroll_count, " - "pipeline, pipeline_initiation_interval, vectorize_predicate, or distribute">; + "pipeline, pipeline_initiation_interval, vectorize_predicate, aligned, or distribute">; def err_pragma_fp_invalid_option : Error< "%select{invalid|missing}0 option%select{ %1|}0; expected contract">; Index: clang/lib/CodeGen/CGLoopInfo.h =================================================================== --- clang/lib/CodeGen/CGLoopInfo.h +++ clang/lib/CodeGen/CGLoopInfo.h @@ -54,6 +54,9 @@ /// Value for llvm.loop.vectorize.predicate metadata LVEnableState VectorizePredicateEnable; + /// Value of llvm.loop.aligned metadata + LVEnableState AlignedEnable; + /// Value for llvm.loop.vectorize.width metadata. unsigned VectorizeWidth; @@ -229,6 +232,11 @@ Enable ? LoopAttributes::Enable : LoopAttributes::Disable; } + /// Set the next pushed loop 'aligned' + void setAlignedState(const LoopAttributes::LVEnableState &State){ + StagedAttrs.AlignedEnable = State; + } + /// Set the next pushed loop as a distribution candidate. 
void setDistributeState(bool Enable = true) { StagedAttrs.DistributeEnable = Index: clang/lib/CodeGen/CGLoopInfo.cpp =================================================================== --- clang/lib/CodeGen/CGLoopInfo.cpp +++ clang/lib/CodeGen/CGLoopInfo.cpp @@ -219,6 +219,7 @@ Enabled = false; else if (Attrs.VectorizeEnable != LoopAttributes::Unspecified || Attrs.VectorizePredicateEnable != LoopAttributes::Unspecified || + Attrs.AlignedEnable != LoopAttributes::Unspecified || Attrs.InterleaveCount != 0 || Attrs.VectorizeWidth != 0) Enabled = true; @@ -268,6 +269,18 @@ Args.push_back(MDNode::get(Ctx, Vals)); } + //Setting vectorize.aligned + bool IsAlignedEnabled = false; + if (Attrs.AlignedEnable != LoopAttributes::Unspecified && + Attrs.AlignedEnable != LoopAttributes::Disable ) { + IsAlignedEnabled = (Attrs.AlignedEnable == LoopAttributes::Enable); + + Metadata *Vals[] = { + MDString::get(Ctx, "llvm.loop.aligned.enable"), + ConstantAsMetadata::get(ConstantInt::get(llvm::Type::getInt1Ty(Ctx), IsAlignedEnabled)) + }; + Args.push_back(MDNode::get(Ctx, Vals)); + } // Setting vectorize.width if (Attrs.VectorizeWidth > 0) { Metadata *Vals[] = { @@ -432,7 +445,8 @@ : IsParallel(IsParallel), VectorizeEnable(LoopAttributes::Unspecified), UnrollEnable(LoopAttributes::Unspecified), UnrollAndJamEnable(LoopAttributes::Unspecified), - VectorizePredicateEnable(LoopAttributes::Unspecified), VectorizeWidth(0), + VectorizePredicateEnable(LoopAttributes::Unspecified), + AlignedEnable(LoopAttributes::Unspecified), VectorizeWidth(0), InterleaveCount(0), UnrollCount(0), UnrollAndJamCount(0), DistributeEnable(LoopAttributes::Unspecified), PipelineDisabled(false), PipelineInitiationInterval(0) {} @@ -444,6 +458,7 @@ UnrollCount = 0; UnrollAndJamCount = 0; VectorizeEnable = LoopAttributes::Unspecified; + AlignedEnable = LoopAttributes::Unspecified; UnrollEnable = LoopAttributes::Unspecified; UnrollAndJamEnable = LoopAttributes::Unspecified; VectorizePredicateEnable = 
LoopAttributes::Unspecified; @@ -469,6 +484,7 @@ Attrs.UnrollAndJamCount == 0 && !Attrs.PipelineDisabled && Attrs.PipelineInitiationInterval == 0 && Attrs.VectorizePredicateEnable == LoopAttributes::Unspecified && + Attrs.AlignedEnable == LoopAttributes::Unspecified && Attrs.VectorizeEnable == LoopAttributes::Unspecified && Attrs.UnrollEnable == LoopAttributes::Unspecified && Attrs.UnrollAndJamEnable == LoopAttributes::Unspecified && @@ -504,6 +520,7 @@ BeforeJam.VectorizeEnable = Attrs.VectorizeEnable; BeforeJam.DistributeEnable = Attrs.DistributeEnable; BeforeJam.VectorizePredicateEnable = Attrs.VectorizePredicateEnable; + BeforeJam.AlignedEnable = Attrs.AlignedEnable; switch (Attrs.UnrollEnable) { case LoopAttributes::Unspecified: @@ -519,6 +536,7 @@ break; } + AfterJam.AlignedEnable = Attrs.AlignedEnable; AfterJam.VectorizePredicateEnable = Attrs.VectorizePredicateEnable; AfterJam.UnrollCount = Attrs.UnrollCount; AfterJam.PipelineDisabled = Attrs.PipelineDisabled; @@ -542,6 +560,7 @@ SmallVector BeforeLoopProperties; if (BeforeJam.VectorizeEnable != LoopAttributes::Unspecified || BeforeJam.VectorizePredicateEnable != LoopAttributes::Unspecified || + BeforeJam.AlignedEnable != LoopAttributes::Unspecified || BeforeJam.InterleaveCount != 0 || BeforeJam.VectorizeWidth != 0) BeforeLoopProperties.push_back( MDNode::get(Ctx, MDString::get(Ctx, "llvm.loop.isvectorized"))); @@ -632,6 +651,9 @@ case LoopHintAttr::VectorizePredicate: setVectorizePredicateState(LoopAttributes::Disable); break; + case LoopHintAttr::Aligned: + setAlignedState(LoopAttributes::Disable); + break; case LoopHintAttr::Distribute: setDistributeState(false); break; @@ -662,6 +684,9 @@ case LoopHintAttr::VectorizePredicate: setVectorizePredicateState(LoopAttributes::Enable); break; + case LoopHintAttr::Aligned: + setAlignedState(LoopAttributes::Enable); + break; case LoopHintAttr::Distribute: setDistributeState(true); break; @@ -686,6 +711,7 @@ case LoopHintAttr::Unroll: case 
LoopHintAttr::UnrollAndJam: case LoopHintAttr::VectorizePredicate: + case LoopHintAttr::Aligned: case LoopHintAttr::UnrollCount: case LoopHintAttr::UnrollAndJamCount: case LoopHintAttr::VectorizeWidth: @@ -715,6 +741,7 @@ case LoopHintAttr::PipelineDisabled: case LoopHintAttr::PipelineInitiationInterval: case LoopHintAttr::VectorizePredicate: + case LoopHintAttr::Aligned: llvm_unreachable("Options cannot be used with 'full' hint."); break; } @@ -739,6 +766,7 @@ case LoopHintAttr::Unroll: case LoopHintAttr::UnrollAndJam: case LoopHintAttr::VectorizePredicate: + case LoopHintAttr::Aligned: case LoopHintAttr::Vectorize: case LoopHintAttr::Interleave: case LoopHintAttr::Distribute: Index: clang/lib/Parse/ParsePragma.cpp =================================================================== --- clang/lib/Parse/ParsePragma.cpp +++ clang/lib/Parse/ParsePragma.cpp @@ -1067,6 +1067,7 @@ .Case("vectorize", true) .Case("interleave", true) .Case("vectorize_predicate", true) + .Case("aligned", true) .Default(false) || OptionUnroll || OptionUnrollAndJam || OptionDistribute || OptionPipelineDisabled; @@ -2821,6 +2822,7 @@ /// 'interleave' '(' loop-hint-keyword ')' /// 'unroll' '(' unroll-hint-keyword ')' /// 'vectorize_predicate' '(' loop-hint-keyword ')' +/// 'aligned' '(' loop-hint-keyword')' /// 'vectorize_width' '(' loop-hint-value ')' /// 'interleave_count' '(' loop-hint-value ')' /// 'unroll_count' '(' loop-hint-value ')' @@ -2883,6 +2885,7 @@ .Case("unroll", true) .Case("distribute", true) .Case("vectorize_predicate", true) + .Case("aligned", true) .Case("vectorize_width", true) .Case("interleave_count", true) .Case("unroll_count", true) Index: clang/lib/Sema/SemaStmtAttr.cpp =================================================================== --- clang/lib/Sema/SemaStmtAttr.cpp +++ clang/lib/Sema/SemaStmtAttr.cpp @@ -134,6 +134,7 @@ .Case("vectorize_width", LoopHintAttr::VectorizeWidth) .Case("interleave", LoopHintAttr::Interleave) .Case("vectorize_predicate", 
LoopHintAttr::VectorizePredicate) + .Case("aligned", LoopHintAttr::Aligned) .Case("interleave_count", LoopHintAttr::InterleaveCount) .Case("unroll", LoopHintAttr::Unroll) .Case("unroll_count", LoopHintAttr::UnrollCount) @@ -153,6 +154,7 @@ } else if (Option == LoopHintAttr::Vectorize || Option == LoopHintAttr::Interleave || Option == LoopHintAttr::VectorizePredicate || + Option == LoopHintAttr::Aligned || Option == LoopHintAttr::Unroll || Option == LoopHintAttr::Distribute || Option == LoopHintAttr::PipelineDisabled) { @@ -192,7 +194,7 @@ const LoopHintAttr *NumericAttr; } HintAttrs[] = {{nullptr, nullptr}, {nullptr, nullptr}, {nullptr, nullptr}, {nullptr, nullptr}, {nullptr, nullptr}, {nullptr, nullptr}, - {nullptr, nullptr}}; + {nullptr, nullptr}, {nullptr, nullptr}}; for (const auto *I : Attrs) { const LoopHintAttr *LH = dyn_cast(I); @@ -209,7 +211,8 @@ UnrollAndJam, Distribute, Pipeline, - VectorizePredicate + VectorizePredicate, + Aligned } Category; switch (Option) { case LoopHintAttr::Vectorize: @@ -239,6 +242,9 @@ case LoopHintAttr::VectorizePredicate: Category = VectorizePredicate; break; + case LoopHintAttr::Aligned: + Category = Aligned; + break; }; assert(Category < sizeof(HintAttrs) / sizeof(HintAttrs[0])); @@ -248,6 +254,7 @@ Option == LoopHintAttr::Interleave || Option == LoopHintAttr::Unroll || Option == LoopHintAttr::UnrollAndJam || Option == LoopHintAttr::VectorizePredicate || + Option == LoopHintAttr::Aligned || Option == LoopHintAttr::PipelineDisabled || Option == LoopHintAttr::Distribute) { // Enable|Disable|AssumeSafety hint. For example, vectorize(enable). 
Index: clang/test/AST/ast-print-pragmas.cpp =================================================================== --- clang/test/AST/ast-print-pragmas.cpp +++ clang/test/AST/ast-print-pragmas.cpp @@ -23,6 +23,7 @@ #pragma clang loop vectorize(enable) #pragma clang loop interleave(disable) #pragma clang loop vectorize_predicate(disable) +#pragma clang loop aligned(disable) // CHECK-NEXT: while (i - 1 < Length) while (i - 1 < Length) { List[i] = i * 2; @@ -38,6 +39,7 @@ #pragma clang loop vectorize(disable) #pragma clang loop interleave(enable) #pragma clang loop vectorize_predicate(enable) +#pragma clang loop aligned(enable) // CHECK-NEXT: while (i - 2 < Length) while (i - 2 < Length) { List[i] = i * 2; Index: clang/test/CodeGenCXX/pragma-loop-aligned.cpp =================================================================== --- /dev/null +++ clang/test/CodeGenCXX/pragma-loop-aligned.cpp @@ -0,0 +1,78 @@ +// RUN: %clang_cc1 -triple x86_64-apple-darwin -std=c++11 -emit-llvm -o - %s | FileCheck %s + +// RUN: %clang_cc1 -triple x86_64-apple-darwin -std=c++11 -emit-llvm -o - %s | FileCheck %s + +void test0(int *List, int Length) { +// CHECK-LABEL: @{{.*}}test0{{.*}}( +// CHECK: br label {{.*}}, !llvm.loop ![[LOOP0:.*]] + + #pragma clang loop vectorize(enable) + for (int i = 0; i < Length; i++) + List[i] = i * 2; +} + +void test1(int *List, int Length) { +// CHECK-LABEL: @{{.*}}test1{{.*}}( +// CHECK: br label {{.*}}, !llvm.loop ![[LOOP1:.*]] + + #pragma clang loop vectorize(enable) aligned(enable) + for (int i = 0; i < Length; i++) + List[i] = i * 2; +} + +void test2(int *List, int Length) { +// CHECK-LABEL: @{{.*}}test2{{.*}}( +// CHECK: br label {{.*}}, !llvm.loop ![[LOOP2:.*]] + + #pragma clang loop aligned(disable) + for (int i = 0; i < Length; i++) + List[i] = i * 2; +} + +// aligned(enable) implies vectorize(enable) +void test3(int *List, int Length) { +// CHECK-LABEL: @{{.*}}test3{{.*}}( +// CHECK: br label {{.*}}, !llvm.loop ![[LOOP3:.*]] + + #pragma 
clang loop aligned(enable) + for (int i = 0; i < Length; i++) + List[i] = i * 2; +} + +// Check that disabling vectorization means a vectorization width of 1, and +// also that aligned isn't enabled. +void test4(int *List, int Length) { +// CHECK-LABEL: @{{.*}}test4{{.*}}( +// CHECK: br label {{.*}}, !llvm.loop ![[LOOP4:.*]] + + #pragma clang loop vectorize(disable) + for (int i = 0; i < Length; i++) + List[i] = i * 2; +} + +// Check that vectorize and aligned are disabled. +void test5(int *List, int Length) { +// CHECK-LABEL: @{{.*}}test5{{.*}}( +// CHECK: br label {{.*}}, !llvm.loop ![[LOOP5:.*]] + + #pragma clang loop vectorize(disable) aligned(enable) + for (int i = 0; i < Length; i++) + List[i] = i * 2; +} + + +// CHECK: ![[LOOP0]] = distinct !{![[LOOP0]], !3} +// CHECK-NEXT: !3 = !{!"llvm.loop.vectorize.enable", i1 true} + +// CHECK-NEXT: ![[LOOP1]] = distinct !{![[LOOP1]], !5, !3} +// CHECK-NEXT: !5 = !{!"llvm.loop.aligned.enable", i1 true} + +// CHECK-NEXT: ![[LOOP2]] = distinct !{![[LOOP2]], !7, !3} +// CHECK-NEXT: !7 = !{!"llvm.loop.aligned.enable", i1 false} + +// CHECK-NEXT: ![[LOOP3]] = distinct !{![[LOOP3]], !5, !3} + +// CHECK-NEXT: ![[LOOP4]] = distinct !{![[LOOP4]], !10} +// CHECK-NEXT: !10 = !{!"llvm.loop.vectorize.width", i32 1} + +// CHECK-NEXT: ![[LOOP5]] = distinct !{![[LOOP5]], !10} Index: clang/test/Parser/pragma-loop.cpp =================================================================== --- clang/test/Parser/pragma-loop.cpp +++ clang/test/Parser/pragma-loop.cpp @@ -82,6 +82,7 @@ #pragma clang loop vectorize(enable) #pragma clang loop interleave(enable) #pragma clang loop vectorize_predicate(enable) +#pragma clang loop aligned(enable) #pragma clang loop unroll(full) while (i + 1 < Length) { List[i] = i; @@ -97,6 +98,7 @@ #pragma clang loop vectorize(disable) #pragma clang loop interleave(disable) #pragma clang loop vectorize_predicate(disable) +#pragma clang loop aligned(disable) #pragma clang loop unroll(disable) while (i - 
1 < Length) { List[i] = i; @@ -113,7 +115,7 @@ } int VList[Length]; -#pragma clang loop vectorize(disable) interleave(disable) unroll(disable) vectorize_predicate(disable) +#pragma clang loop vectorize(disable) interleave(disable) unroll(disable) vectorize_predicate(disable) aligned(disable) for (int j : VList) { VList[j] = List[j]; } @@ -133,12 +135,14 @@ /* expected-error {{expected '('}} */ #pragma clang loop vectorize /* expected-error {{expected '('}} */ #pragma clang loop interleave /* expected-error {{expected '('}} */ #pragma clang loop vectorize_predicate +/* expected-error {{expected '('}} */ #pragma clang loop aligned /* expected-error {{expected '('}} */ #pragma clang loop unroll /* expected-error {{expected '('}} */ #pragma clang loop distribute /* expected-error {{expected ')'}} */ #pragma clang loop vectorize(enable /* expected-error {{expected ')'}} */ #pragma clang loop interleave(enable /* expected-error {{expected ')'}} */ #pragma clang loop vectorize_predicate(enable +/* expected-error {{expected ')'}} */ #pragma clang loop aligned(enable /* expected-error {{expected ')'}} */ #pragma clang loop unroll(full /* expected-error {{expected ')'}} */ #pragma clang loop distribute(enable @@ -151,7 +155,7 @@ /* expected-error {{missing argument; expected 'enable', 'full' or 'disable'}} */ #pragma clang loop unroll() /* expected-error {{missing argument; expected 'enable' or 'disable'}} */ #pragma clang loop distribute() -/* expected-error {{missing option; expected vectorize, vectorize_width, interleave, interleave_count, unroll, unroll_count, pipeline, pipeline_initiation_interval, vectorize_predicate, or distribute}} */ #pragma clang loop +/* expected-error {{missing option; expected vectorize, vectorize_width, interleave, interleave_count, unroll, unroll_count, pipeline, pipeline_initiation_interval, vectorize_predicate, aligned, or distribute}} */ #pragma clang loop /* expected-error {{invalid option 'badkeyword'}} */ #pragma clang loop badkeyword /* 
expected-error {{invalid option 'badkeyword'}} */ #pragma clang loop badkeyword(enable) /* expected-error {{invalid option 'badkeyword'}} */ #pragma clang loop vectorize(enable) badkeyword(4) @@ -252,6 +256,9 @@ #pragma clang loop vectorize_predicate(enable) /* expected-error@+1 {{duplicate directives 'vectorize_predicate(enable)' and 'vectorize_predicate(disable)'}} */ #pragma clang loop vectorize_predicate(disable) +#pragma clang loop aligned(enable) +/* expected-error@+1 {{duplicate directives 'aligned(enable)' and 'aligned(disable)'}} */ +#pragma clang loop aligned(disable) #pragma clang loop unroll(full) /* expected-error {{duplicate directives 'unroll(full)' and 'unroll(disable)'}} */ #pragma clang loop unroll(disable) #pragma clang loop distribute(enable) @@ -292,3 +299,11 @@ void foo(void) { #pragma clang loop vectorize_predicate(enable) /* expected-error {{expected statement}} */ } + +#pragma clang loop aligned(disable) +/* expected-error {{incompatible directives 'unroll(disable)' and 'unroll_count(4)'}} */ #pragma clang loop unroll_count(4) + while (i-10 < Length) { + List[i] = i; + } + + Index: clang/test/Parser/pragma-unroll-and-jam.cpp =================================================================== --- clang/test/Parser/pragma-unroll-and-jam.cpp +++ clang/test/Parser/pragma-unroll-and-jam.cpp @@ -67,7 +67,7 @@ } // pragma clang unroll_and_jam is disabled for the moment -/* expected-error {{invalid option 'unroll_and_jam'; expected vectorize, vectorize_width, interleave, interleave_count, unroll, unroll_count, pipeline, pipeline_initiation_interval, vectorize_predicate, or distribute}} */ #pragma clang loop unroll_and_jam(4) +/* expected-error {{invalid option 'unroll_and_jam'; expected vectorize, vectorize_width, interleave, interleave_count, unroll, unroll_count, pipeline, pipeline_initiation_interval, vectorize_predicate, aligned, or distribute}} */ #pragma clang loop unroll_and_jam(4) for (int i = 0; i < Length; i++) { for (int j = 0; j < 
Length; j++) { List[i * Length + j] = Value;