Index: clang/docs/LanguageExtensions.rst =================================================================== --- clang/docs/LanguageExtensions.rst +++ clang/docs/LanguageExtensions.rst @@ -3023,9 +3023,9 @@ The ``#pragma clang loop`` directive is used to specify hints for optimizing the subsequent for, while, do-while, or c++11 range-based for loop. The directive -provides options for vectorization, interleaving, predication, unrolling and -distribution. Loop hints can be specified before any loop and will be ignored if -the optimization is not safe to apply. +provides options for vectorization, interleaving, predication, ignoring vector +dependencies, unrolling and distribution. Loop hints can be specified before any +loop and will be ignored if the optimization is not safe to apply. There are loop hints that control transformations (e.g. vectorization, loop unrolling) and there are loop hints that set transformation options (e.g. @@ -3094,6 +3094,21 @@ might be more efficient when vector predication is efficiently supported by the target platform. +Ignore Vector Dependencies in Loop +---------------------------------- +Dependencies in a loop that inhibit vectorization of the loop can be ignored by +specifying ``ivdep(enable)``. + +.. code-block:: c++ + + #pragma clang loop ivdep(enable) + for(...) { + ... + } + +This hints to the vectorizer that all load and store instructions in the loop +can be executed in parallel. This pragma also implies ``vectorize(enable)``. 
+ Loop Unrolling -------------- Index: clang/include/clang/Basic/Attr.td =================================================================== --- clang/include/clang/Basic/Attr.td +++ clang/include/clang/Basic/Attr.td @@ -3036,11 +3036,11 @@ ["vectorize", "vectorize_width", "interleave", "interleave_count", "unroll", "unroll_count", "unroll_and_jam", "unroll_and_jam_count", "pipeline", "pipeline_initiation_interval", "distribute", - "vectorize_predicate"], + "vectorize_predicate", "ivdep"], ["Vectorize", "VectorizeWidth", "Interleave", "InterleaveCount", "Unroll", "UnrollCount", "UnrollAndJam", "UnrollAndJamCount", "PipelineDisabled", "PipelineInitiationInterval", "Distribute", - "VectorizePredicate"]>, + "VectorizePredicate", "Ivdep"]>, EnumArgument<"State", "LoopHintState", ["enable", "disable", "numeric", "assume_safety", "full"], ["Enable", "Disable", "Numeric", "AssumeSafety", "Full"]>, @@ -3061,6 +3061,7 @@ case PipelineInitiationInterval: return "pipeline_initiation_interval"; case Distribute: return "distribute"; case VectorizePredicate: return "vectorize_predicate"; + case Ivdep: return "ivdep"; } llvm_unreachable("Unhandled LoopHint option."); } Index: clang/include/clang/Basic/AttrDocs.td =================================================================== --- clang/include/clang/Basic/AttrDocs.td +++ clang/include/clang/Basic/AttrDocs.td @@ -2787,10 +2787,11 @@ let Content = [{ The ``#pragma clang loop`` directive allows loop optimization hints to be specified for the subsequent loop. The directive allows pipelining to be -disabled, or vectorization, vector predication, interleaving, and unrolling to -be enabled or disabled. Vector width, vector predication, interleave count, -unrolling count, and the initiation interval for pipelining can be explicitly -specified. See `language extensions +disabled, vector dependencies to be ignored, or vectorization, vector +predication, interleaving, and unrolling to be enabled or disabled. 
Vector +width, vector predication, interleave count, unrolling count, and the +initiation interval for pipelining can be explicitly specified. +See `language extensions <https://clang.llvm.org/docs/LanguageExtensions.html#extensions-for-loop-hint-optimizations>`_ for details. }]; Index: clang/include/clang/Basic/DiagnosticParseKinds.td =================================================================== --- clang/include/clang/Basic/DiagnosticParseKinds.td +++ clang/include/clang/Basic/DiagnosticParseKinds.td @@ -1231,7 +1231,7 @@ def err_pragma_loop_invalid_option : Error< "%select{invalid|missing}0 option%select{ %1|}0; expected vectorize, " "vectorize_width, interleave, interleave_count, unroll, unroll_count, " - "pipeline, pipeline_initiation_interval, vectorize_predicate, or distribute">; + "pipeline, pipeline_initiation_interval, vectorize_predicate, ivdep, or distribute">; def err_pragma_fp_invalid_option : Error< "%select{invalid|missing}0 option%select{ %1|}0; expected contract">; Index: clang/lib/CodeGen/CGLoopInfo.cpp =================================================================== --- clang/lib/CodeGen/CGLoopInfo.cpp +++ clang/lib/CodeGen/CGLoopInfo.cpp @@ -647,6 +647,9 @@ case LoopHintAttr::PipelineDisabled: setPipelineDisabled(true); break; + case LoopHintAttr::Ivdep: + setParallel(false); + break; case LoopHintAttr::UnrollCount: case LoopHintAttr::UnrollAndJamCount: case LoopHintAttr::VectorizeWidth: @@ -674,6 +677,12 @@ case LoopHintAttr::Distribute: setDistributeState(true); break; + case LoopHintAttr::Ivdep: + // Mark the loop's load/stores as parallel ("llvm.access.group" metadata, + // listed under "llvm.loop.parallel_accesses") and set vectorize(enable). 
+ setParallel(true); + setVectorizeEnable(true); + break; case LoopHintAttr::UnrollCount: case LoopHintAttr::UnrollAndJamCount: case LoopHintAttr::VectorizeWidth: @@ -695,6 +704,7 @@ case LoopHintAttr::Unroll: case LoopHintAttr::UnrollAndJam: case LoopHintAttr::VectorizePredicate: + case LoopHintAttr::Ivdep: case LoopHintAttr::UnrollCount: case LoopHintAttr::UnrollAndJamCount: case LoopHintAttr::VectorizeWidth: @@ -716,6 +726,7 @@ break; case LoopHintAttr::Vectorize: case LoopHintAttr::Interleave: + case LoopHintAttr::Ivdep: case LoopHintAttr::UnrollCount: case LoopHintAttr::UnrollAndJamCount: case LoopHintAttr::VectorizeWidth: @@ -749,6 +760,7 @@ case LoopHintAttr::UnrollAndJam: case LoopHintAttr::VectorizePredicate: case LoopHintAttr::Vectorize: + case LoopHintAttr::Ivdep: case LoopHintAttr::Interleave: case LoopHintAttr::Distribute: case LoopHintAttr::PipelineDisabled: Index: clang/lib/Parse/ParsePragma.cpp =================================================================== --- clang/lib/Parse/ParsePragma.cpp +++ clang/lib/Parse/ParsePragma.cpp @@ -1067,6 +1067,7 @@ .Case("vectorize", true) .Case("interleave", true) .Case("vectorize_predicate", true) + .Case("ivdep", true) .Default(false) || OptionUnroll || OptionUnrollAndJam || OptionDistribute || OptionPipelineDisabled; @@ -2823,6 +2824,7 @@ /// 'interleave' '(' loop-hint-keyword ')' /// 'unroll' '(' unroll-hint-keyword ')' /// 'vectorize_predicate' '(' loop-hint-keyword ')' +/// 'ivdep' '(' loop-hint-keyword ')' /// 'vectorize_width' '(' loop-hint-value ')' /// 'interleave_count' '(' loop-hint-value ')' /// 'unroll_count' '(' loop-hint-value ')' @@ -2885,6 +2887,7 @@ .Case("unroll", true) .Case("distribute", true) .Case("vectorize_predicate", true) + .Case("ivdep", true) .Case("vectorize_width", true) .Case("interleave_count", true) .Case("unroll_count", true) Index: clang/lib/Sema/SemaStmtAttr.cpp =================================================================== --- clang/lib/Sema/SemaStmtAttr.cpp +++ 
clang/lib/Sema/SemaStmtAttr.cpp @@ -130,6 +130,7 @@ .Case("vectorize_width", LoopHintAttr::VectorizeWidth) .Case("interleave", LoopHintAttr::Interleave) .Case("vectorize_predicate", LoopHintAttr::VectorizePredicate) + .Case("ivdep", LoopHintAttr::Ivdep) .Case("interleave_count", LoopHintAttr::InterleaveCount) .Case("unroll", LoopHintAttr::Unroll) .Case("unroll_count", LoopHintAttr::UnrollCount) @@ -149,6 +150,7 @@ } else if (Option == LoopHintAttr::Vectorize || Option == LoopHintAttr::Interleave || Option == LoopHintAttr::VectorizePredicate || + Option == LoopHintAttr::Ivdep || Option == LoopHintAttr::Unroll || Option == LoopHintAttr::Distribute || Option == LoopHintAttr::PipelineDisabled) { @@ -187,7 +189,7 @@ const LoopHintAttr *NumericAttr; } HintAttrs[] = {{nullptr, nullptr}, {nullptr, nullptr}, {nullptr, nullptr}, {nullptr, nullptr}, {nullptr, nullptr}, {nullptr, nullptr}, - {nullptr, nullptr}}; + {nullptr, nullptr}, {nullptr, nullptr}}; for (const auto *I : Attrs) { const LoopHintAttr *LH = dyn_cast<LoopHintAttr>(I); @@ -200,6 +202,7 @@ enum { Vectorize, Interleave, + Ivdep, Unroll, UnrollAndJam, Distribute, @@ -234,13 +237,18 @@ case LoopHintAttr::VectorizePredicate: Category = VectorizePredicate; break; + case LoopHintAttr::Ivdep: + Category = Ivdep; + break; }; assert(Category < sizeof(HintAttrs) / sizeof(HintAttrs[0])); auto &CategoryState = HintAttrs[Category]; const LoopHintAttr *PrevAttr; if (Option == LoopHintAttr::Vectorize || - Option == LoopHintAttr::Interleave || Option == LoopHintAttr::Unroll || + Option == LoopHintAttr::Interleave || + Option == LoopHintAttr::Ivdep || + Option == LoopHintAttr::Unroll || Option == LoopHintAttr::UnrollAndJam || Option == LoopHintAttr::VectorizePredicate || Option == LoopHintAttr::PipelineDisabled || Index: clang/test/AST/ast-print-pragmas.cpp =================================================================== --- clang/test/AST/ast-print-pragmas.cpp +++ clang/test/AST/ast-print-pragmas.cpp @@ -8,37 +8,41 @@ int i = 0; #pragma 
clang loop vectorize_width(4) #pragma clang loop interleave_count(8) -// CHECK-NEXT: while (i < Length) + // CHECK-NEXT: while (i < Length) while (i < Length) { List[i] = i * 2; i++; } -// CHECK: #pragma clang loop distribute(disable) -// CHECK-NEXT: #pragma clang loop vectorize(enable) -// CHECK-NEXT: #pragma clang loop interleave(disable) -// CHECK-NEXT: #pragma clang loop vectorize_predicate(disable) + // CHECK: #pragma clang loop distribute(disable) + // CHECK-NEXT: #pragma clang loop vectorize(enable) + // CHECK-NEXT: #pragma clang loop interleave(disable) + // CHECK-NEXT: #pragma clang loop vectorize_predicate(disable) + // CHECK-NEXT: #pragma clang loop ivdep(disable) #pragma clang loop distribute(disable) #pragma clang loop vectorize(enable) #pragma clang loop interleave(disable) #pragma clang loop vectorize_predicate(disable) -// CHECK-NEXT: while (i - 1 < Length) +#pragma clang loop ivdep(disable) + // CHECK-NEXT: while (i - 1 < Length) while (i - 1 < Length) { List[i] = i * 2; i++; } -// CHECK: #pragma clang loop distribute(enable) -// CHECK-NEXT: #pragma clang loop vectorize(disable) -// CHECK-NEXT: #pragma clang loop interleave(enable) -// CHECK-NEXT: #pragma clang loop vectorize_predicate(enable) + // CHECK: #pragma clang loop distribute(enable) + // CHECK-NEXT: #pragma clang loop vectorize(disable) + // CHECK-NEXT: #pragma clang loop interleave(enable) + // CHECK-NEXT: #pragma clang loop vectorize_predicate(enable) + // CHECK-NEXT: #pragma clang loop ivdep(enable) #pragma clang loop distribute(enable) #pragma clang loop vectorize(disable) #pragma clang loop interleave(enable) #pragma clang loop vectorize_predicate(enable) -// CHECK-NEXT: while (i - 2 < Length) +#pragma clang loop ivdep(enable) + // CHECK-NEXT: while (i - 2 < Length) while (i - 2 < Length) { List[i] = i * 2; i++; @@ -66,4 +70,3 @@ // MS-EXT-NEXT: int x = 3 __declspec(thread); int __declspec(thread) x = 3; #endif //MS_EXT - Index: clang/test/CodeGenCXX/pragma-loop-ivdep.cpp 
=================================================================== --- /dev/null +++ clang/test/CodeGenCXX/pragma-loop-ivdep.cpp @@ -0,0 +1,55 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++11 -emit-llvm -o - %s | FileCheck %s + +// Check that ivdep(enable) tags the loop's loads and stores with one shared +// access group, lists that group under llvm.loop.parallel_accesses and implies +// vectorize(enable), while ivdep(disable) adds no parallel-access metadata. + +void test0(int *a, int *b, int LEN_1D) { + // CHECK-LABEL: @{{.*}}test0{{.*}}( + // CHECK: br label {{.*}}, !llvm.loop ![[LOOP0:.*]] + +#pragma clang loop vectorize(enable) + for (int i = 0; i < LEN_1D; i++) + a[b[i]] = a[b[i]] + 1; +} + +void test1(int *a, int *b, int LEN_1D) { + // CHECK-LABEL: @{{.*}}test1{{.*}}( + // CHECK: {{.*}} load {{.*}} !llvm.access.group ![[GROUP0:.*]] + // CHECK: store {{.*}} !llvm.access.group ![[GROUP0]] + // CHECK: br label {{.*}}, !llvm.loop ![[LOOP1:.*]] + +#pragma clang loop ivdep(enable) + for (int i = 0; i < LEN_1D; i++) + a[b[i]] = a[b[i]] + 1; +} + +void test2(int *a, int *b, int LEN_1D) { + // CHECK-LABEL: @{{.*}}test2{{.*}}( + // CHECK: {{.*}} load {{.*}} !llvm.access.group ![[GROUP1:.*]] + // CHECK: store {{.*}} !llvm.access.group ![[GROUP1]] + // CHECK: br label {{.*}}, !llvm.loop ![[LOOP2:.*]] + +#pragma clang loop vectorize(enable) ivdep(enable) + for (int i = 0; i < LEN_1D; i++) + a[b[i]] = a[b[i]] + 1; +} + +void test3(int *a, int *b, int LEN_1D) { + // CHECK-LABEL: @{{.*}}test3{{.*}}( + // CHECK: br label {{.*}}, !llvm.loop ![[LOOP3:.*]] + +#pragma clang loop vectorize(enable) ivdep(disable) + for (int i = 0; i < LEN_1D; i++) + a[b[i]] = a[b[i]] + 1; +} + +// CHECK: ![[LOOP0]] = distinct !{![[LOOP0]], ![[VEC_ENABLE:[0-9]+]]} +// CHECK-NEXT: ![[VEC_ENABLE]] = !{!"llvm.loop.vectorize.enable", i1 true} +// CHECK-NEXT: ![[GROUP0]] = distinct !{} +// CHECK-NEXT: ![[LOOP1]] = distinct !{![[LOOP1]], ![[PARALLEL0:[0-9]+]], ![[VEC_ENABLE]]} +// CHECK-NEXT: ![[PARALLEL0]] = !{!"llvm.loop.parallel_accesses", ![[GROUP0]]} +// CHECK-NEXT: ![[GROUP1]] = distinct !{} +// CHECK-NEXT: ![[LOOP2]] = distinct !{![[LOOP2]], ![[PARALLEL1:[0-9]+]], ![[VEC_ENABLE]]} +// CHECK-NEXT: ![[PARALLEL1]] = !{!"llvm.loop.parallel_accesses", ![[GROUP1]]} +// CHECK-NEXT: ![[LOOP3]] = distinct !{![[LOOP3]], ![[VEC_ENABLE]]} Index: 
clang/test/Parser/pragma-loop.cpp =================================================================== --- clang/test/Parser/pragma-loop.cpp +++ clang/test/Parser/pragma-loop.cpp @@ -82,6 +82,7 @@ #pragma clang loop vectorize(enable) #pragma clang loop interleave(enable) #pragma clang loop vectorize_predicate(enable) +#pragma clang loop ivdep(enable) #pragma clang loop unroll(full) while (i + 1 < Length) { List[i] = i; @@ -97,6 +98,7 @@ #pragma clang loop vectorize(disable) #pragma clang loop interleave(disable) #pragma clang loop vectorize_predicate(disable) +#pragma clang loop ivdep(disable) #pragma clang loop unroll(disable) while (i - 1 < Length) { List[i] = i; @@ -113,7 +115,7 @@ } int VList[Length]; -#pragma clang loop vectorize(disable) interleave(disable) unroll(disable) vectorize_predicate(disable) +#pragma clang loop vectorize(disable) interleave(disable) unroll(disable) vectorize_predicate(disable) ivdep(disable) for (int j : VList) { VList[j] = List[j]; } @@ -135,12 +137,14 @@ /* expected-error {{expected '('}} */ #pragma clang loop vectorize_predicate /* expected-error {{expected '('}} */ #pragma clang loop unroll /* expected-error {{expected '('}} */ #pragma clang loop distribute +/* expected-error {{expected '('}} */ #pragma clang loop ivdep /* expected-error {{expected ')'}} */ #pragma clang loop vectorize(enable /* expected-error {{expected ')'}} */ #pragma clang loop interleave(enable /* expected-error {{expected ')'}} */ #pragma clang loop vectorize_predicate(enable /* expected-error {{expected ')'}} */ #pragma clang loop unroll(full /* expected-error {{expected ')'}} */ #pragma clang loop distribute(enable +/* expected-error {{expected ')'}} */ #pragma clang loop ivdep(enable /* expected-error {{expected ')'}} */ #pragma clang loop vectorize_width(4 /* expected-error {{expected ')'}} */ #pragma clang loop interleave_count(4 @@ -151,7 +155,7 @@ /* expected-error {{missing argument; expected 'enable', 'full' or 'disable'}} */ #pragma clang loop 
unroll() /* expected-error {{missing argument; expected 'enable' or 'disable'}} */ #pragma clang loop distribute() -/* expected-error {{missing option; expected vectorize, vectorize_width, interleave, interleave_count, unroll, unroll_count, pipeline, pipeline_initiation_interval, vectorize_predicate, or distribute}} */ #pragma clang loop +/* expected-error {{missing option; expected vectorize, vectorize_width, interleave, interleave_count, unroll, unroll_count, pipeline, pipeline_initiation_interval, vectorize_predicate, ivdep, or distribute}} */ #pragma clang loop /* expected-error {{invalid option 'badkeyword'}} */ #pragma clang loop badkeyword /* expected-error {{invalid option 'badkeyword'}} */ #pragma clang loop badkeyword(enable) /* expected-error {{invalid option 'badkeyword'}} */ #pragma clang loop vectorize(enable) badkeyword(4) @@ -249,6 +253,8 @@ /* expected-error {{duplicate directives 'vectorize(enable)' and 'vectorize(disable)'}} */ #pragma clang loop vectorize(disable) #pragma clang loop interleave(enable) /* expected-error {{duplicate directives 'interleave(enable)' and 'interleave(disable)'}} */ #pragma clang loop interleave(disable) +#pragma clang loop ivdep(enable) +/* expected-error {{duplicate directives 'ivdep(enable)' and 'ivdep(disable)'}} */ #pragma clang loop ivdep(disable) #pragma clang loop vectorize_predicate(enable) /* expected-error@+1 {{duplicate directives 'vectorize_predicate(enable)' and 'vectorize_predicate(disable)'}} */ #pragma clang loop vectorize_predicate(disable) Index: clang/test/Parser/pragma-unroll-and-jam.cpp =================================================================== --- clang/test/Parser/pragma-unroll-and-jam.cpp +++ clang/test/Parser/pragma-unroll-and-jam.cpp @@ -67,7 +67,7 @@ } // pragma clang unroll_and_jam is disabled for the moment -/* expected-error {{invalid option 'unroll_and_jam'; expected vectorize, vectorize_width, interleave, interleave_count, unroll, unroll_count, pipeline, 
pipeline_initiation_interval, vectorize_predicate, or distribute}} */ #pragma clang loop unroll_and_jam(4) +/* expected-error {{invalid option 'unroll_and_jam'; expected vectorize, vectorize_width, interleave, interleave_count, unroll, unroll_count, pipeline, pipeline_initiation_interval, vectorize_predicate, ivdep, or distribute}} */ #pragma clang loop unroll_and_jam(4) for (int i = 0; i < Length; i++) { for (int j = 0; j < Length; j++) { List[i * Length + j] = Value;