Index: clang/docs/LanguageExtensions.rst
===================================================================
--- clang/docs/LanguageExtensions.rst
+++ clang/docs/LanguageExtensions.rst
@@ -3023,9 +3023,9 @@
 
 The ``#pragma clang loop`` directive is used to specify hints for optimizing the
 subsequent for, while, do-while, or c++11 range-based for loop. The directive
-provides options for vectorization, interleaving, predication, unrolling and
-distribution. Loop hints can be specified before any loop and will be ignored if
-the optimization is not safe to apply.
+provides options for vectorization, interleaving, predication, ignoring vector
+dependencies, unrolling and distribution. Loop hints can be specified before any
+loop and will be ignored if the optimization is not safe to apply.
 
 There are loop hints that control transformations (e.g. vectorization, loop
 unrolling) and there are loop hints that set transformation options (e.g.
@@ -3094,6 +3094,22 @@
 might be more efficient when vector predication is efficiently supported by the
 target platform.
 
+Ignore Vector Dependencies in Loop
+----------------------------------
+Dependencies of unknown type in a loop that would otherwise inhibit its
+vectorization can be ignored by using ``ivdep(enable)``.
+
+.. code-block:: c++
+
+  #pragma clang loop ivdep(enable)
+  for(...) {
+    ...
+  }
+
+This hints to the vectorizer that dependencies it could not prove to be either
+safe or unsafe for vectorization should be ignored. This pragma also implies
+``vectorize(enable)``.
+ Loop Unrolling -------------- Index: clang/include/clang/Basic/Attr.td =================================================================== --- clang/include/clang/Basic/Attr.td +++ clang/include/clang/Basic/Attr.td @@ -3036,11 +3036,11 @@ ["vectorize", "vectorize_width", "interleave", "interleave_count", "unroll", "unroll_count", "unroll_and_jam", "unroll_and_jam_count", "pipeline", "pipeline_initiation_interval", "distribute", - "vectorize_predicate"], + "vectorize_predicate", "ivdep"], ["Vectorize", "VectorizeWidth", "Interleave", "InterleaveCount", "Unroll", "UnrollCount", "UnrollAndJam", "UnrollAndJamCount", "PipelineDisabled", "PipelineInitiationInterval", "Distribute", - "VectorizePredicate"]>, + "VectorizePredicate", "Ivdep"]>, EnumArgument<"State", "LoopHintState", ["enable", "disable", "numeric", "assume_safety", "full"], ["Enable", "Disable", "Numeric", "AssumeSafety", "Full"]>, @@ -3061,6 +3061,7 @@ case PipelineInitiationInterval: return "pipeline_initiation_interval"; case Distribute: return "distribute"; case VectorizePredicate: return "vectorize_predicate"; + case Ivdep: return "ivdep"; } llvm_unreachable("Unhandled LoopHint option."); } Index: clang/include/clang/Basic/AttrDocs.td =================================================================== --- clang/include/clang/Basic/AttrDocs.td +++ clang/include/clang/Basic/AttrDocs.td @@ -2787,10 +2787,11 @@ let Content = [{ The ``#pragma clang loop`` directive allows loop optimization hints to be specified for the subsequent loop. The directive allows pipelining to be -disabled, or vectorization, vector predication, interleaving, and unrolling to -be enabled or disabled. Vector width, vector predication, interleave count, -unrolling count, and the initiation interval for pipelining can be explicitly -specified. See `language extensions +disabled, vector dependencies to be ignored, or vectorization, vector +predication, interleaving, and unrolling to be enabled or disabled. 
Vector +width, vector predication, interleave count, unrolling count, and the +initiation interval for pipelining can be explicitly specified. +See `language extensions `_ for details. }]; Index: clang/include/clang/Basic/DiagnosticParseKinds.td =================================================================== --- clang/include/clang/Basic/DiagnosticParseKinds.td +++ clang/include/clang/Basic/DiagnosticParseKinds.td @@ -1231,7 +1231,7 @@ def err_pragma_loop_invalid_option : Error< "%select{invalid|missing}0 option%select{ %1|}0; expected vectorize, " "vectorize_width, interleave, interleave_count, unroll, unroll_count, " - "pipeline, pipeline_initiation_interval, vectorize_predicate, or distribute">; + "pipeline, pipeline_initiation_interval, vectorize_predicate, ivdep, or distribute">; def err_pragma_fp_invalid_option : Error< "%select{invalid|missing}0 option%select{ %1|}0; expected contract">; @@ -1246,6 +1246,8 @@ "invalid argument; expected 'enable'%select{|, 'full'}0%select{|, 'assume_safety'}1 or 'disable'">; def err_pragma_pipeline_invalid_keyword : Error< "invalid argument; expected 'disable'">; +def err_pragma_ivdep_invalid_keyword : Error< + "invalid argument; expected 'enable'">; // Pragma unroll support. def warn_pragma_unroll_cuda_value_in_parens : Warning< Index: clang/lib/CodeGen/CGLoopInfo.h =================================================================== --- clang/lib/CodeGen/CGLoopInfo.h +++ clang/lib/CodeGen/CGLoopInfo.h @@ -54,6 +54,9 @@ /// Value for llvm.loop.vectorize.predicate metadata LVEnableState VectorizePredicateEnable; + /// Value for llvm.loop.vectorize.ivdep.enable metadata. + bool IvdepEnable; + /// Value for llvm.loop.vectorize.width metadata. unsigned VectorizeWidth; @@ -250,6 +253,9 @@ StagedAttrs.UnrollAndJamEnable = State; } + /// Set the next pushed loop ivdep enable state. + void setIvdepEnable(bool S) { StagedAttrs.IvdepEnable = S; } + /// Set the vectorize width for the next loop pushed. 
void setVectorizeWidth(unsigned W) { StagedAttrs.VectorizeWidth = W; }
 
Index: clang/lib/CodeGen/CGLoopInfo.cpp
===================================================================
--- clang/lib/CodeGen/CGLoopInfo.cpp
+++ clang/lib/CodeGen/CGLoopInfo.cpp
@@ -268,6 +268,16 @@
     Args.push_back(MDNode::get(Ctx, Vals));
   }
 
+  // Setting vectorize.ivdep.enable
+  if (Attrs.IvdepEnable &&
+      Attrs.VectorizeEnable != LoopAttributes::Disable) {
+    Metadata *Vals[] = {
+        MDString::get(Ctx, "llvm.loop.vectorize.ivdep.enable"),
+        ConstantAsMetadata::get(ConstantInt::get(llvm::Type::getInt1Ty(Ctx),
+                                                 true))};
+    Args.push_back(MDNode::get(Ctx, Vals));
+  }
+
   // Setting vectorize.width
   if (Attrs.VectorizeWidth > 0) {
     // This implies vectorize.enable = true, but only add it when it is not
@@ -440,8 +450,9 @@
     : IsParallel(IsParallel), VectorizeEnable(LoopAttributes::Unspecified),
       UnrollEnable(LoopAttributes::Unspecified),
       UnrollAndJamEnable(LoopAttributes::Unspecified),
-      VectorizePredicateEnable(LoopAttributes::Unspecified), VectorizeWidth(0),
-      InterleaveCount(0), UnrollCount(0), UnrollAndJamCount(0),
+      VectorizePredicateEnable(LoopAttributes::Unspecified),
+      IvdepEnable(false), VectorizeWidth(0), InterleaveCount(0),
+      UnrollCount(0), UnrollAndJamCount(0),
       DistributeEnable(LoopAttributes::Unspecified), PipelineDisabled(false),
       PipelineInitiationInterval(0) {}
 
@@ -455,6 +466,7 @@
   UnrollEnable = LoopAttributes::Unspecified;
   UnrollAndJamEnable = LoopAttributes::Unspecified;
   VectorizePredicateEnable = LoopAttributes::Unspecified;
+  IvdepEnable = false;
   DistributeEnable = LoopAttributes::Unspecified;
   PipelineDisabled = false;
   PipelineInitiationInterval = 0;
@@ -475,7 +487,7 @@
   if (!Attrs.IsParallel && Attrs.VectorizeWidth == 0 &&
       Attrs.InterleaveCount == 0 && Attrs.UnrollCount == 0 &&
       Attrs.UnrollAndJamCount == 0 && !Attrs.PipelineDisabled &&
-      Attrs.PipelineInitiationInterval == 0 &&
+      Attrs.PipelineInitiationInterval == 0 && !Attrs.IvdepEnable &&
       Attrs.VectorizePredicateEnable ==
LoopAttributes::Unspecified && Attrs.VectorizeEnable == LoopAttributes::Unspecified && Attrs.UnrollEnable == LoopAttributes::Unspecified && @@ -512,6 +524,7 @@ BeforeJam.VectorizeEnable = Attrs.VectorizeEnable; BeforeJam.DistributeEnable = Attrs.DistributeEnable; BeforeJam.VectorizePredicateEnable = Attrs.VectorizePredicateEnable; + BeforeJam.IvdepEnable = Attrs.IvdepEnable; switch (Attrs.UnrollEnable) { case LoopAttributes::Unspecified: @@ -528,6 +541,7 @@ } AfterJam.VectorizePredicateEnable = Attrs.VectorizePredicateEnable; + AfterJam.IvdepEnable = Attrs.IvdepEnable; AfterJam.UnrollCount = Attrs.UnrollCount; AfterJam.PipelineDisabled = Attrs.PipelineDisabled; AfterJam.PipelineInitiationInterval = Attrs.PipelineInitiationInterval; @@ -647,6 +661,7 @@ case LoopHintAttr::PipelineDisabled: setPipelineDisabled(true); break; + case LoopHintAttr::Ivdep: case LoopHintAttr::UnrollCount: case LoopHintAttr::UnrollAndJamCount: case LoopHintAttr::VectorizeWidth: @@ -674,6 +689,10 @@ case LoopHintAttr::Distribute: setDistributeState(true); break; + case LoopHintAttr::Ivdep: + setIvdepEnable(true); + setVectorizeEnable(true); + break; case LoopHintAttr::UnrollCount: case LoopHintAttr::UnrollAndJamCount: case LoopHintAttr::VectorizeWidth: @@ -695,6 +714,7 @@ case LoopHintAttr::Unroll: case LoopHintAttr::UnrollAndJam: case LoopHintAttr::VectorizePredicate: + case LoopHintAttr::Ivdep: case LoopHintAttr::UnrollCount: case LoopHintAttr::UnrollAndJamCount: case LoopHintAttr::VectorizeWidth: @@ -716,6 +736,7 @@ break; case LoopHintAttr::Vectorize: case LoopHintAttr::Interleave: + case LoopHintAttr::Ivdep: case LoopHintAttr::UnrollCount: case LoopHintAttr::UnrollAndJamCount: case LoopHintAttr::VectorizeWidth: @@ -749,6 +770,7 @@ case LoopHintAttr::UnrollAndJam: case LoopHintAttr::VectorizePredicate: case LoopHintAttr::Vectorize: + case LoopHintAttr::Ivdep: case LoopHintAttr::Interleave: case LoopHintAttr::Distribute: case LoopHintAttr::PipelineDisabled: Index: 
clang/lib/Parse/ParsePragma.cpp =================================================================== --- clang/lib/Parse/ParsePragma.cpp +++ clang/lib/Parse/ParsePragma.cpp @@ -1057,23 +1057,26 @@ bool OptionUnrollAndJam = false; bool OptionDistribute = false; bool OptionPipelineDisabled = false; + bool OptionIvdepEnabled = false; bool StateOption = false; if (OptionInfo) { // Pragma Unroll does not specify an option. OptionUnroll = OptionInfo->isStr("unroll"); OptionUnrollAndJam = OptionInfo->isStr("unroll_and_jam"); OptionDistribute = OptionInfo->isStr("distribute"); OptionPipelineDisabled = OptionInfo->isStr("pipeline"); + OptionIvdepEnabled = OptionInfo->isStr("ivdep"); StateOption = llvm::StringSwitch(OptionInfo->getName()) .Case("vectorize", true) .Case("interleave", true) .Case("vectorize_predicate", true) .Default(false) || OptionUnroll || OptionUnrollAndJam || OptionDistribute || - OptionPipelineDisabled; + OptionPipelineDisabled || OptionIvdepEnabled; } bool AssumeSafetyArg = !OptionUnroll && !OptionUnrollAndJam && - !OptionDistribute && !OptionPipelineDisabled; + !OptionDistribute && !OptionPipelineDisabled && + !OptionIvdepEnabled; // Verify loop hint has an argument. 
if (Toks[0].is(tok::eof)) { ConsumeAnnotationToken(); @@ -1092,7 +1095,7 @@ bool Valid = StateInfo && llvm::StringSwitch(StateInfo->getName()) - .Case("disable", true) + .Case("disable", !OptionIvdepEnabled) .Case("enable", !OptionPipelineDisabled) .Case("full", OptionUnroll || OptionUnrollAndJam) .Case("assume_safety", AssumeSafetyArg) @@ -1100,6 +1103,8 @@ if (!Valid) { if (OptionPipelineDisabled) { Diag(Toks[0].getLocation(), diag::err_pragma_pipeline_invalid_keyword); + } else if (OptionIvdepEnabled) { + Diag(Toks[0].getLocation(), diag::err_pragma_ivdep_invalid_keyword); } else { Diag(Toks[0].getLocation(), diag::err_pragma_invalid_keyword) << /*FullKeyword=*/(OptionUnroll || OptionUnrollAndJam) @@ -2823,6 +2828,7 @@ /// 'interleave' '(' loop-hint-keyword ')' /// 'unroll' '(' unroll-hint-keyword ')' /// 'vectorize_predicate' '(' loop-hint-keyword ')' +/// 'ivdep' '(' enable ')' /// 'vectorize_width' '(' loop-hint-value ')' /// 'interleave_count' '(' loop-hint-value ')' /// 'unroll_count' '(' loop-hint-value ')' @@ -2885,6 +2891,7 @@ .Case("unroll", true) .Case("distribute", true) .Case("vectorize_predicate", true) + .Case("ivdep", true) .Case("vectorize_width", true) .Case("interleave_count", true) .Case("unroll_count", true) Index: clang/lib/Sema/SemaStmtAttr.cpp =================================================================== --- clang/lib/Sema/SemaStmtAttr.cpp +++ clang/lib/Sema/SemaStmtAttr.cpp @@ -130,6 +130,7 @@ .Case("vectorize_width", LoopHintAttr::VectorizeWidth) .Case("interleave", LoopHintAttr::Interleave) .Case("vectorize_predicate", LoopHintAttr::VectorizePredicate) + .Case("ivdep", LoopHintAttr::Ivdep) .Case("interleave_count", LoopHintAttr::InterleaveCount) .Case("unroll", LoopHintAttr::Unroll) .Case("unroll_count", LoopHintAttr::UnrollCount) @@ -149,6 +150,7 @@ } else if (Option == LoopHintAttr::Vectorize || Option == LoopHintAttr::Interleave || Option == LoopHintAttr::VectorizePredicate || + Option == LoopHintAttr::Ivdep || Option == 
LoopHintAttr::Unroll || Option == LoopHintAttr::Distribute || Option == LoopHintAttr::PipelineDisabled) { @@ -187,7 +189,7 @@ const LoopHintAttr *NumericAttr; } HintAttrs[] = {{nullptr, nullptr}, {nullptr, nullptr}, {nullptr, nullptr}, {nullptr, nullptr}, {nullptr, nullptr}, {nullptr, nullptr}, - {nullptr, nullptr}}; + {nullptr, nullptr}, {nullptr, nullptr}}; for (const auto *I : Attrs) { const LoopHintAttr *LH = dyn_cast(I); @@ -200,6 +202,7 @@ enum { Vectorize, Interleave, + Ivdep, Unroll, UnrollAndJam, Distribute, @@ -234,13 +237,18 @@ case LoopHintAttr::VectorizePredicate: Category = VectorizePredicate; break; + case LoopHintAttr::Ivdep: + Category = Ivdep; + break; }; assert(Category < sizeof(HintAttrs) / sizeof(HintAttrs[0])); auto &CategoryState = HintAttrs[Category]; const LoopHintAttr *PrevAttr; if (Option == LoopHintAttr::Vectorize || - Option == LoopHintAttr::Interleave || Option == LoopHintAttr::Unroll || + Option == LoopHintAttr::Interleave || + Option == LoopHintAttr::Ivdep || + Option == LoopHintAttr::Unroll || Option == LoopHintAttr::UnrollAndJam || Option == LoopHintAttr::VectorizePredicate || Option == LoopHintAttr::PipelineDisabled || Index: clang/test/AST/ast-print-pragmas.cpp =================================================================== --- clang/test/AST/ast-print-pragmas.cpp +++ clang/test/AST/ast-print-pragmas.cpp @@ -33,11 +33,13 @@ // CHECK-NEXT: #pragma clang loop vectorize(disable) // CHECK-NEXT: #pragma clang loop interleave(enable) // CHECK-NEXT: #pragma clang loop vectorize_predicate(enable) +// CHECK-NEXT: #pragma clang loop ivdep(enable) #pragma clang loop distribute(enable) #pragma clang loop vectorize(disable) #pragma clang loop interleave(enable) #pragma clang loop vectorize_predicate(enable) +#pragma clang loop ivdep(enable) // CHECK-NEXT: while (i - 2 < Length) while (i - 2 < Length) { List[i] = i * 2; @@ -66,4 +68,3 @@ // MS-EXT-NEXT: int x = 3 __declspec(thread); int __declspec(thread) x = 3; #endif //MS_EXT - Index: 
clang/test/CodeGenCXX/pragma-loop-ivdep.cpp
===================================================================
--- /dev/null
+++ clang/test/CodeGenCXX/pragma-loop-ivdep.cpp
@@ -0,0 +1,37 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++11 -emit-llvm -o - %s | FileCheck %s
+
+// Check that '#pragma clang loop ivdep(enable)' emits
+// llvm.loop.vectorize.ivdep.enable metadata and implies vectorize(enable).
+
+void test0(int *a, int *b, int LEN_1D) {
+  // CHECK-LABEL: @{{.*}}test0{{.*}}(
+  // CHECK: br label {{.*}}, !llvm.loop ![[LOOP0:.*]]
+
+#pragma clang loop vectorize(enable)
+  for (int i = 0; i < LEN_1D; i++)
+    a[b[i]] = a[b[i]] + 1;
+}
+
+void test1(int *a, int *b, int LEN_1D) {
+  // CHECK-LABEL: @{{.*}}test1{{.*}}(
+  // CHECK: br label {{.*}}, !llvm.loop ![[LOOP1:.*]]
+
+#pragma clang loop ivdep(enable)
+  for (int i = 0; i < LEN_1D; i++)
+    a[b[i]] = a[b[i]] + 1;
+}
+
+void test2(int *a, int *b, int LEN_1D) {
+  // CHECK-LABEL: @{{.*}}test2{{.*}}(
+  // CHECK: br label {{.*}}, !llvm.loop ![[LOOP2:.*]]
+
+#pragma clang loop vectorize(enable) ivdep(enable)
+  for (int i = 0; i < LEN_1D; i++)
+    a[b[i]] = a[b[i]] + 1;
+}
+
+// CHECK: ![[LOOP0]] = distinct !{![[LOOP0]], ![[VEC_ENABLE:[0-9]+]]}
+// CHECK-NEXT: ![[VEC_ENABLE]] = !{!"llvm.loop.vectorize.enable", i1 true}
+// CHECK-NEXT: ![[LOOP1]] = distinct !{![[LOOP1]], ![[IVDEP_ENABLE:[0-9]+]], ![[VEC_ENABLE]]}
+// CHECK-NEXT: ![[IVDEP_ENABLE]] = !{!"llvm.loop.vectorize.ivdep.enable", i1 true}
+// CHECK-NEXT: ![[LOOP2]] = distinct !{![[LOOP2]], ![[IVDEP_ENABLE]], ![[VEC_ENABLE]]}
Index: clang/test/Parser/pragma-loop.cpp
===================================================================
--- clang/test/Parser/pragma-loop.cpp
+++ clang/test/Parser/pragma-loop.cpp
@@ -82,6 +82,7 @@
 #pragma clang loop vectorize(enable)
 #pragma clang loop interleave(enable)
 #pragma clang loop vectorize_predicate(enable)
+#pragma clang loop ivdep(enable)
 #pragma clang loop unroll(full)
   while (i + 1 < Length) {
     List[i] = i;
@@ -135,12 +136,14 @@
 /* expected-error {{expected '('}} */ #pragma clang loop vectorize_predicate
 /* expected-error {{expected '('}} */ #pragma clang loop unroll
 /* expected-error {{expected '('}} */ #pragma clang loop distribute
+/* expected-error
{{expected '('}} */ #pragma clang loop ivdep /* expected-error {{expected ')'}} */ #pragma clang loop vectorize(enable /* expected-error {{expected ')'}} */ #pragma clang loop interleave(enable /* expected-error {{expected ')'}} */ #pragma clang loop vectorize_predicate(enable /* expected-error {{expected ')'}} */ #pragma clang loop unroll(full /* expected-error {{expected ')'}} */ #pragma clang loop distribute(enable +/* expected-error {{expected ')'}} */ #pragma clang loop ivdep(enable /* expected-error {{expected ')'}} */ #pragma clang loop vectorize_width(4 /* expected-error {{expected ')'}} */ #pragma clang loop interleave_count(4 @@ -151,7 +154,7 @@ /* expected-error {{missing argument; expected 'enable', 'full' or 'disable'}} */ #pragma clang loop unroll() /* expected-error {{missing argument; expected 'enable' or 'disable'}} */ #pragma clang loop distribute() -/* expected-error {{missing option; expected vectorize, vectorize_width, interleave, interleave_count, unroll, unroll_count, pipeline, pipeline_initiation_interval, vectorize_predicate, or distribute}} */ #pragma clang loop +/* expected-error {{missing option; expected vectorize, vectorize_width, interleave, interleave_count, unroll, unroll_count, pipeline, pipeline_initiation_interval, vectorize_predicate, ivdep, or distribute}} */ #pragma clang loop /* expected-error {{invalid option 'badkeyword'}} */ #pragma clang loop badkeyword /* expected-error {{invalid option 'badkeyword'}} */ #pragma clang loop badkeyword(enable) /* expected-error {{invalid option 'badkeyword'}} */ #pragma clang loop vectorize(enable) badkeyword(4) @@ -205,6 +208,7 @@ /* expected-error {{invalid argument; expected 'enable', 'assume_safety' or 'disable'}} */ #pragma clang loop interleave(badidentifier) /* expected-error {{invalid argument; expected 'enable', 'full' or 'disable'}} */ #pragma clang loop unroll(badidentifier) /* expected-error {{invalid argument; expected 'enable' or 'disable'}} */ #pragma clang loop 
distribute(badidentifier) +/* expected-error {{invalid argument; expected 'enable'}} */ #pragma clang loop ivdep(badidentifier) while (i-7 < Length) { List[i] = i; } Index: clang/test/Parser/pragma-unroll-and-jam.cpp =================================================================== --- clang/test/Parser/pragma-unroll-and-jam.cpp +++ clang/test/Parser/pragma-unroll-and-jam.cpp @@ -67,7 +67,7 @@ } // pragma clang unroll_and_jam is disabled for the moment -/* expected-error {{invalid option 'unroll_and_jam'; expected vectorize, vectorize_width, interleave, interleave_count, unroll, unroll_count, pipeline, pipeline_initiation_interval, vectorize_predicate, or distribute}} */ #pragma clang loop unroll_and_jam(4) +/* expected-error {{invalid option 'unroll_and_jam'; expected vectorize, vectorize_width, interleave, interleave_count, unroll, unroll_count, pipeline, pipeline_initiation_interval, vectorize_predicate, ivdep, or distribute}} */ #pragma clang loop unroll_and_jam(4) for (int i = 0; i < Length; i++) { for (int j = 0; j < Length; j++) { List[i * Length + j] = Value;