Index: clang/docs/LanguageExtensions.rst =================================================================== --- clang/docs/LanguageExtensions.rst +++ clang/docs/LanguageExtensions.rst @@ -4024,9 +4024,9 @@ The ``#pragma clang loop`` directive is used to specify hints for optimizing the subsequent for, while, do-while, or c++11 range-based for loop. The directive -provides options for vectorization, interleaving, predication, unrolling and -distribution. Loop hints can be specified before any loop and will be ignored if -the optimization is not safe to apply. +provides options for vectorization, interleaving, predication, unrolling, +distribution and prefetch. Loop hints can be specified before any loop and will be +ignored if the optimization is not safe to apply. There are loop hints that control transformations (e.g. vectorization, loop unrolling) and there are loop hints that set transformation options (e.g. @@ -4105,6 +4105,25 @@ might be more efficient when vector predication is efficiently supported by the target platform. + +Loop Prefetch +------------- + +Loop data prefetching can improve the performance of programs affected by +cache misses, but poorly placed prefetches may actually degrade performance. +Specifying ``prefetch(disable)`` prevents the compiler from automatically +prefetching loop data. + +.. code-block:: c++ + + #pragma clang loop prefetch(disable) + for (...) { + ... + } + +When ``prefetch(disable)`` is specified, only data prefetches requested via +``__builtin_prefetch`` will work. + Loop Unrolling -------------- Index: clang/include/clang/Basic/Attr.td =================================================================== --- clang/include/clang/Basic/Attr.td +++ clang/include/clang/Basic/Attr.td @@ -3648,6 +3648,7 @@ /// distribute: attempt to distribute loop if State == Enable. /// pipeline: disable pipelining loop if State == Disable.
/// pipeline_initiation_interval: create loop schedule with initiation interval equal to 'Value'. + /// prefetch: disable loop data prefetch if State == Disable. /// #pragma unroll directive /// : fully unrolls loop. @@ -3663,11 +3664,11 @@ ["vectorize", "vectorize_width", "interleave", "interleave_count", "unroll", "unroll_count", "unroll_and_jam", "unroll_and_jam_count", "pipeline", "pipeline_initiation_interval", "distribute", - "vectorize_predicate"], + "vectorize_predicate", "prefetch"], ["Vectorize", "VectorizeWidth", "Interleave", "InterleaveCount", "Unroll", "UnrollCount", "UnrollAndJam", "UnrollAndJamCount", "PipelineDisabled", "PipelineInitiationInterval", "Distribute", - "VectorizePredicate"]>, + "VectorizePredicate", "PrefetchDisabled"]>, EnumArgument<"State", "LoopHintState", ["enable", "disable", "numeric", "fixed_width", "scalable_width", "assume_safety", "full"], @@ -3690,6 +3691,7 @@ case PipelineInitiationInterval: return "pipeline_initiation_interval"; case Distribute: return "distribute"; case VectorizePredicate: return "vectorize_predicate"; + case PrefetchDisabled: return "prefetch"; } llvm_unreachable("Unhandled LoopHint option."); } Index: clang/include/clang/Basic/DiagnosticParseKinds.td =================================================================== --- clang/include/clang/Basic/DiagnosticParseKinds.td +++ clang/include/clang/Basic/DiagnosticParseKinds.td @@ -1523,12 +1523,13 @@ // Pragma loop support. 
def err_pragma_loop_missing_argument : Error< - "missing argument; expected %select{an integer value|" - "'enable'%select{|, 'full'}1%select{|, 'assume_safety'}2 or 'disable'}0">; + "missing argument; expected %select{an integer value|%select{" + "'enable'%select{|, 'full'}2%select{|, 'assume_safety'}3 or 'disable'|'disable'}1}0">; def err_pragma_loop_invalid_option : Error< "%select{invalid|missing}0 option%select{ %1|}0; expected vectorize, " "vectorize_width, interleave, interleave_count, unroll, unroll_count, " - "pipeline, pipeline_initiation_interval, vectorize_predicate, or distribute">; + "pipeline, pipeline_initiation_interval, vectorize_predicate, distribute, " + "or prefetch">; def err_pragma_loop_invalid_vectorize_option : Error< "vectorize_width loop hint malformed; use vectorize_width(X, fixed) or " "vectorize_width(X, scalable) where X is an integer, or vectorize_width('fixed' or 'scalable')">; @@ -1548,7 +1549,7 @@ def err_pragma_invalid_keyword : Error< "invalid argument; expected 'enable'%select{|, 'full'}0%select{|, 'assume_safety'}1 or 'disable'">; -def err_pragma_pipeline_invalid_keyword : Error< +def err_pragma_invalid_keyword_disable : Error< "invalid argument; expected 'disable'">; // Pragma unroll support. Index: clang/lib/CodeGen/CGLoopInfo.h =================================================================== --- clang/lib/CodeGen/CGLoopInfo.h +++ clang/lib/CodeGen/CGLoopInfo.h @@ -70,6 +70,9 @@ /// llvm.unroll. unsigned UnrollAndJamCount; + /// Value for llvm.loop.prefetch.disable metadata. + bool PrefetchDisabled; + /// Value for llvm.loop.distribute.enable metadata. 
LVEnableState DistributeEnable; @@ -172,6 +175,10 @@ createFullUnrollMetadata(const LoopAttributes &Attrs, llvm::ArrayRef LoopProperties, bool &HasUserTransforms); + llvm::MDNode * + createLoopPrefetchMetadata(const LoopAttributes &Attrs, + llvm::ArrayRef LoopProperties, + bool &HasUserTransforms); /// @} /// Create a LoopID for this loop, including transformation-unspecific @@ -282,6 +289,9 @@ StagedAttrs.PipelineInitiationInterval = C; } + /// Set whether data prefetching is disabled for the next loop pushed. + void setPrefetchDisabled(bool S) { StagedAttrs.PrefetchDisabled = S; } + /// Set no progress for the next loop pushed. void setMustProgress(bool P) { StagedAttrs.MustProgress = P; } Index: clang/lib/CodeGen/CGLoopInfo.cpp =================================================================== --- clang/lib/CodeGen/CGLoopInfo.cpp +++ clang/lib/CodeGen/CGLoopInfo.cpp @@ -376,6 +376,53 @@ return LoopID; } +MDNode * +LoopInfo::createLoopPrefetchMetadata(const LoopAttributes &Attrs, + ArrayRef LoopProperties, + bool &HasUserTransforms) { + LLVMContext &Ctx = Header->getContext(); + std::optional Enabled; + if (Attrs.PrefetchDisabled) + Enabled = false; + + if (Enabled != false) { + SmallVector NewLoopProperties; + if (Enabled == true) { + NewLoopProperties.append(LoopProperties.begin(), LoopProperties.end()); + NewLoopProperties.push_back( + MDNode::get(Ctx, {MDString::get(Ctx, "llvm.loop.prefetch.disable"), + ConstantAsMetadata::get(ConstantInt::get( + llvm::Type::getInt1Ty(Ctx), 0))})); + LoopProperties = NewLoopProperties; + } + return createLoopDistributeMetadata(Attrs, LoopProperties, + HasUserTransforms); + } + + bool FollowupHasTransforms = false; + MDNode *Followup = + createLoopDistributeMetadata(Attrs, LoopProperties, FollowupHasTransforms); + + SmallVector Args; + Args.push_back(nullptr); + Args.append(LoopProperties.begin(), LoopProperties.end()); + + Metadata *Vals[] = {MDString::get(Ctx, "llvm.loop.prefetch.disable"), + ConstantAsMetadata::get(ConstantInt::get(
llvm::Type::getInt1Ty(Ctx), 1))}; + Args.push_back(MDNode::get(Ctx, Vals)); + + if (FollowupHasTransforms) + Args.push_back(MDNode::get( + Ctx, + {MDString::get(Ctx, "llvm.loop.prefetch.followup_all"), Followup})); + + MDNode *LoopID = MDNode::getDistinct(Ctx, Args); + LoopID->replaceOperandWith(0, LoopID); + HasUserTransforms = true; + return LoopID; +} + MDNode *LoopInfo::createFullUnrollMetadata(const LoopAttributes &Attrs, ArrayRef LoopProperties, bool &HasUserTransforms) { @@ -395,8 +442,8 @@ MDNode::get(Ctx, MDString::get(Ctx, "llvm.loop.unroll.disable"))); LoopProperties = NewLoopProperties; } - return createLoopDistributeMetadata(Attrs, LoopProperties, - HasUserTransforms); + return createLoopPrefetchMetadata(Attrs, LoopProperties, + HasUserTransforms); } SmallVector Args; @@ -451,7 +498,7 @@ UnrollAndJamEnable(LoopAttributes::Unspecified), VectorizePredicateEnable(LoopAttributes::Unspecified), VectorizeWidth(0), VectorizeScalable(LoopAttributes::Unspecified), InterleaveCount(0), - UnrollCount(0), UnrollAndJamCount(0), + UnrollCount(0), UnrollAndJamCount(0), PrefetchDisabled(false), DistributeEnable(LoopAttributes::Unspecified), PipelineDisabled(false), PipelineInitiationInterval(0), MustProgress(false) {} @@ -466,6 +513,7 @@ UnrollEnable = LoopAttributes::Unspecified; UnrollAndJamEnable = LoopAttributes::Unspecified; VectorizePredicateEnable = LoopAttributes::Unspecified; + PrefetchDisabled = false; DistributeEnable = LoopAttributes::Unspecified; PipelineDisabled = false; PipelineInitiationInterval = 0; @@ -493,8 +541,9 @@ Attrs.VectorizeEnable == LoopAttributes::Unspecified && Attrs.UnrollEnable == LoopAttributes::Unspecified && Attrs.UnrollAndJamEnable == LoopAttributes::Unspecified && - Attrs.DistributeEnable == LoopAttributes::Unspecified && !StartLoc && - !EndLoc && !Attrs.MustProgress) + Attrs.DistributeEnable == LoopAttributes::Unspecified && + !Attrs.PrefetchDisabled && !StartLoc && !EndLoc && + !Attrs.MustProgress) return; TempLoopID = 
MDNode::getTemporary(Header->getContext(), std::nullopt); @@ -526,6 +575,7 @@ BeforeJam.VectorizeEnable = Attrs.VectorizeEnable; BeforeJam.DistributeEnable = Attrs.DistributeEnable; BeforeJam.VectorizePredicateEnable = Attrs.VectorizePredicateEnable; + BeforeJam.PrefetchDisabled = Attrs.PrefetchDisabled; switch (Attrs.UnrollEnable) { case LoopAttributes::Unspecified: @@ -545,6 +595,7 @@ AfterJam.UnrollCount = Attrs.UnrollCount; AfterJam.PipelineDisabled = Attrs.PipelineDisabled; AfterJam.PipelineInitiationInterval = Attrs.PipelineInitiationInterval; + AfterJam.PrefetchDisabled = Attrs.PrefetchDisabled; // If this loop is subject of an unroll-and-jam by the parent loop, and has // an unroll-and-jam annotation itself, we have to decide whether to first @@ -663,6 +714,9 @@ case LoopHintAttr::PipelineDisabled: setPipelineDisabled(true); break; + case LoopHintAttr::PrefetchDisabled: + setPrefetchDisabled(true); + break; case LoopHintAttr::UnrollCount: case LoopHintAttr::UnrollAndJamCount: case LoopHintAttr::VectorizeWidth: @@ -696,6 +750,7 @@ case LoopHintAttr::InterleaveCount: case LoopHintAttr::PipelineDisabled: case LoopHintAttr::PipelineInitiationInterval: + case LoopHintAttr::PrefetchDisabled: llvm_unreachable("Options cannot enabled."); break; } @@ -718,6 +773,7 @@ case LoopHintAttr::Distribute: case LoopHintAttr::PipelineDisabled: case LoopHintAttr::PipelineInitiationInterval: + case LoopHintAttr::PrefetchDisabled: llvm_unreachable("Options cannot be used to assume mem safety."); break; } @@ -740,6 +796,7 @@ case LoopHintAttr::PipelineDisabled: case LoopHintAttr::PipelineInitiationInterval: case LoopHintAttr::VectorizePredicate: + case LoopHintAttr::PrefetchDisabled: llvm_unreachable("Options cannot be used with 'full' hint."); break; } @@ -781,6 +838,7 @@ case LoopHintAttr::Interleave: case LoopHintAttr::Distribute: case LoopHintAttr::PipelineDisabled: + case LoopHintAttr::PrefetchDisabled: llvm_unreachable("Options cannot be assigned a value."); break; } Index: 
clang/lib/Parse/ParsePragma.cpp =================================================================== --- clang/lib/Parse/ParsePragma.cpp +++ clang/lib/Parse/ParsePragma.cpp @@ -1340,28 +1340,32 @@ bool OptionUnrollAndJam = false; bool OptionDistribute = false; bool OptionPipelineDisabled = false; + bool OptionPrefetchDisabled = false; bool StateOption = false; if (OptionInfo) { // Pragma Unroll does not specify an option. OptionUnroll = OptionInfo->isStr("unroll"); OptionUnrollAndJam = OptionInfo->isStr("unroll_and_jam"); OptionDistribute = OptionInfo->isStr("distribute"); OptionPipelineDisabled = OptionInfo->isStr("pipeline"); + OptionPrefetchDisabled = OptionInfo->isStr("prefetch"); StateOption = llvm::StringSwitch(OptionInfo->getName()) .Case("vectorize", true) .Case("interleave", true) .Case("vectorize_predicate", true) .Default(false) || OptionUnroll || OptionUnrollAndJam || OptionDistribute || - OptionPipelineDisabled; + OptionPipelineDisabled || OptionPrefetchDisabled; } bool AssumeSafetyArg = !OptionUnroll && !OptionUnrollAndJam && - !OptionDistribute && !OptionPipelineDisabled; + !OptionDistribute && !OptionPipelineDisabled && + !OptionPrefetchDisabled; // Verify loop hint has an argument. 
if (Toks[0].is(tok::eof)) { ConsumeAnnotationToken(); Diag(Toks[0].getLocation(), diag::err_pragma_loop_missing_argument) << /*StateArgument=*/StateOption + << /*OnlyDisable=*/OptionPrefetchDisabled << /*FullKeyword=*/(OptionUnroll || OptionUnrollAndJam) << /*AssumeSafetyKeyword=*/AssumeSafetyArg; return false; @@ -1376,13 +1380,14 @@ bool Valid = StateInfo && llvm::StringSwitch(StateInfo->getName()) .Case("disable", true) - .Case("enable", !OptionPipelineDisabled) + .Case("enable", !OptionPipelineDisabled && !OptionPrefetchDisabled) .Case("full", OptionUnroll || OptionUnrollAndJam) .Case("assume_safety", AssumeSafetyArg) .Default(false); if (!Valid) { - if (OptionPipelineDisabled) { - Diag(Toks[0].getLocation(), diag::err_pragma_pipeline_invalid_keyword); + // TODO: Add support for `prefetch(enable)` + if (OptionPipelineDisabled || OptionPrefetchDisabled) { + Diag(Toks[0].getLocation(), diag::err_pragma_invalid_keyword_disable); } else { Diag(Toks[0].getLocation(), diag::err_pragma_invalid_keyword) << /*FullKeyword=*/(OptionUnroll || OptionUnrollAndJam) @@ -3440,6 +3445,7 @@ /// 'unroll_count' '(' loop-hint-value ')' /// 'pipeline' '(' disable ')' /// 'pipeline_initiation_interval' '(' loop-hint-value ')' +/// 'prefetch' '(' disable ')' /// /// loop-hint-keyword: /// 'enable' @@ -3502,6 +3508,7 @@ .Case("unroll_count", true) .Case("pipeline", true) .Case("pipeline_initiation_interval", true) + .Case("prefetch", true) .Default(false); if (!OptionValid) { PP.Diag(Tok.getLocation(), diag::err_pragma_loop_invalid_option) Index: clang/lib/Sema/SemaStmtAttr.cpp =================================================================== --- clang/lib/Sema/SemaStmtAttr.cpp +++ clang/lib/Sema/SemaStmtAttr.cpp @@ -133,6 +133,7 @@ .Case("pipeline_initiation_interval", LoopHintAttr::PipelineInitiationInterval) .Case("distribute", LoopHintAttr::Distribute) + .Case("prefetch", LoopHintAttr::PrefetchDisabled) .Default(LoopHintAttr::Vectorize); if (Option == LoopHintAttr::VectorizeWidth) 
{ assert((ValueExpr || (StateLoc && StateLoc->Ident)) && @@ -155,7 +156,8 @@ Option == LoopHintAttr::VectorizePredicate || Option == LoopHintAttr::Unroll || Option == LoopHintAttr::Distribute || - Option == LoopHintAttr::PipelineDisabled) { + Option == LoopHintAttr::PipelineDisabled || + Option == LoopHintAttr::PrefetchDisabled) { assert(StateLoc && StateLoc->Ident && "Loop hint must have an argument"); if (StateLoc->Ident->isStr("disable")) State = LoopHintAttr::Disable; @@ -325,6 +327,9 @@ // The vector predication only has a state form that is exposed by // #pragma clang loop vectorize_predicate (enable | disable). VectorizePredicate, + // The loop data prefetch only has a state form that is exposed by + // #pragma clang loop prefetch (disable) + Prefetch, // This serves as a indicator to how many category are listed in this enum. NumberOfCategories }; @@ -372,6 +377,9 @@ case LoopHintAttr::VectorizePredicate: Category = VectorizePredicate; break; + case LoopHintAttr::PrefetchDisabled: + Category = Prefetch; + break; }; assert(Category != NumberOfCategories && "Unhandled loop hint option"); @@ -382,7 +390,8 @@ Option == LoopHintAttr::UnrollAndJam || Option == LoopHintAttr::VectorizePredicate || Option == LoopHintAttr::PipelineDisabled || - Option == LoopHintAttr::Distribute) { + Option == LoopHintAttr::Distribute || + Option == LoopHintAttr::PrefetchDisabled) { // Enable|Disable|AssumeSafety hint. For example, vectorize(enable). 
PrevAttr = CategoryState.StateAttr; CategoryState.StateAttr = LH; Index: clang/test/CodeGenCXX/pragma-loop-prefetch.cpp =================================================================== --- /dev/null +++ clang/test/CodeGenCXX/pragma-loop-prefetch.cpp @@ -0,0 +1,24 @@ +// RUN: %clang_cc1 -std=c++11 -emit-llvm -o - %s | FileCheck %s + +void while_test(int *List, int Length, int *List2, int Length2) { + // CHECK: define {{.*}} @_Z10while_test + int i = 0; + +#pragma clang loop prefetch(disable) + while (i < Length) { + // CHECK: br label {{.*}}, !llvm.loop ![[LOOP_1:.*]] + List[i] = i * 2; + i++; + } + + i = 0; + while (i < Length2) { + // CHECK: br label {{.*}}, !llvm.loop [[LOOP_2:![0-9]+]] + List2[i] = i * 2; + i++; + } +} + +// CHECK: ![[LOOP_1]] = distinct !{![[LOOP_1]], [[MP:![0-9]+]], ![[PREFETCH_DISABLE:.*]]} +// CHECK: ![[PREFETCH_DISABLE]] = !{!"llvm.loop.prefetch.disable", i1 true} +// CHECK: [[LOOP_2]] = distinct !{[[LOOP_2]], [[MP]]} Index: clang/test/CodeGenCXX/pragma-loop.cpp =================================================================== --- clang/test/CodeGenCXX/pragma-loop.cpp +++ clang/test/CodeGenCXX/pragma-loop.cpp @@ -5,6 +5,7 @@ // CHECK: define {{.*}} @_Z10while_test int i = 0; +#pragma clang loop prefetch(disable) #pragma clang loop vectorize(enable) #pragma clang loop interleave_count(4) #pragma clang loop vectorize_width(4) Index: clang/test/Parser/pragma-loop.cpp =================================================================== --- clang/test/Parser/pragma-loop.cpp +++ clang/test/Parser/pragma-loop.cpp @@ -104,6 +104,7 @@ #pragma clang loop interleave(disable) #pragma clang loop vectorize_predicate(disable) #pragma clang loop unroll(disable) +#pragma clang loop prefetch(disable) while (i - 1 < Length) { List[i] = i; } @@ -141,12 +142,14 @@ /* expected-error {{expected '('}} */ #pragma clang loop vectorize_predicate /* expected-error {{expected '('}} */ #pragma clang loop unroll /* expected-error {{expected '('}} */ #pragma clang 
loop distribute +/* expected-error {{expected '('}} */ #pragma clang loop prefetch /* expected-error {{expected ')'}} */ #pragma clang loop vectorize(enable /* expected-error {{expected ')'}} */ #pragma clang loop interleave(enable /* expected-error {{expected ')'}} */ #pragma clang loop vectorize_predicate(enable /* expected-error {{expected ')'}} */ #pragma clang loop unroll(full /* expected-error {{expected ')'}} */ #pragma clang loop distribute(enable +/* expected-error {{expected ')'}} */ #pragma clang loop prefetch(disable /* expected-error {{expected ')'}} */ #pragma clang loop vectorize_width(4 /* expected-error {{expected ')'}} */ #pragma clang loop interleave_count(4 @@ -156,8 +159,9 @@ /* expected-error {{missing argument; expected an integer value}} */ #pragma clang loop interleave_count() /* expected-error {{missing argument; expected 'enable', 'full' or 'disable'}} */ #pragma clang loop unroll() /* expected-error {{missing argument; expected 'enable' or 'disable'}} */ #pragma clang loop distribute() +/* expected-error {{missing argument; expected 'disable'}} */ #pragma clang loop prefetch() -/* expected-error {{missing option; expected vectorize, vectorize_width, interleave, interleave_count, unroll, unroll_count, pipeline, pipeline_initiation_interval, vectorize_predicate, or distribute}} */ #pragma clang loop +/* expected-error {{missing option; expected vectorize, vectorize_width, interleave, interleave_count, unroll, unroll_count, pipeline, pipeline_initiation_interval, vectorize_predicate, distribute, or prefetch}} */ #pragma clang loop /* expected-error {{invalid option 'badkeyword'}} */ #pragma clang loop badkeyword /* expected-error {{invalid option 'badkeyword'}} */ #pragma clang loop badkeyword(enable) /* expected-error {{invalid option 'badkeyword'}} */ #pragma clang loop vectorize(enable) badkeyword(4) @@ -295,6 +299,11 @@ List[i] = i; } +/* expected-error {{invalid argument; expected 'disable'}} */ #pragma clang loop prefetch(enable) + 
while (i-11 < Length) { + List[i] = i; + } + #pragma clang loop interleave(enable) /* expected-error {{expected statement}} */ } @@ -336,4 +345,5 @@ while (i-6 < Length) { List[i] = i; } + } Index: clang/test/Parser/pragma-unroll-and-jam.cpp =================================================================== --- clang/test/Parser/pragma-unroll-and-jam.cpp +++ clang/test/Parser/pragma-unroll-and-jam.cpp @@ -67,7 +67,7 @@ } // pragma clang unroll_and_jam is disabled for the moment -/* expected-error {{invalid option 'unroll_and_jam'; expected vectorize, vectorize_width, interleave, interleave_count, unroll, unroll_count, pipeline, pipeline_initiation_interval, vectorize_predicate, or distribute}} */ #pragma clang loop unroll_and_jam(4) +/* expected-error {{invalid option 'unroll_and_jam'; expected vectorize, vectorize_width, interleave, interleave_count, unroll, unroll_count, pipeline, pipeline_initiation_interval, vectorize_predicate, distribute, or prefetch}} */ #pragma clang loop unroll_and_jam(4) for (int i = 0; i < Length; i++) { for (int j = 0; j < Length; j++) { List[i * Length + j] = Value; Index: llvm/docs/LangRef.rst =================================================================== --- llvm/docs/LangRef.rst +++ llvm/docs/LangRef.rst @@ -7069,6 +7069,25 @@ loop distribution pass. See :ref:`Transformation Metadata ` for details. +'``llvm.loop.prefetch.disable``' Metadata +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +LoopDataPrefetch allows the compiler to automatically insert loop data +prefetches. This metadata disables data prefetching for a specific loop. +The first operand is the string ``llvm.loop.prefetch.disable`` and the +second is an ``i1`` that disables prefetching when it is true. + +.. code-block:: llvm + + !0 = !{!"llvm.loop.prefetch.disable", i1 1} + +'``llvm.loop.prefetch.followup_all``' Metadata +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The attributes in this metadata are added to all followup loops of the +loop data prefetch pass.
See +:ref:`Transformation Metadata <transformation-metadata>` for details. + '``llvm.licm.disable``' Metadata ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Index: llvm/docs/TransformMetadata.rst =================================================================== --- llvm/docs/TransformMetadata.rst +++ llvm/docs/TransformMetadata.rst @@ -375,6 +375,15 @@ Currently, this transformation does not support followup-attributes. +Loop Data Prefetch +------------------ + +The LoopDataPrefetch pass tries to insert prefetch intrinsics for data +accessed in the loop. It does not transform the loop structure. + +Attributes defined in ``llvm.loop.prefetch.followup_all`` are added to +all of the aforementioned output loops. + Loop Interchange ---------------- Index: llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp =================================================================== --- llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp +++ llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp @@ -274,6 +274,19 @@ bool LoopDataPrefetch::runOnLoop(Loop *L) { bool MadeChange = false; + std::optional Val = + findStringMetadataForLoop(L, "llvm.loop.prefetch.disable"); + if (Val) { + const MDOperand *Op = *Val; + assert(Op && mdconst::hasa(*Op) && "invalid metadata"); + if (mdconst::extract(*Op)->getZExtValue()) { + LLVM_DEBUG( + dbgs() << "Pragma hint prevents data prefetching for loop: " + << *L); + return MadeChange; + } + } + // Only prefetch in the inner-most loop if (!L->isInnermost()) return MadeChange; Index: llvm/test/Transforms/LoopDataPrefetch/pragma-prefetch.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/LoopDataPrefetch/pragma-prefetch.ll @@ -0,0 +1,52 @@ +; RUN: opt < %s -passes=loop-data-prefetch -prefetch-distance=16 -cache-line-size=8 -S | FileCheck %s + +define dso_local void @pragma_prefetch(ptr %a, ptr %b, i32 %n) { +entry: + %cmp27 = icmp sgt i32 %n, 0 + br i1 %cmp27, label %for.body.preheader, label %for.cond.cleanup6 + +for.body.preheader: ; preds =
%entry + %wide.trip.count = zext i32 %n to i64 + br label %for.body + +for.cond4.preheader: ; preds = %for.body + br i1 %cmp27, label %for.body7.preheader, label %for.cond.cleanup6 + +for.body7.preheader: ; preds = %for.cond4.preheader + %wide.trip.count35 = zext i32 %n to i64 + br label %for.body7 + +for.body: ; preds = %for.body.preheader, %for.body + %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ] + %arrayidx = getelementptr inbounds i32, ptr %b, i64 %indvars.iv +;CHECK-NOT: call void @llvm.prefetch + %0 = load i32, ptr %arrayidx, align 4 + %1 = trunc i64 %indvars.iv to i32 + %add = add nsw i32 %0, %1 + %arrayidx2 = getelementptr inbounds i32, ptr %a, i64 %indvars.iv + store i32 %add, ptr %arrayidx2, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count + br i1 %exitcond.not, label %for.cond4.preheader, label %for.body, !llvm.loop !0 + +for.cond.cleanup6: ; preds = %for.body7, %entry, %for.cond4.preheader + ret void + +for.body7: ; preds = %for.body7.preheader, %for.body7 + %indvars.iv32 = phi i64 [ 0, %for.body7.preheader ], [ %indvars.iv.next33, %for.body7 ] + %arrayidx9 = getelementptr inbounds i32, ptr %b, i64 %indvars.iv32 +;CHECK: call void @llvm.prefetch + %2 = load i32, ptr %arrayidx9 + %3 = trunc i64 %indvars.iv32 to i32 + %add10 = add nsw i32 %2, %3 + %arrayidx12 = getelementptr inbounds i32, ptr %a, i64 %indvars.iv32 + store i32 %add10, ptr %arrayidx12 + %indvars.iv.next33 = add nuw nsw i64 %indvars.iv32, 1 + %exitcond36.not = icmp eq i64 %indvars.iv.next33, %wide.trip.count35 + br i1 %exitcond36.not, label %for.cond.cleanup6, label %for.body7, !llvm.loop !3 +} + +!0 = distinct !{!0, !1, !2} +!1 = !{!"llvm.loop.mustprogress"} +!2 = !{!"llvm.loop.prefetch.disable", i1 true} +!3 = distinct !{!3, !1}