Index: clang/docs/LanguageExtensions.rst =================================================================== --- clang/docs/LanguageExtensions.rst +++ clang/docs/LanguageExtensions.rst @@ -4021,9 +4021,10 @@ The ``#pragma clang loop`` directive is used to specify hints for optimizing the subsequent for, while, do-while, or c++11 range-based for loop. The directive -provides options for vectorization, interleaving, predication, unrolling and -distribution. Loop hints can be specified before any loop and will be ignored if -the optimization is not safe to apply. +provides options for vectorization, interleaving, predication, unrolling, and +distribution. It also provides two extended options, prefetch and noprefetch, +which are not supported on do-while loops. Loop hints can be specified before +any loop and will be ignored if the optimization is not safe to apply. There are loop hints that control transformations (e.g. vectorization, loop unrolling) and there are loop hints that set transformation options (e.g. @@ -4102,6 +4103,55 @@ might be more efficient when vector predication is efficiently supported by the target platform. + +Prefetch and NoPrefetch +----------------------- + +Prefetch can effectively improve the performance of programs affected by cache misses. + +.. code-block:: c++ + + #pragma clang loop prefetch(variable[, level[, distance]]) + for(...) { + ... + } + +There are three arguments: +1. ``variable``: a memory reference (the data to be prefetched); it must be a +declared ``pointer``/``array`` variable. +2. ``level``: an optional value that tells the compiler the type of prefetch. +'0': data will not be reused; '1': L1 cache; '2': L2 cache; '3': L3 cache. To use +this argument, you must also specify ``variable``. +3. ``distance``: an optional integer argument with a value greater than 0. It +indicates the number of loop iterations ahead of which a prefetch is issued, +before the corresponding load or store instruction.
To use this argument, +you must also specify ``variable`` and ``level``. + +NoPrefetch can provide better debugging assistance for performance optimization +of data prefetching. + +.. code-block:: c++ + + #pragma clang loop noprefetch(variable) + for(...) { + ... + } + +There is only one argument, ``variable``: a memory reference (the data that +should not be prefetched); it must be a declared ``pointer``/``array`` variable. + +Prefetch information can be specified for different data by using the +prefetch/noprefetch pragma multiple times. + +.. code-block:: c++ + + #pragma clang loop prefetch(variable1) + #pragma clang loop prefetch(variable2) + #pragma clang loop noprefetch(variable3) + for (...) { + ... + } + Loop Unrolling -------------- Index: clang/include/clang/Basic/Attr.td =================================================================== --- clang/include/clang/Basic/Attr.td +++ clang/include/clang/Basic/Attr.td @@ -3660,17 +3660,17 @@ ["vectorize", "vectorize_width", "interleave", "interleave_count", "unroll", "unroll_count", "unroll_and_jam", "unroll_and_jam_count", "pipeline", "pipeline_initiation_interval", "distribute", - "vectorize_predicate"], + "vectorize_predicate", "prefetch", "noprefetch"], ["Vectorize", "VectorizeWidth", "Interleave", "InterleaveCount", "Unroll", "UnrollCount", "UnrollAndJam", "UnrollAndJamCount", "PipelineDisabled", "PipelineInitiationInterval", "Distribute", - "VectorizePredicate"]>, + "VectorizePredicate", "Prefetch", "NoPrefetch"]>, EnumArgument<"State", "LoopHintState", ["enable", "disable", "numeric", "fixed_width", - "scalable_width", "assume_safety", "full"], + "scalable_width", "assume_safety", "full", "Reference"], ["Enable", "Disable", "Numeric", "FixedWidth", - "ScalableWidth", "AssumeSafety", "Full"]>, - ExprArgument<"Value">]; + "ScalableWidth", "AssumeSafety", "Full", "Reference"]>, + ExprArgument<"DeclRef">, ExprArgument<"Value">, ExprArgument<"Distance">]; let AdditionalMembers = [{ static const char *getOptionName(int Option) { @@
-3687,6 +3687,8 @@ case PipelineInitiationInterval: return "pipeline_initiation_interval"; case Distribute: return "distribute"; case VectorizePredicate: return "vectorize_predicate"; + case Prefetch: return "prefetch"; + case NoPrefetch: return "noprefetch"; } llvm_unreachable("Unhandled LoopHint option."); } Index: clang/include/clang/Basic/DiagnosticParseKinds.td =================================================================== --- clang/include/clang/Basic/DiagnosticParseKinds.td +++ clang/include/clang/Basic/DiagnosticParseKinds.td @@ -1526,15 +1526,19 @@ // Pragma loop support. def err_pragma_loop_missing_argument : Error< - "missing argument; expected %select{an integer value|" - "'enable'%select{|, 'full'}1%select{|, 'assume_safety'}2 or 'disable'}0">; + "missing argument; expected %select{an %select{integer value|declared variable}1|" + "'enable'%select{|, 'full'}2%select{|, 'assume_safety'}3 or 'disable'}0">; def err_pragma_loop_invalid_option : Error< "%select{invalid|missing}0 option%select{ %1|}0; expected vectorize, " "vectorize_width, interleave, interleave_count, unroll, unroll_count, " - "pipeline, pipeline_initiation_interval, vectorize_predicate, or distribute">; + "pipeline, pipeline_initiation_interval, vectorize_predicate, distribute, " + "prefetch, or noprefetch">; def err_pragma_loop_invalid_vectorize_option : Error< "vectorize_width loop hint malformed; use vectorize_width(X, fixed) or " "vectorize_width(X, scalable) where X is an integer, or vectorize_width('fixed' or 'scalable')">; +def err_pragma_prefetch_invalid_arg : Error< + "invalid argument; expected an declared variable">; +def err_pragma_prefetch_punc : Error<"expected ')' or ','%select{| after '%1'}0">; def note_pragma_loop_invalid_vectorize_option : Note< "vectorize_width loop hint malformed; use vectorize_width(X, fixed) or " "vectorize_width(X, scalable) where X is an integer, or vectorize_width('fixed' or 'scalable')">; Index: 
clang/include/clang/Basic/DiagnosticSemaKinds.td =================================================================== --- clang/include/clang/Basic/DiagnosticSemaKinds.td +++ clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -1024,6 +1024,8 @@ "%select{incompatible|duplicate}0 directives '%1' and '%2'">; def err_pragma_loop_precedes_nonloop : Error< "expected a for, while, or do-while loop to follow '%0'">; +def err_pragma_loop_invalid_numeric : Error< + "invalid value '%0'; expected integer constant between 0 and 3">; def err_pragma_attribute_matcher_subrule_contradicts_rule : Error< "redundant attribute subject matcher sub-rule '%0'; '%1' already matches " Index: clang/include/clang/Parse/LoopHint.h =================================================================== --- clang/include/clang/Parse/LoopHint.h +++ clang/include/clang/Parse/LoopHint.h @@ -31,12 +31,18 @@ // Identifier for the hint state argument. If null, then the state is // default value such as for "#pragma unroll". IdentifierLoc *StateLoc; + // Declaration reference expression for the hint argument if it exits, + // null otherwise. + Expr *DeclRef; // Expression for the hint argument if it exists, null otherwise. Expr *ValueExpr; + // Distance value expression for the hint argument of "prefetch" if it exists, + // null otherwise. 
+ Expr *DistanceExpr; LoopHint() : PragmaNameLoc(nullptr), OptionLoc(nullptr), StateLoc(nullptr), - ValueExpr(nullptr) {} + DeclRef(nullptr), ValueExpr(nullptr), DistanceExpr(nullptr) {} }; } // end namespace clang Index: clang/include/clang/Sema/Sema.h =================================================================== --- clang/include/clang/Sema/Sema.h +++ clang/include/clang/Sema/Sema.h @@ -5644,7 +5644,7 @@ SourceLocation RParen, ParsedType ParsedTy); - bool CheckLoopHintExpr(Expr *E, SourceLocation Loc); + bool CheckLoopHintExpr(Expr *E, SourceLocation Loc, bool IsPrefetch = false); ExprResult ActOnNumericConstant(const Token &Tok, Scope *UDLScope = nullptr); ExprResult ActOnCharacterConstant(const Token &Tok, Index: clang/lib/AST/AttrImpl.cpp =================================================================== --- clang/lib/AST/AttrImpl.cpp +++ clang/lib/AST/AttrImpl.cpp @@ -43,7 +43,17 @@ OS << "("; if (state == Numeric) value->printPretty(OS, nullptr, Policy); - else if (state == FixedWidth || state == ScalableWidth) { + else if (state == Reference) { + declRef->printPretty(OS, nullptr, Policy); + if (value) { + OS << ", "; + value->printPretty(OS, nullptr, Policy); + if (distance) { + OS << ", "; + distance->printPretty(OS, nullptr, Policy); + } + } + } else if (state == FixedWidth || state == ScalableWidth) { if (value) { value->printPretty(OS, nullptr, Policy); if (state == ScalableWidth) Index: clang/lib/CodeGen/CGLoopInfo.h =================================================================== --- clang/lib/CodeGen/CGLoopInfo.h +++ clang/lib/CodeGen/CGLoopInfo.h @@ -14,6 +14,7 @@ #ifndef LLVM_CLANG_LIB_CODEGEN_CGLOOPINFO_H #define LLVM_CLANG_LIB_CODEGEN_CGLOOPINFO_H +#include "Address.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SmallVector.h" #include "llvm/IR/DebugLoc.h" @@ -30,8 +31,19 @@ class Attr; class ASTContext; class CodeGenOptions; +class Decl; namespace CodeGen { +struct PrefetchHint { + // Value that need to be prefetched or do not 
need to be prefetched. + llvm::Value *PrefetchValue; + // Cache level of which a prefetch is issued. + int CacheLevel; + // Loop iteration ahead of which a prefetch is issued. + int IterAhead; + bool EnablePrefetch; +}; + /// Attributes that may be specified on loops. struct LoopAttributes { explicit LoopAttributes(bool IsParallel = false); @@ -81,6 +93,9 @@ /// Value for whether the loop is required to make progress. bool MustProgress; + + /// Value for llvm.loop.prefetch metadata + llvm::SmallVector PrefetchInfo; }; /// Information used when generating a structured loop. @@ -103,6 +118,12 @@ /// Return this loop's access group or nullptr if it does not have one. llvm::MDNode *getAccessGroup() const { return AccGroup; } + /// Get the MDNode of prefetch variable for this loop. + llvm::MDNode *getPrefetchMDNode(llvm::Value *Ad) { + auto I = PrefetchMDs.find(Ad); + return I != PrefetchMDs.end() ? I->second : nullptr; + }; + /// Create the loop's metadata. Must be called after its nested loops have /// been processed. void finish(); @@ -126,6 +147,8 @@ /// loop's LoopInfo to set the llvm.loop.unroll_and_jam.followup_inner /// metadata. llvm::MDNode *UnrollAndJamInnerFollowup = nullptr; + /// Each prefetch variable has unique metadata. + llvm::DenseMap PrefetchMDs; /// Create a LoopID without any transformations. llvm::MDNode * @@ -210,8 +233,10 @@ /// The staged attributes are applied to the loop and then cleared. void push(llvm::BasicBlock *Header, clang::ASTContext &Ctx, const clang::CodeGenOptions &CGOpts, - llvm::ArrayRef Attrs, const llvm::DebugLoc &StartLoc, - const llvm::DebugLoc &EndLoc, bool MustProgress = false); + llvm::ArrayRef Attrs, + llvm::DenseMap &LocalDeclMap, + const llvm::DebugLoc &StartLoc, const llvm::DebugLoc &EndLoc, + bool MustProgress = false); /// End the current loop. void pop(); @@ -285,6 +310,12 @@ /// Set no progress for the next loop pushed. 
void setMustProgress(bool P) { StagedAttrs.MustProgress = P; } + /// Set prefetch value, cache level, iteration ahead for the next loop pushed, + /// or set noprefetch value. + void setPrefetchInfo(PrefetchHint S) { + StagedAttrs.PrefetchInfo.emplace_back(S); + } + private: /// Returns true if there is LoopInfo on the stack. bool hasInfo() const { return !Active.empty(); } Index: clang/lib/CodeGen/CGLoopInfo.cpp =================================================================== --- clang/lib/CodeGen/CGLoopInfo.cpp +++ clang/lib/CodeGen/CGLoopInfo.cpp @@ -470,6 +470,7 @@ PipelineDisabled = false; PipelineInitiationInterval = 0; MustProgress = false; + PrefetchInfo.clear(); } LoopInfo::LoopInfo(BasicBlock *Header, const LoopAttributes &Attrs, @@ -484,6 +485,25 @@ AccGroup = MDNode::getDistinct(Ctx, {}); } + if (!Attrs.PrefetchInfo.empty()) { + LLVMContext &Ctx = Header->getContext(); + for (auto S : Attrs.PrefetchInfo) { + Value *Val = S.PrefetchValue; + int CacheLevel = S.CacheLevel; + int IterAhead = S.IterAhead; + bool EnablePrefetch = S.EnablePrefetch; + + Metadata *Vals[] = {ConstantAsMetadata::get(ConstantInt::get( + llvm::Type::getInt1Ty(Ctx), EnablePrefetch)), + ConstantAsMetadata::get(ConstantInt::get( + llvm::Type::getInt32Ty(Ctx), CacheLevel)), + ConstantAsMetadata::get(ConstantInt::get( + llvm::Type::getInt32Ty(Ctx), IterAhead))}; + MDNode *MD = MDNode::getDistinct(Ctx, Vals); + PrefetchMDs[Val] = MD; + } + } + if (!Attrs.IsParallel && Attrs.VectorizeWidth == 0 && Attrs.VectorizeScalable == LoopAttributes::Unspecified && Attrs.InterleaveCount == 0 && Attrs.UnrollCount == 0 && @@ -494,7 +514,7 @@ Attrs.UnrollEnable == LoopAttributes::Unspecified && Attrs.UnrollAndJamEnable == LoopAttributes::Unspecified && Attrs.DistributeEnable == LoopAttributes::Unspecified && !StartLoc && - !EndLoc && !Attrs.MustProgress) + !EndLoc && !Attrs.MustProgress && Attrs.PrefetchInfo.empty()) return; TempLoopID = MDNode::getTemporary(Header->getContext(), std::nullopt); @@ 
-596,6 +616,7 @@ void LoopInfoStack::push(BasicBlock *Header, clang::ASTContext &Ctx, const clang::CodeGenOptions &CGOpts, ArrayRef Attrs, + llvm::DenseMap &LocalDeclMap, const llvm::DebugLoc &StartLoc, const llvm::DebugLoc &EndLoc, bool MustProgress) { // Identify loop hint attributes from Attrs. @@ -611,7 +632,9 @@ LoopHintAttr::OptionType Option = LoopHintAttr::Unroll; LoopHintAttr::LoopHintState State = LoopHintAttr::Disable; - unsigned ValueInt = 1; + int ValueInt = 1; + int DistanceInt = -1; + Value *DeclRef = nullptr; // Translate opencl_unroll_hint attribute argument to // equivalent LoopHintAttr enums. // OpenCL v2.0 s6.11.5: @@ -627,13 +650,33 @@ State = LoopHintAttr::Numeric; } } else if (LH) { + Option = LH->getOption(); + if (Option == LoopHintAttr::Prefetch || + Option == LoopHintAttr::NoPrefetch) + ValueInt = -1; + auto *ValueExpr = LH->getValue(); if (ValueExpr) { llvm::APSInt ValueAPS = ValueExpr->EvaluateKnownConstInt(Ctx); ValueInt = ValueAPS.getSExtValue(); } - Option = LH->getOption(); + Expr *DistanceExpr = LH->getDistance(); + if (DistanceExpr) { + llvm::APSInt ValueAPS = DistanceExpr->EvaluateKnownConstInt(Ctx); + DistanceInt = ValueAPS.getSExtValue(); + } + + if (Expr *Ref = LH->getDeclRef()) { + auto *DRE = dyn_cast(Ref); + assert(DRE && "Attribute must hava a valid declaration reference."); + if (auto *VD = dyn_cast(DRE->getDecl())) { + auto I = LocalDeclMap.find(VD); + if (I != LocalDeclMap.end()) + DeclRef = I->second.getPointer(); + } + } + State = LH->getState(); } switch (State) { @@ -668,6 +711,8 @@ case LoopHintAttr::VectorizeWidth: case LoopHintAttr::InterleaveCount: case LoopHintAttr::PipelineInitiationInterval: + case LoopHintAttr::NoPrefetch: + case LoopHintAttr::Prefetch: llvm_unreachable("Options cannot be disabled."); break; } @@ -696,6 +741,8 @@ case LoopHintAttr::InterleaveCount: case LoopHintAttr::PipelineDisabled: case LoopHintAttr::PipelineInitiationInterval: + case LoopHintAttr::NoPrefetch: + case 
LoopHintAttr::Prefetch: llvm_unreachable("Options cannot enabled."); break; } @@ -718,6 +765,8 @@ case LoopHintAttr::Distribute: case LoopHintAttr::PipelineDisabled: case LoopHintAttr::PipelineInitiationInterval: + case LoopHintAttr::NoPrefetch: + case LoopHintAttr::Prefetch: llvm_unreachable("Options cannot be used to assume mem safety."); break; } @@ -740,6 +789,8 @@ case LoopHintAttr::PipelineDisabled: case LoopHintAttr::PipelineInitiationInterval: case LoopHintAttr::VectorizePredicate: + case LoopHintAttr::NoPrefetch: + case LoopHintAttr::Prefetch: llvm_unreachable("Options cannot be used with 'full' hint."); break; } @@ -781,10 +832,25 @@ case LoopHintAttr::Interleave: case LoopHintAttr::Distribute: case LoopHintAttr::PipelineDisabled: + case LoopHintAttr::NoPrefetch: + case LoopHintAttr::Prefetch: llvm_unreachable("Options cannot be assigned a value."); break; } break; + case LoopHintAttr::Reference: + switch (Option) { + case LoopHintAttr::Prefetch: + setPrefetchInfo({DeclRef, ValueInt, DistanceInt, true}); + break; + case LoopHintAttr::NoPrefetch: + setPrefetchInfo({DeclRef, ValueInt, DistanceInt, false}); + break; + default: + llvm_unreachable("Option cannot be assigned a variable"); + break; + } + break; } } @@ -822,6 +888,40 @@ else if (AccessGroups.size() >= 2) UnionMD = MDNode::get(I->getContext(), AccessGroups); I->setMetadata("llvm.access.group", UnionMD); + + // Add "llvm.loop.prefetch" metadata for load/store of variables that need + // to be (no)prefetched. + // 1. Assume the prefetch variable is a pointer \p A. + // %A = alloca ptr + // %0 = load ptr, ptr %A + // %arrayidx = getelementptr inbounds i32, ptr %0, i64 %idxprom + // %1 = load i32, ptr %arrayidx + // 2. Assume the prefetch variable is an array \p A. 
+ // %A = alloca [n * i32] + // %arrayidx = getelementptr inbounds i32, ptr %0, i64 %idxprom + // %0 = load i32, ptr %arrayidx + auto *LI = dyn_cast(I); + auto *SI = dyn_cast(I); + if (!Active.empty() && (LI || SI)) { + Value *PrefetchOp = + LI ? LI->getPointerOperand() : SI->getPointerOperand(); + if (auto *GEP = dyn_cast(PrefetchOp)) { + auto *AddrLI = dyn_cast(GEP->getPointerOperand()); + Value *PrefetchVal = + AddrLI ? AddrLI->getPointerOperand() : GEP->getPointerOperand(); + if (PrefetchVal) { + MDNode *PrefetchMD = nullptr; + // For nested loops, use the prefetch pragma of the outer loop + // closest to the current loop. + for (const auto &AL : Active) { + MDNode *MD = AL->getPrefetchMDNode(PrefetchVal); + PrefetchMD = MD ? MD : PrefetchMD; + } + if (PrefetchMD) + I->setMetadata("llvm.loop.prefetch", PrefetchMD); + } + } + } } if (!hasInfo()) Index: clang/lib/CodeGen/CGStmt.cpp =================================================================== --- clang/lib/CodeGen/CGStmt.cpp +++ clang/lib/CodeGen/CGStmt.cpp @@ -903,7 +903,7 @@ bool EmitBoolCondBranch = !CondIsConstInt || !C->isOne(); const SourceRange &R = S.getSourceRange(); LoopStack.push(LoopHeader.getBlock(), CGM.getContext(), CGM.getCodeGenOpts(), - WhileAttrs, SourceLocToDebugLoc(R.getBegin()), + WhileAttrs, LocalDeclMap, SourceLocToDebugLoc(R.getBegin()), SourceLocToDebugLoc(R.getEnd()), checkIfLoopMustProgress(CondIsConstInt)); @@ -1002,7 +1002,7 @@ const SourceRange &R = S.getSourceRange(); LoopStack.push(LoopBody, CGM.getContext(), CGM.getCodeGenOpts(), DoAttrs, - SourceLocToDebugLoc(R.getBegin()), + LocalDeclMap, SourceLocToDebugLoc(R.getBegin()), SourceLocToDebugLoc(R.getEnd()), checkIfLoopMustProgress(CondIsConstInt)); @@ -1048,7 +1048,7 @@ const SourceRange &R = S.getSourceRange(); LoopStack.push(CondBlock, CGM.getContext(), CGM.getCodeGenOpts(), ForAttrs, - SourceLocToDebugLoc(R.getBegin()), + LocalDeclMap, SourceLocToDebugLoc(R.getBegin()), SourceLocToDebugLoc(R.getEnd()), 
checkIfLoopMustProgress(CondIsConstInt)); @@ -1163,7 +1163,7 @@ const SourceRange &R = S.getSourceRange(); LoopStack.push(CondBlock, CGM.getContext(), CGM.getCodeGenOpts(), ForAttrs, - SourceLocToDebugLoc(R.getBegin()), + LocalDeclMap, SourceLocToDebugLoc(R.getBegin()), SourceLocToDebugLoc(R.getEnd())); // If there are any cleanups between here and the loop-exit scope, Index: clang/lib/CodeGen/CGStmtOpenMP.cpp =================================================================== --- clang/lib/CodeGen/CGStmtOpenMP.cpp +++ clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -2097,7 +2097,8 @@ OMPLoopNestStack.clear(); if (AS) LoopStack.push(CondBlock, CGM.getContext(), CGM.getCodeGenOpts(), - AS->getAttrs(), SourceLocToDebugLoc(R.getBegin()), + AS->getAttrs(), LocalDeclMap, + SourceLocToDebugLoc(R.getBegin()), SourceLocToDebugLoc(R.getEnd())); else LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()), Index: clang/lib/Parse/ParsePragma.cpp =================================================================== --- clang/lib/Parse/ParsePragma.cpp +++ clang/lib/Parse/ParsePragma.cpp @@ -1340,12 +1340,16 @@ bool OptionUnrollAndJam = false; bool OptionDistribute = false; bool OptionPipelineDisabled = false; + bool OptionNoPrefetch = false; + bool OptionPrefetch = false; bool StateOption = false; if (OptionInfo) { // Pragma Unroll does not specify an option. 
OptionUnroll = OptionInfo->isStr("unroll"); OptionUnrollAndJam = OptionInfo->isStr("unroll_and_jam"); OptionDistribute = OptionInfo->isStr("distribute"); OptionPipelineDisabled = OptionInfo->isStr("pipeline"); + OptionPrefetch = OptionInfo->isStr("prefetch"); + OptionNoPrefetch = OptionInfo->isStr("noprefetch"); StateOption = llvm::StringSwitch(OptionInfo->getName()) .Case("vectorize", true) .Case("interleave", true) @@ -1362,6 +1366,7 @@ ConsumeAnnotationToken(); Diag(Toks[0].getLocation(), diag::err_pragma_loop_missing_argument) << /*StateArgument=*/StateOption + << /*VariableArg=*/(OptionPrefetch || OptionNoPrefetch) << /*FullKeyword=*/(OptionUnroll || OptionUnrollAndJam) << /*AssumeSafetyKeyword=*/AssumeSafetyArg; return false; @@ -1394,6 +1399,101 @@ Diag(Tok.getLocation(), diag::warn_pragma_extra_tokens_at_eol) << PragmaLoopHintString(Info->PragmaName, Info->Option); Hint.StateLoc = IdentifierLoc::create(Actions.Context, StateLoc, StateInfo); + } else if (OptionNoPrefetch || OptionPrefetch) { + UnqualifiedId Name; + CXXScopeSpec ScopeSpec; + SourceLocation TemplateKWLoc; + + PP.EnterTokenStream(Toks, /*DisableMacroExpansion=*/false, + /*IsReinject=*/false); + ConsumeAnnotationToken(); + + IdentifierInfo *StateInfo = Tok.getIdentifierInfo(); + SourceLocation StateLoc = ConsumeToken(); // identifier + if (!StateInfo) { + Diag(StateLoc, diag::err_pragma_prefetch_invalid_arg); + while (Tok.isNot(tok::eof)) + ConsumeAnyToken(); + ConsumeToken(); + return false; + } + Hint.StateLoc = IdentifierLoc::create(Actions.Context, StateLoc, StateInfo); + + Name.setIdentifier(StateInfo, StateLoc); + ExprResult Res = + Actions.ActOnIdExpression(getCurScope(), ScopeSpec, TemplateKWLoc, Name, + Tok.is(tok::l_paren), false); + + auto ConsumeRemainToks = [&]() { + while (Tok.isNot(tok::eof)) + ConsumeAnyToken(); + ConsumeToken(); + }; + + if (Res.get()->containsErrors()) { + // Do not do error diagnosis here, it will be dealt with later. 
+ ConsumeRemainToks(); + return false; + } + + if (OptionNoPrefetch) { + if (Toks.size() > 2) { + Diag(Tok.getLocation(), diag::warn_pragma_extra_tokens_at_eol) + << PragmaLoopHintString(Info->PragmaName, Info->Option); + while (Tok.isNot(tok::eof)) + ConsumeAnyToken(); + } + Hint.DeclRef = Res.get(); + } + + if (OptionPrefetch) { + if (Tok.isNot(tok::eof)) { + if (Tok.isNot(tok::comma)) { + Diag(Tok.getLocation(), diag::err_pragma_prefetch_punc) + << /*IsIdentifier=*/1 << StateInfo->getName(); + ConsumeRemainToks(); + return false; + } + + PP.Lex(Tok); // ',' + ExprResult R1 = ParseConstantExpression(); // 'numeric_constant' + if (R1.isInvalid() || + Actions.CheckLoopHintExpr(R1.get(), R1.get()->getExprLoc(), + /*IsPrefetch=*/true)) { + ConsumeRemainToks(); + return false; + } + + if (Tok.isNot(tok::eof)) { + if (Tok.isNot(tok::comma)) { + Diag(Tok.getLocation(), diag::err_pragma_prefetch_punc) + << /*IsIdentifier=*/0; + ConsumeRemainToks(); + return false; + } + + PP.Lex(Tok); // ',' + ExprResult R2 = ParseConstantExpression(); // 'numeric_constant' + if (R2.isInvalid() || + Actions.CheckLoopHintExpr(R2.get(), R2.get()->getExprLoc())) { + ConsumeRemainToks(); + return false; + } + Hint.DistanceExpr = R2.get(); + + if (Tok.isNot(tok::eof)) { + Diag(Tok.getLocation(), diag::warn_pragma_extra_tokens_at_eol) + << PragmaLoopHintString(Info->PragmaName, Info->Option); + while (Tok.isNot(tok::eof)) + ConsumeAnyToken(); + } + } + Hint.ValueExpr = R1.get(); + } + Hint.DeclRef = Res.get(); + } + + ConsumeToken(); // Consume the constant expression eof terminator. 
} else if (OptionInfo && OptionInfo->getName() == "vectorize_width") { PP.EnterTokenStream(Toks, /*DisableMacroExpansion=*/false, /*IsReinject=*/false); @@ -3440,6 +3540,8 @@ /// 'unroll_count' '(' loop-hint-value ')' /// 'pipeline' '(' disable ')' /// 'pipeline_initiation_interval' '(' loop-hint-value ')' +/// 'noprefetch' '(' loop-access-decl ')' +/// 'prefetch' '(' loop-access-decl ',' loop-hint-value ',' loop-hint-value ')' /// /// loop-hint-keyword: /// 'enable' @@ -3454,6 +3556,9 @@ /// loop-hint-value: /// constant-expression /// +/// loop-access-decl: +/// declaration-reference-expression +/// /// Specifying vectorize(enable) or vectorize_width(_value_) instructs llvm to /// try vectorizing the instructions of the loop it precedes. Specifying /// interleave(enable) or interleave_count(_value_) instructs llvm to try @@ -3502,6 +3607,8 @@ .Case("unroll_count", true) .Case("pipeline", true) .Case("pipeline_initiation_interval", true) + .Case("noprefetch", true) + .Case("prefetch", true) .Default(false); if (!OptionValid) { PP.Diag(Tok.getLocation(), diag::err_pragma_loop_invalid_option) Index: clang/lib/Parse/ParseStmt.cpp =================================================================== --- clang/lib/Parse/ParseStmt.cpp +++ clang/lib/Parse/ParseStmt.cpp @@ -2407,10 +2407,14 @@ if (!HandlePragmaLoopHint(Hint)) continue; - ArgsUnion ArgHints[] = {Hint.PragmaNameLoc, Hint.OptionLoc, Hint.StateLoc, - ArgsUnion(Hint.ValueExpr)}; + ArgsUnion ArgHints[] = {Hint.PragmaNameLoc, + Hint.OptionLoc, + Hint.StateLoc, + ArgsUnion(Hint.DeclRef), + ArgsUnion(Hint.ValueExpr), + ArgsUnion(Hint.DistanceExpr)}; TempAttrs.addNew(Hint.PragmaNameLoc->Ident, Hint.Range, nullptr, - Hint.PragmaNameLoc->Loc, ArgHints, 4, + Hint.PragmaNameLoc->Loc, ArgHints, 6, ParsedAttr::AS_Pragma); } Index: clang/lib/Sema/SemaExpr.cpp =================================================================== --- clang/lib/Sema/SemaExpr.cpp +++ clang/lib/Sema/SemaExpr.cpp @@ -3738,7 +3738,7 @@ return 
FloatingLiteral::Create(S.Context, Val, isExact, Ty, Loc); } -bool Sema::CheckLoopHintExpr(Expr *E, SourceLocation Loc) { +bool Sema::CheckLoopHintExpr(Expr *E, SourceLocation Loc, bool IsPrefetch) { assert(E && "Invalid expression"); if (E->isValueDependent()) @@ -3756,11 +3756,20 @@ if (R.isInvalid()) return true; - bool ValueIsPositive = ValueAPS.isStrictlyPositive(); - if (!ValueIsPositive || ValueAPS.getActiveBits() > 31) { - Diag(E->getExprLoc(), diag::err_pragma_loop_invalid_argument_value) - << toString(ValueAPS, 10) << ValueIsPositive; - return true; + if (IsPrefetch) { + auto Val = ValueAPS.getSExtValue(); + if (Val < 0 || Val > 3) { + Diag(E->getExprLoc(), diag::err_pragma_loop_invalid_numeric) + << toString(ValueAPS, 10); + return true; + } + } else { + bool ValueIsPositive = ValueAPS.isStrictlyPositive(); + if (!ValueIsPositive || ValueAPS.getActiveBits() > 31) { + Diag(E->getExprLoc(), diag::err_pragma_loop_invalid_argument_value) + << toString(ValueAPS, 10) << ValueIsPositive; + return true; + } } return false; Index: clang/lib/Sema/SemaOpenMP.cpp =================================================================== --- clang/lib/Sema/SemaOpenMP.cpp +++ clang/lib/Sema/SemaOpenMP.cpp @@ -15130,9 +15130,9 @@ // assuming runtime trip count". Even if it did work, it must not choose a // larger unroll factor than the maximum loop length, or it would always just // execute the remainder loop. 
- LoopHintAttr *UnrollHintAttr = - LoopHintAttr::CreateImplicit(Context, LoopHintAttr::UnrollCount, - LoopHintAttr::Numeric, MakeFactorExpr()); + LoopHintAttr *UnrollHintAttr = LoopHintAttr::CreateImplicit( + Context, LoopHintAttr::UnrollCount, LoopHintAttr::Numeric, nullptr, + MakeFactorExpr(), nullptr); AttributedStmt *InnerUnrolled = AttributedStmt::Create(Context, StartLoc, {UnrollHintAttr}, InnerFor); Index: clang/lib/Sema/SemaStmtAttr.cpp =================================================================== --- clang/lib/Sema/SemaStmtAttr.cpp +++ clang/lib/Sema/SemaStmtAttr.cpp @@ -74,7 +74,9 @@ IdentifierLoc *PragmaNameLoc = A.getArgAsIdent(0); IdentifierLoc *OptionLoc = A.getArgAsIdent(1); IdentifierLoc *StateLoc = A.getArgAsIdent(2); - Expr *ValueExpr = A.getArgAsExpr(3); + Expr *DeclRef = A.getArgAsExpr(3); + Expr *ValueExpr = A.getArgAsExpr(4); + Expr *DistanceExpr = A.getArgAsExpr(5); StringRef PragmaName = llvm::StringSwitch(PragmaNameLoc->Ident->getName()) @@ -133,6 +135,8 @@ .Case("pipeline_initiation_interval", LoopHintAttr::PipelineInitiationInterval) .Case("distribute", LoopHintAttr::Distribute) + .Case("noprefetch", LoopHintAttr::NoPrefetch) + .Case("prefetch", LoopHintAttr::Prefetch) .Default(LoopHintAttr::Vectorize); if (Option == LoopHintAttr::VectorizeWidth) { assert((ValueExpr || (StateLoc && StateLoc->Ident)) && @@ -150,6 +154,10 @@ if (S.CheckLoopHintExpr(ValueExpr, St->getBeginLoc())) return nullptr; State = LoopHintAttr::Numeric; + } else if (Option == LoopHintAttr::NoPrefetch || + Option == LoopHintAttr::Prefetch) { + assert(DeclRef && "Attribute must hava a valid declaration reference."); + State = LoopHintAttr::Reference; } else if (Option == LoopHintAttr::Vectorize || Option == LoopHintAttr::Interleave || Option == LoopHintAttr::VectorizePredicate || @@ -171,7 +179,8 @@ llvm_unreachable("bad loop hint"); } - return LoopHintAttr::CreateImplicit(S.Context, Option, State, ValueExpr, A); + return LoopHintAttr::CreateImplicit(S.Context, 
Option, State, DeclRef, + ValueExpr, DistanceExpr, A); } namespace { @@ -325,6 +334,13 @@ // The vector predication only has a state form that is exposed by // #pragma clang loop vectorize_predicate (enable | disable). VectorizePredicate, + // For NoPrefetch, it only has one variant: a declaration-reference form. + NoPrefetch, + // For Prefetch, it has three variants: a declaration-reference form and + // two numeric forms. The first numeric form range from 0 to 3, indicating + // the cache level. The second numeric form is a positive integer, + // indicating the iteration ahead. + Prefetch, // This serves as a indicator to how many category are listed in this enum. NumberOfCategories }; @@ -333,7 +349,9 @@ struct { const LoopHintAttr *StateAttr; const LoopHintAttr *NumericAttr; + const LoopHintAttr *ReferenceAttr; } HintAttrs[CategoryType::NumberOfCategories] = {}; + llvm::DenseMap PrefetchPrevAttr; for (const auto *I : Attrs) { const LoopHintAttr *LH = dyn_cast(I); @@ -372,6 +390,12 @@ case LoopHintAttr::VectorizePredicate: Category = VectorizePredicate; break; + case LoopHintAttr::Prefetch: + Category = Prefetch; + break; + case LoopHintAttr::NoPrefetch: + Category = NoPrefetch; + break; }; assert(Category != NumberOfCategories && "Unhandled loop hint option"); @@ -386,6 +410,11 @@ // Enable|Disable|AssumeSafety hint. For example, vectorize(enable). PrevAttr = CategoryState.StateAttr; CategoryState.StateAttr = LH; + } else if (Option == LoopHintAttr::Prefetch || + Option == LoopHintAttr::NoPrefetch) { + // Reference hint. For example, prefetch(a). + PrevAttr = nullptr; + CategoryState.ReferenceAttr = LH; } else { // Numeric hint. For example, vectorize_width(8). PrevAttr = CategoryState.NumericAttr; @@ -400,6 +429,21 @@ << /*Duplicate=*/true << PrevAttr->getDiagnosticName(Policy) << LH->getDiagnosticName(Policy); + // Cannot specify prefetch and/or noprefetch for a same declaration + // reference twice. 
+ if (CategoryState.ReferenceAttr) { + if (auto *DeclRef = dyn_cast(LH->getDeclRef())) { + ValueDecl *VD = DeclRef->getDecl(); + auto Iter = PrefetchPrevAttr.find(VD); + if (Iter != PrefetchPrevAttr.end()) { + S.Diag(OptionLoc, diag::err_pragma_loop_compatibility) + << /*Duplicate=*/false << Iter->second->getDiagnosticName(Policy) + << CategoryState.ReferenceAttr->getDiagnosticName(Policy); + } + PrefetchPrevAttr.insert({VD, LH}); + } + } + if (CategoryState.StateAttr && CategoryState.NumericAttr && (Category == Unroll || Category == UnrollAndJam || CategoryState.StateAttr->getState() == LoopHintAttr::Disable)) { Index: clang/lib/Sema/SemaTemplateInstantiate.cpp =================================================================== --- clang/lib/Sema/SemaTemplateInstantiate.cpp +++ clang/lib/Sema/SemaTemplateInstantiate.cpp @@ -1764,7 +1764,8 @@ // Create new LoopHintValueAttr with integral expression in place of the // non-type template parameter. return LoopHintAttr::CreateImplicit(getSema().Context, LH->getOption(), - LH->getState(), TransformedExpr, *LH); + LH->getState(), LH->getDeclRef(), + TransformedExpr, LH->getDistance(), *LH); } ExprResult TemplateInstantiator::transformNonTypeTemplateParmRef( Index: clang/test/CodeGenCXX/pragma-prefetch-noprefetch.cpp =================================================================== --- /dev/null +++ clang/test/CodeGenCXX/pragma-prefetch-noprefetch.cpp @@ -0,0 +1,214 @@ +// RUN: %clang_cc1 -std=c++11 -emit-llvm -o - %s | FileCheck -check-prefix=CHECK %s +// RUN: %clang_cc1 -std=c++11 -fopenmp -emit-llvm -o - %s | FileCheck -check-prefix=CHECK-OMP %s + +// Verify noprefetch metadata is generated correctly. 
+int noprefetch_test(int *a, int n, int &sum) { + #pragma clang loop noprefetch(a) + for (int i = 0; i < n; ++i) { +// CHECK: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX1:%.*]], align [[ALIGN:.*]], !llvm.loop.prefetch ![[NOPREFETCH1:.*]] + sum += a[i]; + } + + int *b = a; + #pragma clang loop noprefetch(a) + #pragma clang loop noprefetch(b) + for (int i = 0; i < n; ++i) { +// CHECK: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX2:%.*]], align [[ALIGN]], !llvm.loop.prefetch ![[NOPREFETCH2:.*]] +// CHECK: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX3:%.*]], align [[ALIGN]], !llvm.loop.prefetch ![[NOPREFETCH3:.*]] + sum += a[i] + b[i]; + } + + #pragma clang loop noprefetch(a,) + for (int i = 0; i < n; ++i) { +// CHECK: [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX4:%.*]], align [[ALIGN]], !llvm.loop.prefetch ![[NOPREFETCH4:.*]] + sum += a[i]; + } + + #pragma clang loop vectorize(enable) noprefetch(a) unroll(full) distribute(enable) + for (int i = 0; i < n; ++i) { +// CHECK: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX5:%.*]], align [[ALIGN]], !llvm.loop.prefetch ![[NOPREFETCH5:.*]] + sum += a[i]; + } + + #pragma clang loop noprefetch(a) + for (int i = 0; i < n; ++i) { +// CHECK: [[TMP6:%.*]] = load i32, ptr [[ARRAYIDX6:%.*]], align [[ALIGN]], !llvm.loop.prefetch ![[NOPREFETCH6:.*]] + sum += a[i]; + for (int j = 0; j < n; ++j) { +// CHECK: [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX7:%.*]], align [[ALIGN]], !llvm.loop.prefetch ![[NOPREFETCH6]] + sum += a[j]; + } + } + + int c[n]; + #pragma clang loop noprefetch(c) + for (int i = 0; i < n; ++i) { +// CHECK: [[TMP8:%.*]] = load i32, ptr [[ARRAYIDX8:%.*]], align [[ALIGN]], !llvm.loop.prefetch ![[NOPREFETCH7:.*]] + sum += c[i]; + } + return sum; +} + +// Verify the while loop is recognized after a sequence of pragma clang loop directives. 
+int while_noprefetch_test(int *a, int n, int &sum) { + int i = 0; + #pragma clang loop vectorize(enable) + #pragma clang loop interleave_count(4) + #pragma clang loop noprefetch(a) + #pragma clang loop unroll(full) + #pragma clang loop distribute(enable) + while (i++ < n) { +// CHECK: [[TMP:%.*]] = load i32, ptr [[ARRAYIDX:%.*]], align [[ALIGN]], !llvm.loop.prefetch ![[NOPREFETCH8:.*]] + sum += a[i]; + } + return sum; +} + +// Verify prefetch metadata is generated correctly. +int prefetch_test(int *a, int n, int &sum) { + #pragma clang loop prefetch(a) + for (int i = 0; i < n; ++i) { +// CHECK: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX1:%.*]], align [[ALIGN]], !llvm.loop.prefetch ![[PREFETCH1:.*]] + sum += a[i]; + } + + #pragma clang loop prefetch(a, 1) + for (int i = 0; i < n; ++i) { +// CHECK: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX2:%.*]], align [[ALIGN]], !llvm.loop.prefetch ![[PREFETCH2:.*]] + sum += a[i]; + } + + #pragma clang loop prefetch(a, 1, 8) + for (int i = 0; i < n; ++i) { +// CHECK: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX3:%.*]], align [[ALIGN]], !llvm.loop.prefetch ![[PREFETCH3:.*]] + sum += a[i]; + } + + #pragma clang loop prefetch(a, 1, 8,) + for (int i = 0; i < n; ++i) { +// CHECK: [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX4:%.*]], align [[ALIGN]], !llvm.loop.prefetch ![[PREFETCH4:.*]] + sum += a[i]; + } + + int *b = a; + #pragma clang loop prefetch(a, 1, 8) + #pragma clang loop prefetch(b, 1) + for (int i = 0; i < n; ++i) { +// CHECK: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX5:%.*]], align [[ALIGN]], !llvm.loop.prefetch ![[PREFETCH5:.*]] +// CHECK: [[TMP6:%.*]] = load i32, ptr [[ARRAYIDX6:%.*]], align [[ALIGN]], !llvm.loop.prefetch ![[PREFETCH6:.*]] + sum += a[i] + b[i]; + } + + #pragma clang loop prefetch(a) + for (int i = 0; i < n; ++i) { +// CHECK: [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX7:%.*]], align [[ALIGN]], !llvm.loop.prefetch ![[PREFETCH7:.*]] + sum += a[i]; + for (int j = 0; j < n; ++j) { +// CHECK: [[TMP8:%.*]] = load i32, ptr 
[[ARRAYIDX8:%.*]], align [[ALIGN]], !llvm.loop.prefetch ![[PREFETCH7]] + sum += a[j]; + } + } + + #pragma clang loop prefetch(a, 1) + for (int i = 0; i < n; ++i) { +// CHECK: [[TMP9:%.*]] = load i32, ptr [[ARRAYIDX9:%.*]], align [[ALIGN]], !llvm.loop.prefetch ![[PREFETCH8:.*]] + sum += a[i]; + #pragma clang loop prefetch(a, 2) + for (int j = 0; j < n; ++j) { +// CHECK: [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX10:%.*]], align [[ALIGN]], !llvm.loop.prefetch ![[PREFETCH9:.*]] + sum += a[j]; + } + } + + return sum; +} + +// Verify the while loop is recognized after a sequence of pragma clang loop directives. +int while_prefetch_test(int *a, int n, int &sum) { + int i = 0; + #pragma clang loop vectorize(enable) + #pragma clang loop interleave_count(4) + #pragma clang loop prefetch(a) + #pragma clang loop unroll(full) + #pragma clang loop distribute(enable) + while (i++ < n) { +// CHECK: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX1:%.*]], align [[ALIGN]], !llvm.loop.prefetch ![[PREFETCH10:.*]] + sum += a[i]; + } + return sum; +} + +int prefetch_noprefetch_test(int *a, int n, int &sum) { + #pragma clang loop noprefetch(a) + for (int i = 0; i < n; ++i) { +// CHECK: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX1:%.*]], align [[ALIGN]], !llvm.loop.prefetch ![[NOPREFETCH9:.*]] + sum += a[i]; + #pragma clang loop prefetch(a) + for (int j = 0; j < n; ++j) { +// CHECK: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX2:%.*]], align [[ALIGN]], !llvm.loop.prefetch ![[PREFETCH11:.*]] + sum += a[j]; + } + } + + #pragma clang loop prefetch(a) + for (int i = 0; i < n; ++i) { +// CHECK: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX3:%.*]], align [[ALIGN]], !llvm.loop.prefetch ![[PREFETCH12:.*]] + sum += a[i]; + #pragma clang loop noprefetch(a) + for (int j = 0; j < n; ++j) { +// CHECK: [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX4:%.*]], align [[ALIGN]], !llvm.loop.prefetch ![[NOPREFETCH10:.*]] + sum += a[j]; + } + } + return sum; +} + +int openmp_prefetch_test(int *a, int n, int &sum) { + #pragma omp parallel for simd +
#pragma clang loop prefetch(a, 0, 4) + for (int i = 0; i < n; ++i) { +// CHECK-OMP: [[TMP1:%.*]] = load i32, ptr [[ARRARYIDX1:%.*]], align [[ALIGN:.*]], !llvm.access.group [[GROUP:.*]], !llvm.loop.prefetch ![[PREFETCH13:.*]] + sum += a[i]; + } + + #pragma omp parallel for private(a) + #pragma clang loop prefetch(a, 1, 4) + for (int i = 0; i < n; ++i) { +// CHECK-OMP: [[TMP2:%.*]] = load i32, ptr [[ARRARYIDX2:%.*]], align [[ALIGN]], !llvm.loop.prefetch ![[PREFETCH14:.*]] + sum += a[i]; + } + + #pragma omp parallel for firstprivate(a) + #pragma clang loop prefetch(a, 2, 4) + for (int i = 0; i < n; ++i) { +// CHECK-OMP: [[TMP3:%.*]] = load i32, ptr [[ARRARYIDX3:%.*]], align [[ALIGN]], !llvm.loop.prefetch ![[PREFETCH15:.*]] + sum += a[i]; + } + return sum; +} + +// CHECK: ![[NOPREFETCH1]] = distinct !{i1 false, i32 -1, i32 -1} +// CHECK: ![[NOPREFETCH2]] = distinct !{i1 false, i32 -1, i32 -1} +// CHECK: ![[NOPREFETCH3]] = distinct !{i1 false, i32 -1, i32 -1} +// CHECK: ![[NOPREFETCH4]] = distinct !{i1 false, i32 -1, i32 -1} +// CHECK: ![[NOPREFETCH5]] = distinct !{i1 false, i32 -1, i32 -1} +// CHECK: ![[NOPREFETCH6]] = distinct !{i1 false, i32 -1, i32 -1} +// CHECK: ![[NOPREFETCH7]] = distinct !{i1 false, i32 -1, i32 -1} +// CHECK: ![[NOPREFETCH8]] = distinct !{i1 false, i32 -1, i32 -1} +// CHECK: ![[PREFETCH1]] = distinct !{i1 true, i32 -1, i32 -1} +// CHECK: ![[PREFETCH2]] = distinct !{i1 true, i32 1, i32 -1} +// CHECK: ![[PREFETCH3]] = distinct !{i1 true, i32 1, i32 8} +// CHECK: ![[PREFETCH4]] = distinct !{i1 true, i32 1, i32 8} +// CHECK: ![[PREFETCH5]] = distinct !{i1 true, i32 1, i32 8} +// CHECK: ![[PREFETCH6]] = distinct !{i1 true, i32 1, i32 -1} +// CHECK: ![[PREFETCH7]] = distinct !{i1 true, i32 -1, i32 -1} +// CHECK: ![[PREFETCH8]] = distinct !{i1 true, i32 1, i32 -1} +// CHECK: ![[PREFETCH9]] = distinct !{i1 true, i32 2, i32 -1} +// CHECK: ![[PREFETCH10]] = distinct !{i1 true, i32 -1, i32 -1} +// CHECK: ![[NOPREFETCH9]] = distinct !{i1 false, i32 -1, i32 
-1} +// CHECK: ![[PREFETCH11]] = distinct !{i1 true, i32 -1, i32 -1} +// CHECK: ![[PREFETCH12]] = distinct !{i1 true, i32 -1, i32 -1} +// CHECK: ![[NOPREFETCH10]] = distinct !{i1 false, i32 -1, i32 -1} +// CHECK-OMP: ![[PREFETCH13]] = distinct !{i1 true, i32 0, i32 4} +// CHECK-OMP: ![[PREFETCH14]] = distinct !{i1 true, i32 1, i32 4} +// CHECK-OMP: ![[PREFETCH15]] = distinct !{i1 true, i32 2, i32 4} Index: clang/test/Parser/pragma-loop.cpp =================================================================== --- clang/test/Parser/pragma-loop.cpp +++ clang/test/Parser/pragma-loop.cpp @@ -157,7 +157,7 @@ /* expected-error {{missing argument; expected 'enable', 'full' or 'disable'}} */ #pragma clang loop unroll() /* expected-error {{missing argument; expected 'enable' or 'disable'}} */ #pragma clang loop distribute() -/* expected-error {{missing option; expected vectorize, vectorize_width, interleave, interleave_count, unroll, unroll_count, pipeline, pipeline_initiation_interval, vectorize_predicate, or distribute}} */ #pragma clang loop +/* expected-error {{missing option; expected vectorize, vectorize_width, interleave, interleave_count, unroll, unroll_count, pipeline, pipeline_initiation_interval, vectorize_predicate, distribute, prefetch, or noprefetch}} */ #pragma clang loop /* expected-error {{invalid option 'badkeyword'}} */ #pragma clang loop badkeyword /* expected-error {{invalid option 'badkeyword'}} */ #pragma clang loop badkeyword(enable) /* expected-error {{invalid option 'badkeyword'}} */ #pragma clang loop vectorize(enable) badkeyword(4) Index: clang/test/Parser/pragma-prefetch-noprefetch.cpp =================================================================== --- /dev/null +++ clang/test/Parser/pragma-prefetch-noprefetch.cpp @@ -0,0 +1,200 @@ +// RUN: %clang_cc1 -std=c++11 -verify %s + +// Note that this puts the expected lines before the directives to work around +// limitations in the -verify mode. 
+ +int test_noprefetch(int *a, int n, int &sum) { + #pragma clang loop noprefetch(a) + for (int i = 0; i < n; ++i) { + sum += a[i]; + } + + int i = 0; + #pragma clang loop noprefetch(a) + while (i++ < n) { + sum += a[i]; + } + + #pragma clang loop noprefetch(a) +/* expected-error {{incompatible directives 'noprefetch(a)' and 'prefetch(a)'}} */ #pragma clang loop prefetch(a) + for (int i = 0; i < n; ++i) { + sum += a[i]; + } + + #pragma clang loop noprefetch(a) +/* expected-error {{incompatible directives 'noprefetch(a)' and 'noprefetch(a)'}} */ #pragma clang loop noprefetch(a) + for (int i = 0; i < n; ++i) { + sum += a[i]; + } + +/* expected-error {{use of undeclared identifier 'b'}} */ #pragma clang loop noprefetch(b) + for (int i = 0; i < n; ++i) { + sum += a[i]; + } + +/* expected-error {{use of undeclared identifier 'b'}} */ #pragma clang loop noprefetch(b) + for (int i = 0; i < n; ++i) { + int *b = a; + sum += a[i]; + } + +/* expected-error {{invalid argument; expected an declared variable}} */ #pragma clang loop noprefetch(1) + for (int i = 0; i < n; ++i) { + sum += a[i]; + } + +/* expected-error {{missing argument; expected an declared variable}} */ #pragma clang loop noprefetch() + for (int i = 0; i < n; ++i) { + sum += a[i]; + } + +/* expected-warning {{extra tokens at end of '#pragma clang loop noprefetch' - ignored}} */ #pragma clang loop noprefetch(a,2) + for (int i = 0; i < n; ++i) { + sum += a[i]; + } + return sum; +} + +int test_prefetch(int *a, int n, int &sum) { + #pragma clang loop prefetch(a) + for (int i = 0; i < n; ++i) { + sum += a[i]; + } + + #pragma clang loop prefetch(a, 0) + for (int i = 0; i < n; ++i) { + sum += a[i]; + } + + #pragma clang loop prefetch(a, 1+2) + for (int i = 0; i < n; ++i) { + sum += a[i]; + } + + #pragma clang loop prefetch(a, 3, 2147483647) + for (int i = 0; i < n; ++i) { + sum += a[i]; + } + + int i = 0; + #pragma clang loop noprefetch(a) + while (i++ < n) { + sum += a[i]; + } + + int *b = a; + #pragma clang loop 
prefetch(a) + #pragma clang loop prefetch(b) + for (int i = 0; i < n; ++i) { + sum += a[i] + b[i]; + } + + #pragma clang loop prefetch(a) +/* expected-error {{incompatible directives 'prefetch(a)' and 'prefetch(a, 1)'}} */ #pragma clang loop prefetch(a, 1) + for (int i = 0; i < n; ++i) { + sum += a[i]; + } + +/* expected-error {{use of undeclared identifier 'c'}} */ #pragma clang loop prefetch(c) + for (int i = 0; i < n; ++i) { + sum += a[i]; + } + +/* expected-error {{use of undeclared identifier 'c'}} */ #pragma clang loop prefetch(c) + for (int i = 0; i < n; ++i) { + int *c = a; + sum += a[i]; + } + +/* expected-error {{invalid argument; expected an declared variable}} */ #pragma clang loop prefetch(1) + for (int i = 0; i < n; ++i) { + sum += a[i]; + } + +/* expected-error {{missing argument; expected an declared variable}} */ #pragma clang loop prefetch() + for (int i = 0; i < n; ++i) { + sum += a[i]; + } + +/* expected-error {{expected ')' or ',' after 'a'}} */ #pragma clang loop prefetch(a;) + for (int i = 0; i < n; ++i) { + sum += a[i]; + } + +/* expected-error {{expected ')' or ','}} */ #pragma clang loop prefetch(a, 1;) + for (int i = 0; i < n; ++i) { + sum += a[i]; + } + +/* expected-error {{expected expression}} */ #pragma clang loop prefetch(a,) + for (int i = 0; i < n; ++i) { + sum += a[i]; + } + +/* expected-error {{invalid argument of type 'double'; expected an integer type}} */ #pragma clang loop prefetch(a, 2.5) + for (int i = 0; i < n; ++i) { + sum += a[i]; + } + +/* expected-error {{expected expression}} */ #pragma clang loop prefetch(a,,) + for (int i = 0; i < n; ++i) { + sum += a[i]; + } + +/* expected-error {{invalid argument of type 'char'; expected an integer type}} */ #pragma clang loop prefetch(a, 'c') + for (int i = 0; i < n; ++i) { + sum += a[i]; + } + +/* expected-error {{use of undeclared identifier 'c'}} */ #pragma clang loop prefetch(a, c) + for (int i = 0; i < n; ++i) { + sum += a[i]; + } + +/* expected-error {{invalid argument of 
type 'int *'; expected an integer type}} */ #pragma clang loop prefetch(a, a) + for (int i = 0; i < n; ++i) { + sum += a[i]; + } + +/* expected-error {{invalid value '-1'; expected integer constant between 0 and 3}} */ #pragma clang loop prefetch(a, -1) + for (int i = 0; i < n; ++i) { + sum += a[i]; + } + +/* expected-error {{invalid value '4'; expected integer constant between 0 and 3}} */ #pragma clang loop prefetch(a, 4) + for (int i = 0; i < n; ++i) { + sum += a[i]; + } + +/* expected-error {{invalid value '5'; expected integer constant between 0 and 3}} */ #pragma clang loop prefetch(a, 1+4) + for (int i = 0; i < n; ++i) { + sum += a[i]; + } + +/* expected-error {{expected expression}} */ #pragma clang loop prefetch(a, 2,) + for (int i = 0; i < n; ++i) { + sum += a[i]; + } + +/* expected-error {{invalid argument of type 'char'; expected an integer type}} */ #pragma clang loop prefetch(a, 2, 'c') + for (int i = 0; i < n; ++i) { + sum += a[i]; + } + +/* expected-error {{invalid value '0'; must be positive}} */ #pragma clang loop prefetch(a, 2, 0) + for (int i = 0; i < n; ++i) { + sum += a[i]; + } + +/* expected-error {{value '2147483648' is too large}} */ #pragma clang loop prefetch(a, 2, 2147483648) + for (int i = 0; i < n; ++i) { + sum += a[i]; + } + +/* expected-warning {{extra tokens at end of '#pragma clang loop noprefetch' - ignored}} */ #pragma clang loop noprefetch(a,2,3,4) + for (int i = 0; i < n; ++i) { + sum += a[i]; + } + + return sum; +} Index: clang/test/Parser/pragma-unroll-and-jam.cpp =================================================================== --- clang/test/Parser/pragma-unroll-and-jam.cpp +++ clang/test/Parser/pragma-unroll-and-jam.cpp @@ -67,7 +67,7 @@ } // pragma clang unroll_and_jam is disabled for the moment -/* expected-error {{invalid option 'unroll_and_jam'; expected vectorize, vectorize_width, interleave, interleave_count, unroll, unroll_count, pipeline, pipeline_initiation_interval, vectorize_predicate, or distribute}} */ 
#pragma clang loop unroll_and_jam(4) +/* expected-error {{invalid option 'unroll_and_jam'; expected vectorize, vectorize_width, interleave, interleave_count, unroll, unroll_count, pipeline, pipeline_initiation_interval, vectorize_predicate, distribute, prefetch, or noprefetch}} */ #pragma clang loop unroll_and_jam(4) for (int i = 0; i < Length; i++) { for (int j = 0; j < Length; j++) { List[i * Length + j] = Value;