Index: docs/ReleaseNotes.rst
===================================================================
--- docs/ReleaseNotes.rst
+++ docs/ReleaseNotes.rst
@@ -106,6 +106,9 @@
 as interleave and unrolling count can be manually specified. See language
 extensions for details.
 
+When compiling CUDA C/C++ code, Clang now supports the `#pragma unroll`
+directive to specify loop unrolling optimization hints.
+
 C Language Changes in Clang
 ---------------------------
 
Index: include/clang/Basic/Attr.td
===================================================================
--- include/clang/Basic/Attr.td
+++ include/clang/Basic/Attr.td
@@ -1814,3 +1814,20 @@
 
   let Documentation = [LoopHintDocs];
 }
+
+def CudaUnrollHint : Attr {
+  let Spellings = [Pragma<"", "unroll">];
+
+  let Args = [IntArgument<"Value">];
+
+  let AdditionalMembers = [{
+  void printPrettyPragma(raw_ostream &OS, const PrintingPolicy &Policy) const {
+    // The string 'unroll' is emitted earlier when emitting the pragma name.
+    if (value)
+      OS << " " << value;
+    OS << "\n";
+  }
+  }];
+
+  let Documentation = [CudaUnrollHintDocs];
+}
Index: include/clang/Basic/AttrDocs.td
===================================================================
--- include/clang/Basic/AttrDocs.td
+++ include/clang/Basic/AttrDocs.td
@@ -1024,3 +1024,36 @@
 for details.
   }];
 }
+
+def CudaUnrollHintDocs : Documentation {
+  let Category = DocCatStmt;
+  let Content = [{
+When compiling the CUDA C/C++ language (indicated by specifying the ``-x cuda``
+command-line argument) the ``#pragma unroll`` directive is supported to specify
+loop unrolling hints. The pragma is placed immediately before a for, while,
+do-while, or C++11 range-based for loop. The pragma takes an optional parameter
+which must be a positive integer.
+
+.. code-block:: c++
+
+  #pragma unroll
+  for (...) {
+    ...
+  }
+
+  #pragma unroll 16
+  for (...) {
+    ...
+  }
+
+If ``#pragma unroll`` is specified without a parameter the loop unroller will
+attempt to fully unroll the loop if the trip count is known at compile time.
+Specifying the optional parameter, ``#pragma unroll _value_``, directs the
+unroller to unroll the loop ``_value_`` times. ``#pragma unroll`` and ``#pragma
+unroll _value_`` have identical semantics to ``#pragma clang loop
+unroll(enable)`` and ``#pragma clang loop unroll_count(_value_)`` respectively.
+See `language extensions
+<http://clang.llvm.org/docs/LanguageExtensions.html#extensions-for-loop-hint-optimizations>`_
+for further details including limitations of the unroll hints.
+  }];
+}
Index: include/clang/Basic/DiagnosticSemaKinds.td
===================================================================
--- include/clang/Basic/DiagnosticSemaKinds.td
+++ include/clang/Basic/DiagnosticSemaKinds.td
@@ -549,8 +549,12 @@
 def err_pragma_loop_compatibility : Error<
   "%select{incompatible|duplicate}0 directives '%1(%2)' and '%3(%4)'">;
 def err_pragma_loop_precedes_nonloop : Error<
-  "expected a for, while, or do-while loop to follow the '#pragma clang loop' "
+  "expected a for, while, or do-while loop to follow the '#pragma %0' "
   "directive">;
+def err_incompatible_pragma_loop_unroll : Error<
+  "'#pragma unroll' and '#pragma clang loop %0' cannot both be specified">;
+def err_duplicate_cuda_unroll_pragma : Error<
+  "duplicate '#pragma unroll' directives">;
 
 /// Objective-C parser diagnostics
 def err_duplicate_class_def : Error<
Index: include/clang/Basic/TokenKinds.def
===================================================================
--- include/clang/Basic/TokenKinds.def
+++ include/clang/Basic/TokenKinds.def
@@ -706,6 +706,11 @@
 // handles #pragma loop ... directives.
 ANNOTATION(pragma_loop_hint)
 
+// Annotations for CUDA-specific unroll pragma directives #pragma unroll ...
+// The lexer produces these so that they only take effect when the parser
+// handles #pragma unroll ... directives.
+ANNOTATION(pragma_cuda_unroll)
+
 // Annotations for module import translated from #include etc.
 ANNOTATION(module_include)
 ANNOTATION(module_begin)
Index: include/clang/Parse/Parser.h
===================================================================
--- include/clang/Parse/Parser.h
+++ include/clang/Parse/Parser.h
@@ -19,6 +19,7 @@
 #include "clang/Basic/Specifiers.h"
 #include "clang/Lex/CodeCompletionHandler.h"
 #include "clang/Lex/Preprocessor.h"
+#include "clang/Sema/CudaUnrollHint.h"
 #include "clang/Sema/DeclSpec.h"
 #include "clang/Sema/LoopHint.h"
 #include "clang/Sema/Sema.h"
@@ -163,6 +164,7 @@
   std::unique_ptr<PragmaHandler> MSSection;
   std::unique_ptr<PragmaHandler> OptimizeHandler;
   std::unique_ptr<PragmaHandler> LoopHintHandler;
+  std::unique_ptr<PragmaHandler> CudaUnrollHandler;
 
   std::unique_ptr<CommentHandler> CommentSemaHandler;
 
@@ -522,9 +524,13 @@
   StmtResult HandlePragmaCaptured();
 
   /// \brief Handle the annotation token produced for
-  /// #pragma vectorize...
+  /// #pragma clang loop...
   LoopHint HandlePragmaLoopHint();
 
+  /// \brief Handle the CUDA-specific annotation token produced for
+  /// #pragma unroll...
+  CudaUnrollHint HandlePragmaCudaUnroll();
+
   /// GetLookAheadToken - This peeks ahead N tokens and returns that token
   /// without consuming any tokens.  LookAhead(0) returns 'Tok', LookAhead(1)
   /// returns the token after Tok, etc.
@@ -1610,6 +1616,11 @@
   StmtResult ParsePragmaLoopHint(StmtVector &Stmts, bool OnlyStatement,
                                  SourceLocation *TrailingElseLoc,
                                  ParsedAttributesWithRange &Attrs);
+  /// \brief Parse a pragma_cuda_unroll annotation token and add a
+  /// corresponding CudaUnrollHint attribute to ParsedAttributes.
+  StmtResult ParsePragmaCudaUnroll(StmtVector &Stmts, bool OnlyStatement,
+                                   SourceLocation *TrailingElseLoc,
+                                   ParsedAttributesWithRange &Attrs);
 
   /// \brief Describes the behavior that should be taken for an __if_exists
   /// block.
Index: include/clang/Sema/CudaUnrollHint.h
===================================================================
--- include/clang/Sema/CudaUnrollHint.h
+++ include/clang/Sema/CudaUnrollHint.h
@@ -0,0 +1,35 @@
+//===--- CudaUnrollHint.h - Types for CudaUnrollHint ------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_SEMA_CUDAUNROLLHINT_H
+#define LLVM_CLANG_SEMA_CUDAUNROLLHINT_H
+
+#include "clang/Basic/SourceLocation.h"
+#include "clang/Sema/AttributeList.h"
+
+namespace clang {
+
+/// \brief Record for a CUDA loop unrolling pragma.
+struct CudaUnrollHint {
+  // Identifier corresponding to the string "unroll" in "#pragma unroll ...".
+  IdentifierLoc *UnrollLoc;
+  // Source range of the pragma.
+  SourceRange Range;
+  // For pragmas with an unroll count ("#pragma unroll N") this field contains
+  // the identifier for the unroll count value ("N"), or null if the pragma has
+  // no unroll count.
+  IdentifierLoc *ValueLoc;
+  // Expression for the unroll count, or null if the pragma has no unroll count
+  // value.
+  Expr *ValueExpr;
+};
+
+} // end namespace clang
+
+#endif // LLVM_CLANG_SEMA_CUDAUNROLLHINT_H
Index: lib/CodeGen/CGStmt.cpp
===================================================================
--- lib/CodeGen/CGStmt.cpp
+++ lib/CodeGen/CGStmt.cpp
@@ -537,17 +537,39 @@
   // Add vectorize and unroll hints to the metadata on the conditional branch.
   SmallVector<llvm::Value *, 2> Metadata(1);
   for (const auto *Attr : Attrs) {
-    const LoopHintAttr *LH = dyn_cast<LoopHintAttr>(Attr);
-
-    // Skip non loop hint attributes
-    if (!LH)
+    std::pair<llvm::MDString *, llvm::Value *> MetadataPair;
+    if (isa<LoopHintAttr>(Attr))
+      MetadataPair = MetadataForLoopHintAttr(cast<LoopHintAttr>(Attr), Context);
+    else if (isa<CudaUnrollHintAttr>(Attr))
+      MetadataPair = MetadataForCudaUnrollHintAttr(
+          cast<CudaUnrollHintAttr>(Attr), Context);
+    else
       continue;
 
-    LoopHintAttr::OptionType Option = LH->getOption();
-    int ValueInt = LH->getValue();
+    // Set or overwrite metadata indicated by Name.
+    Metadata.push_back(
+        llvm::MDNode::get(Context, {MetadataPair.first, MetadataPair.second}));
+  }
+
+  // Slot 0 is reserved for the self-reference, so the vector is never empty;
+  // only attach a loop ID when at least one hint node was appended.
+  if (Metadata.size() > 1) {
+    // Add llvm.loop MDNode to CondBr.
+    llvm::MDNode *LoopID = llvm::MDNode::get(Context, Metadata);
+    LoopID->replaceOperandWith(0, LoopID); // First op points to itself.
+
+    CondBr->setMetadata("llvm.loop", LoopID);
+  }
+}
+
+std::pair<llvm::MDString *, llvm::Value *>
+CodeGenFunction::MetadataForLoopHintAttr(const LoopHintAttr *LH,
+                                         llvm::LLVMContext &Context) {
+  LoopHintAttr::OptionType Option = LH->getOption();
+  int ValueInt = LH->getValue();
 
-    const char *MetadataName;
-    switch (Option) {
+  const char *MetadataName;
+  switch (Option) {
     case LoopHintAttr::Vectorize:
     case LoopHintAttr::VectorizeWidth:
       MetadataName = "llvm.loop.vectorize.width";
@@ -562,11 +584,11 @@
     case LoopHintAttr::UnrollCount:
       MetadataName = "llvm.loop.unroll.count";
       break;
-    }
+  }
 
-    llvm::Value *Value;
-    llvm::MDString *Name;
-    switch (Option) {
+  llvm::Value *Value;
+  llvm::MDString *Name;
+  switch (Option) {
     case LoopHintAttr::Vectorize:
     case LoopHintAttr::Interleave:
       if (ValueInt == 1) {
@@ -592,23 +614,26 @@
       Name = llvm::MDString::get(Context, MetadataName);
       Value = llvm::ConstantInt::get(Int32Ty, ValueInt);
       break;
-    }
-
-    SmallVector<llvm::Value *, 2> OpValues;
-    OpValues.push_back(Name);
-    OpValues.push_back(Value);
-
-    // Set or overwrite metadata indicated by Name.
-    Metadata.push_back(llvm::MDNode::get(Context, OpValues));
   }
+  return std::make_pair(Name, Value);
+}
 
-  if (!Metadata.empty()) {
-    // Add llvm.loop MDNode to CondBr.
-    llvm::MDNode *LoopID = llvm::MDNode::get(Context, Metadata);
-    LoopID->replaceOperandWith(0, LoopID); // First op points to itself.
-
-    CondBr->setMetadata("llvm.loop", LoopID);
+std::pair<llvm::MDString *, llvm::Value *>
+CodeGenFunction::MetadataForCudaUnrollHintAttr(const CudaUnrollHintAttr *CU,
+                                               llvm::LLVMContext &Context) {
+  const char *MetadataName;
+  llvm::Value *Value;
+  int ValueInt = CU->getValue();
+  if (ValueInt) {
+    MetadataName = "llvm.loop.unroll.count";
+    Value = llvm::ConstantInt::get(Int32Ty, ValueInt);
+  } else {
+    // A value of zero indicates that the optional unroll count was not
+    // specified with the unroll pragma.
+    MetadataName = "llvm.loop.unroll.enable";
+    Value = Builder.getTrue();
   }
+  return std::make_pair(llvm::MDString::get(Context, MetadataName), Value);
 }
 
 void CodeGenFunction::EmitWhileStmt(const WhileStmt &S,
Index: lib/CodeGen/CodeGenFunction.h
===================================================================
--- lib/CodeGen/CodeGenFunction.h
+++ lib/CodeGen/CodeGenFunction.h
@@ -1420,6 +1420,13 @@
   /// expression and compare the result against zero, returning an Int1Ty value.
   llvm::Value *EvaluateExprAsBool(const Expr *E);
 
+  /// Return a metadata string/value pair corresponding to the loop or CUDA
+  /// unroll hint attribute.
+  std::pair<llvm::MDString *, llvm::Value *> MetadataForLoopHintAttr(
+      const LoopHintAttr *LH, llvm::LLVMContext &Context);
+  std::pair<llvm::MDString *, llvm::Value *> MetadataForCudaUnrollHintAttr(
+      const CudaUnrollHintAttr *CU, llvm::LLVMContext &Context);
+
   /// EmitIgnoredExpr - Emit an expression in a context which ignores the result.
   void EmitIgnoredExpr(const Expr *E);
 
Index: lib/Parse/ParsePragma.cpp
===================================================================
--- lib/Parse/ParsePragma.cpp
+++ lib/Parse/ParsePragma.cpp
@@ -15,6 +15,7 @@
 #include "clang/Lex/Preprocessor.h"
 #include "clang/Parse/ParseDiagnostic.h"
 #include "clang/Parse/Parser.h"
+#include "clang/Sema/CudaUnrollHint.h"
 #include "clang/Sema/LoopHint.h"
 #include "clang/Sema/Scope.h"
 #include "llvm/ADT/StringSwitch.h"
@@ -148,6 +149,12 @@
                     Token &FirstToken) override;
 };
 
+struct PragmaCudaUnrollHandler : public PragmaHandler {
+  PragmaCudaUnrollHandler() : PragmaHandler("unroll") {}
+  void HandlePragma(Preprocessor &PP, PragmaIntroducerKind Introducer,
+                    Token &FirstToken) override;
+};
+
 } // end namespace
 
 void Parser::initializePragmaHandlers() {
@@ -218,6 +225,11 @@
 
   LoopHintHandler.reset(new PragmaLoopHintHandler());
   PP.AddPragmaHandler("clang", LoopHintHandler.get());
+
+  if (getLangOpts().CUDA) {
+    CudaUnrollHandler.reset(new PragmaCudaUnrollHandler());
+    PP.AddPragmaHandler(CudaUnrollHandler.get());
+  }
 }
 
 void Parser::resetPragmaHandlers() {
@@ -278,6 +290,11 @@
 
   PP.RemovePragmaHandler("clang", LoopHintHandler.get());
   LoopHintHandler.reset();
+
+  if (getLangOpts().CUDA) {
+    PP.RemovePragmaHandler(CudaUnrollHandler.get());
+    CudaUnrollHandler.reset();
+  }
 }
 
 /// \brief Handle the annotation token produced for #pragma unused(...)
@@ -633,6 +650,43 @@
   return Hint;
 }
 
+// Payload carried by the annot_pragma_cuda_unroll annotation token.
+struct PragmaCudaUnrollInfo {
+  Token Unroll;
+  Token Value;
+  bool HasValue;
+};
+
+CudaUnrollHint Parser::HandlePragmaCudaUnroll() {
+  assert(Tok.is(tok::annot_pragma_cuda_unroll));
+  PragmaCudaUnrollInfo *Info =
+      static_cast<PragmaCudaUnrollInfo *>(Tok.getAnnotationValue());
+
+  CudaUnrollHint Hint;
+  Hint.UnrollLoc =
+      IdentifierLoc::create(Actions.Context, Info->Unroll.getLocation(),
+                            Info->Unroll.getIdentifierInfo());
+  if (Info->HasValue) {
+    Hint.Range =
+        SourceRange(Info->Unroll.getLocation(), Info->Value.getLocation());
+    Hint.ValueLoc =
+        IdentifierLoc::create(Actions.Context, Info->Value.getLocation(),
+                              Info->Value.getIdentifierInfo());
+    // FIXME: We should allow non-type template parameters for the loop hint
+    // value. See bug report #19610
+    if (Info->Value.is(tok::numeric_constant))
+      Hint.ValueExpr = Actions.ActOnNumericConstant(Info->Value).get();
+    else
+      Hint.ValueExpr = nullptr;
+  } else {
+    Hint.Range = SourceRange(Info->Unroll.getLocation());
+    Hint.ValueLoc = nullptr;
+    Hint.ValueExpr = nullptr;
+  }
+
+  return Hint;
+}
+
 // #pragma GCC visibility comes in two variants:
 //   'push' '(' [visibility] ')'
 //   'pop'
@@ -1755,3 +1809,44 @@
                       /*DisableMacroExpansion=*/false,
                       /*OwnsTokens=*/true);
 }
+
+void PragmaCudaUnrollHandler::HandlePragma(Preprocessor &PP,
+                                           PragmaIntroducerKind Introducer,
+                                           Token &Tok) {
+  // "unroll" token from "#pragma unroll...".
+  Token Unroll = Tok;
+
+  bool HasValue;
+  Token Value;
+  PP.Lex(Tok);
+  if (Tok.is(tok::eod)) {
+    // Bare unroll pragma: #pragma unroll
+    HasValue = false;
+  } else {
+    // Unroll pragma with numeric argument: #pragma unroll N
+    HasValue = true;
+    Value = Tok;
+
+    PP.Lex(Tok);
+    if (Tok.isNot(tok::eod)) {
+      PP.Diag(Tok.getLocation(), diag::warn_pragma_extra_tokens_at_eol)
+          << "unroll";
+      return;
+    }
+  }
+
+  auto *Info = new (PP.getPreprocessorAllocator()) PragmaCudaUnrollInfo;
+  Info->Unroll = Unroll;
+  Info->HasValue = HasValue;
+  if (HasValue)
+    Info->Value = Value;
+
+  Token *TokenArray = new Token[1];
+  TokenArray[0].startToken();
+  TokenArray[0].setKind(tok::annot_pragma_cuda_unroll);
+  TokenArray[0].setLocation(Unroll.getLocation());
+  TokenArray[0].setAnnotationValue(static_cast<void *>(Info));
+  PP.EnterTokenStream(TokenArray, 1,
+                      /*DisableMacroExpansion=*/false,
+                      /*OwnsTokens=*/true);
+}
Index: lib/Parse/ParseStmt.cpp
===================================================================
--- lib/Parse/ParseStmt.cpp
+++ lib/Parse/ParseStmt.cpp
@@ -355,6 +355,10 @@
   case tok::annot_pragma_loop_hint:
     ProhibitAttributes(Attrs);
     return ParsePragmaLoopHint(Stmts, OnlyStatement, TrailingElseLoc, Attrs);
+
+  case tok::annot_pragma_cuda_unroll:
+    ProhibitAttributes(Attrs);
+    return ParsePragmaCudaUnroll(Stmts, OnlyStatement, TrailingElseLoc, Attrs);
   }
 
   // If we reached this code, the statement must end in a semicolon.
@@ -1828,6 +1832,28 @@
   return S;
 }
 
+StmtResult Parser::ParsePragmaCudaUnroll(StmtVector &Stmts, bool OnlyStatement,
+                                         SourceLocation *TrailingElseLoc,
+                                         ParsedAttributesWithRange &Attrs) {
+  assert(Tok.is(tok::annot_pragma_cuda_unroll));
+
+  CudaUnrollHint Hint = HandlePragmaCudaUnroll();
+  ConsumeToken();
+
+  // Get the next statement.
+  MaybeParseCXX11Attributes(Attrs);
+
+  StmtResult S = ParseStatementOrDeclarationAfterAttributes(
+      Stmts, OnlyStatement, TrailingElseLoc, Attrs);
+
+  // ArgHints holds exactly two entries (count identifier, count expression);
+  // the argument count passed here must match or addNew reads past the array.
+  ArgsUnion ArgHints[] = {Hint.ValueLoc, ArgsUnion(Hint.ValueExpr)};
+  Attrs.addNew(Hint.UnrollLoc->Ident, Hint.Range, nullptr, Hint.UnrollLoc->Loc,
+               ArgHints, 2, AttributeList::AS_Pragma);
+  return S;
+}
+
 Decl *Parser::ParseFunctionStatementBody(Decl *Decl, ParseScope &BodyScope) {
   assert(Tok.is(tok::l_brace));
   SourceLocation LBraceLoc = Tok.getLocation();
Index: lib/Sema/SemaStmtAttr.cpp
===================================================================
--- lib/Sema/SemaStmtAttr.cpp
+++ lib/Sema/SemaStmtAttr.cpp
@@ -49,7 +49,8 @@
       St->getStmtClass() != Stmt::ForStmtClass &&
       St->getStmtClass() != Stmt::CXXForRangeStmtClass &&
       St->getStmtClass() != Stmt::WhileStmtClass) {
-    S.Diag(St->getLocStart(), diag::err_pragma_loop_precedes_nonloop);
+    S.Diag(St->getLocStart(), diag::err_pragma_loop_precedes_nonloop)
+        << "clang loop";
     return nullptr;
   }
 
@@ -104,8 +105,47 @@
                                       A.getRange());
 }
 
-static void
-CheckForIncompatibleAttributes(Sema &S, SmallVectorImpl<const Attr *> &Attrs) {
+/// Validate a CUDA '#pragma unroll [N]' attached to \p St and build the
+/// corresponding CudaUnrollHintAttr; a missing count is recorded as 0.
+static Attr *handleCudaUnrollHintAttr(Sema &S, Stmt *St, const AttributeList &A,
+                                      SourceRange) {
+  if (St->getStmtClass() != Stmt::DoStmtClass &&
+      St->getStmtClass() != Stmt::ForStmtClass &&
+      St->getStmtClass() != Stmt::CXXForRangeStmtClass &&
+      St->getStmtClass() != Stmt::WhileStmtClass) {
+    S.Diag(St->getLocStart(), diag::err_pragma_loop_precedes_nonloop)
+        << "unroll";
+    return nullptr;
+  }
+
+  int ValueInt = 0;
+  if (IdentifierLoc *ValueLoc = A.getArgAsIdent(0)) {
+    Expr *ValueExpr = A.getArgAsExpr(1);
+    // FIXME: We should support template parameters for the loop hint value.
+    // See bug report #19610.
+    llvm::APSInt ValueAPS;
+    if (!ValueExpr || !ValueExpr->isIntegerConstantExpr(ValueAPS, S.Context) ||
+        (ValueInt = ValueAPS.getSExtValue()) < 1) {
+      S.Diag(ValueLoc->Loc, diag::err_pragma_loop_invalid_value);
+      return nullptr;
+    }
+  }
+  return CudaUnrollHintAttr::CreateImplicit(S.Context, ValueInt, A.getRange());
+}
+
+static void CheckForIncompatibleAttributes(
+    Sema &S, SmallVectorImpl<const Attr *> &Attrs) {
+  bool HasCudaUnroll = false;
+  for (const auto *I : Attrs) {
+    const CudaUnrollHintAttr *CU = dyn_cast<CudaUnrollHintAttr>(I);
+    if (!CU)
+      continue;
+
+    if (HasCudaUnroll)
+      S.Diag(CU->getLocation(), diag::err_duplicate_cuda_unroll_pragma);
+    HasCudaUnroll = true;
+  }
+
   // There are 3 categories of loop hints: vectorize, interleave, and
   // unroll. Each comes in two variants: an enable/disable form and a
   // form which takes a numeric argument. For example:
@@ -136,19 +176,20 @@
     int Option = LH->getOption();
     int ValueInt = LH->getValue();
 
+    enum { Vectorize = 0, Interleave = 1, Unroll = 2 };
     int Category;
     switch (Option) {
     case LoopHintAttr::Vectorize:
     case LoopHintAttr::VectorizeWidth:
-      Category = 0;
+      Category = Vectorize;
       break;
     case LoopHintAttr::Interleave:
     case LoopHintAttr::InterleaveCount:
-      Category = 1;
+      Category = Interleave;
       break;
     case LoopHintAttr::Unroll:
     case LoopHintAttr::UnrollCount:
-      Category = 2;
+      Category = Unroll;
       break;
     };
 
@@ -191,6 +232,12 @@
           << LoopHintAttr::getOptionName(CategoryState.NumericOptionId)
           << CategoryState.Value;
     }
+
+    // "#pragma clang loop unroll*()" and CUDA "#pragma unroll" directives are
+    // incompatible.
+    if (Category == Unroll && HasCudaUnroll)
+      S.Diag(ValueLoc, diag::err_incompatible_pragma_loop_unroll)
+          << LoopHintAttr::getOptionName(Option);
   }
 }
 
@@ -206,6 +253,8 @@
     return handleFallThroughAttr(S, St, A, Range);
   case AttributeList::AT_LoopHint:
     return handleLoopHintAttr(S, St, A, Range);
+  case AttributeList::AT_CudaUnrollHint:
+    return handleCudaUnrollHintAttr(S, St, A, Range);
   default:
     // if we're here, then we parsed a known attribute, but didn't recognize
     // it as a statement attribute => it is declaration attribute
Index: test/CodeGen/cuda-pragma-unroll.cu
===================================================================
--- test/CodeGen/cuda-pragma-unroll.cu
+++ test/CodeGen/cuda-pragma-unroll.cu
@@ -0,0 +1,59 @@
+// RUN: %clang_cc1 -x cuda -triple x86_64-apple-darwin -std=c++11 -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -x c++ -triple x86_64-apple-darwin -std=c++11 -emit-llvm -o - %s | FileCheck -check-prefix=NOTCUDA %s
+
+// If -x cuda is not specified, then the unroll pragma should not be recognized
+// and no loop unroll metadata should be emitted.
+// NOTCUDA-NOT: llvm.loop.unroll
+
+void while_test(int *List, int Length) {
+  int i = 0;
+#pragma unroll
+  while (i < Length) {
+    // CHECK: br i1 {{.*}}, label {{.*}}, label {{.*}}, !llvm.loop ![[LOOP_1:.*]]
+    List[i] = i * 2;
+    i++;
+  }
+}
+
+void do_test(int *List, int Length) {
+  int i = 0;
+#pragma unroll
+  do {
+    // CHECK: br i1 {{.*}}, label {{.*}}, label {{.*}}, !llvm.loop ![[LOOP_2:.*]]
+    List[i] = i * 2;
+    i++;
+  } while (i < Length);
+}
+
+void for_test(int *List, int Length) {
+#pragma unroll
+  for (int i = 0; i < Length; i++) {
+    // CHECK: br i1 {{.*}}, label {{.*}}, label {{.*}}, !llvm.loop ![[LOOP_3:.*]]
+    List[i] = i * 2;
+  }
+}
+
+void range_test(int *List, int Length) {
+  int VList[Length];
+#pragma unroll
+  for (int j : VList) {
+    // CHECK: br i1 {{.*}}, label {{.*}}, label {{.*}}, !llvm.loop ![[LOOP_4:.*]]
+    VList[j] = List[j];
+  }
+}
+
+void for_unroll_count_test(int *List, int Length) {
+#pragma unroll 16
+  for (int i = 0; i < Length; i++) {
+    // CHECK: br i1 {{.*}}, label {{.*}}, label {{.*}}, !llvm.loop ![[LOOP_5:.*]]
+    List[i] = i * 2;
+  }
+}
+
+// CHECK: ![[LOOP_1]] = metadata !{metadata ![[LOOP_1]], metadata ![[UNROLL_ENABLE:.*]]}
+// CHECK: ![[UNROLL_ENABLE]] = metadata !{metadata !"llvm.loop.unroll.enable", i1 true}
+// CHECK: ![[LOOP_2]] = metadata !{metadata ![[LOOP_2]], metadata ![[UNROLL_ENABLE]]}
+// CHECK: ![[LOOP_3]] = metadata !{metadata ![[LOOP_3]], metadata ![[UNROLL_ENABLE]]}
+// CHECK: ![[LOOP_4]] = metadata !{metadata ![[LOOP_4]], metadata ![[UNROLL_ENABLE]]}
+// CHECK: ![[LOOP_5]] = metadata !{metadata ![[LOOP_5]], metadata ![[UNROLL_COUNT_16:.*]]}
+// CHECK: ![[UNROLL_COUNT_16]] = metadata !{metadata !"llvm.loop.unroll.count", i32 16}
Index: test/Misc/ast-print-cuda-pragmas.cu
===================================================================
--- test/Misc/ast-print-cuda-pragmas.cu
+++ test/Misc/ast-print-cuda-pragmas.cu
@@ -0,0 +1,23 @@
+// RUN: %clang_cc1 -x cuda -ast-print -o - %s | FileCheck %s
+// RUN: %clang_cc1 -x c++ -ast-print -o - %s | FileCheck -check-prefix=NOTCUDA %s
+
+// If -x cuda is not specified, then the unroll pragma should not be recognized
+// and loop unroll pragmas should not be emitted.
+// NOTCUDA-NOT: #pragma unroll
+
+void test(int *List, int Length) {
+  int i = 0;
+#pragma unroll
+// CHECK: #pragma unroll
+// CHECK-NEXT: for (int i = 0; i < Length; i++)
+  for (int i = 0; i < Length; i++) {
+    List[i] = i * 2;
+  }
+
+#pragma unroll 8
+// CHECK: #pragma unroll 8
+// CHECK-NEXT: for (int i = 0; i < Length; i++)
+  for (int i = 0; i < Length; i++) {
+    List[i] = i * 2;
+  }
+}
Index: test/PCH/cuda-pragma-unroll.cu
===================================================================
--- test/PCH/cuda-pragma-unroll.cu
+++ test/PCH/cuda-pragma-unroll.cu
@@ -0,0 +1,30 @@
+// RUN: %clang_cc1 -x cuda -emit-pch -o %t.cuda.a %s
+// RUN: %clang_cc1 -x cuda -include-pch %t.cuda.a -ast-print -o - %s | FileCheck %s
+// RUN: %clang_cc1 -x c++ -emit-pch -o %t.notcuda.a %s
+// RUN: %clang_cc1 -x c++ -include-pch %t.notcuda.a -ast-print -o - %s | FileCheck -check-prefix=NOTCUDA %s
+
+// If -x cuda is not specified, then the unroll pragma should not be recognized
+// and loop unroll pragmas should not be emitted.
+// NOTCUDA-NOT: #pragma unroll
+
+// CHECK: #pragma unroll
+// CHECK: #pragma unroll 16
+
+#ifndef HEADER
+#define HEADER
+
+void unroll_test(int *List, int Length) {
+#pragma unroll
+  for (int i = 0; i < Length; i++) {
+    List[i] = i * 2;
+  }
+}
+
+void unroll_count_test(int *List, int Length) {
+#pragma unroll 16
+  for (int i = 0; i < Length; i++) {
+    List[i] = i * 2;
+  }
+}
+
+#endif
Index: test/Parser/cuda-pragma-unroll.cu
===================================================================
--- test/Parser/cuda-pragma-unroll.cu
+++ test/Parser/cuda-pragma-unroll.cu
@@ -0,0 +1,46 @@
+// RUN: %clang_cc1 -x cuda -verify %s
+
+// Note that this puts the expected lines before the directives to work around
+// limitations in the -verify mode.
+
+void test(int *List) {
+
+#pragma unroll
+  for (int i = 0; i < 1024; ++i) {
+    List[i] = i;
+  }
+
+#pragma unroll 16
+  for (int i = 0; i < 1024; ++i) {
+    List[i] = i;
+  }
+
+#pragma unroll
+/* expected-error {{'#pragma unroll' and '#pragma clang loop unroll_count' cannot both be specified}} */ #pragma clang loop unroll_count(4)
+  for (int i = 0; i < 1024; ++i) {
+    List[i] = i;
+  }
+
+#pragma unroll
+/* expected-error {{duplicate '#pragma unroll' directives}} */ #pragma unroll
+  for (int i = 0; i < 1024; ++i) {
+    List[i] = i;
+  }
+
+#pragma unroll 4
+/* expected-error {{duplicate '#pragma unroll' directives}} */ #pragma unroll
+  for (int i = 0; i < 1024; ++i) {
+    List[i] = i;
+  }
+
+/* expected-error {{invalid argument; expected a positive integer value}} */ #pragma unroll 0
+/* expected-error {{invalid argument; expected a positive integer value}} */ #pragma unroll garbage
+/* expected-warning {{extra tokens at end of '#pragma unroll'}} */ #pragma unroll 1 2
+/* expected-error {{invalid argument; expected a positive integer value}} */ #pragma unroll +
+  for (int i = 0; i < 1024; ++i) {
+    List[i] = i;
+  }
+
+#pragma unroll
+/* expected-error {{expected a for, while, or do-while loop to follow the '#pragma unroll' directive}} */ List[0] = List[1];
+}