Index: cfe/trunk/include/clang/AST/OpenMPClause.h =================================================================== --- cfe/trunk/include/clang/AST/OpenMPClause.h +++ cfe/trunk/include/clang/AST/OpenMPClause.h @@ -76,10 +76,17 @@ friend class OMPClauseReader; /// Pre-initialization statement for the clause. Stmt *PreInit; + /// Region that captures the associated stmt. + OpenMPDirectiveKind CaptureRegion; + protected: /// Set pre-initialization statement for the clause. - void setPreInitStmt(Stmt *S) { PreInit = S; } - OMPClauseWithPreInit(const OMPClause *This) : PreInit(nullptr) { + void setPreInitStmt(Stmt *S, OpenMPDirectiveKind ThisRegion = OMPD_unknown) { + PreInit = S; + CaptureRegion = ThisRegion; + } + OMPClauseWithPreInit(const OMPClause *This) + : PreInit(nullptr), CaptureRegion(OMPD_unknown) { assert(get(This) && "get is not tuned for pre-init."); } @@ -88,6 +95,8 @@ const Stmt *getPreInitStmt() const { return PreInit; } /// Get pre-initialization statement for the clause. Stmt *getPreInitStmt() { return PreInit; } + /// Get capture region for the stmt in the clause. + OpenMPDirectiveKind getCaptureRegion() { return CaptureRegion; } static OMPClauseWithPreInit *get(OMPClause *C); static const OMPClauseWithPreInit *get(const OMPClause *C); }; @@ -194,7 +203,7 @@ /// In this example directive '#pragma omp parallel' has simple 'if' clause with /// condition 'a > 5' and directive name modifier 'parallel'. /// -class OMPIfClause : public OMPClause { +class OMPIfClause : public OMPClause, public OMPClauseWithPreInit { friend class OMPClauseReader; /// \brief Location of '('. SourceLocation LParenLoc; @@ -225,26 +234,31 @@ /// /// \param NameModifier [OpenMP 4.1] Directive name modifier of clause. /// \param Cond Condition of the clause. + /// \param HelperCond Helper condition for the clause. + /// \param CaptureRegion Innermost OpenMP region where expressions in this + /// clause must be captured. /// \param StartLoc Starting location of the clause. /// \param LParenLoc Location of '('. /// \param NameModifierLoc Location of directive name modifier. /// \param ColonLoc [OpenMP 4.1] Location of ':'. /// \param EndLoc Ending location of the clause. /// - OMPIfClause(OpenMPDirectiveKind NameModifier, Expr *Cond, - SourceLocation StartLoc, SourceLocation LParenLoc, - SourceLocation NameModifierLoc, SourceLocation ColonLoc, - SourceLocation EndLoc) - : OMPClause(OMPC_if, StartLoc, EndLoc), LParenLoc(LParenLoc), - Condition(Cond), ColonLoc(ColonLoc), NameModifier(NameModifier), - NameModifierLoc(NameModifierLoc) {} + OMPIfClause(OpenMPDirectiveKind NameModifier, Expr *Cond, Stmt *HelperCond, + OpenMPDirectiveKind CaptureRegion, SourceLocation StartLoc, + SourceLocation LParenLoc, SourceLocation NameModifierLoc, + SourceLocation ColonLoc, SourceLocation EndLoc) + : OMPClause(OMPC_if, StartLoc, EndLoc), OMPClauseWithPreInit(this), + LParenLoc(LParenLoc), Condition(Cond), ColonLoc(ColonLoc), + NameModifier(NameModifier), NameModifierLoc(NameModifierLoc) { + setPreInitStmt(HelperCond, CaptureRegion); + } /// \brief Build an empty clause. /// OMPIfClause() - : OMPClause(OMPC_if, SourceLocation(), SourceLocation()), LParenLoc(), - Condition(nullptr), ColonLoc(), NameModifier(OMPD_unknown), - NameModifierLoc() {} + : OMPClause(OMPC_if, SourceLocation(), SourceLocation()), + OMPClauseWithPreInit(this), LParenLoc(), Condition(nullptr), ColonLoc(), + NameModifier(OMPD_unknown), NameModifierLoc() {} /// \brief Sets the location of '('. void setLParenLoc(SourceLocation Loc) { LParenLoc = Loc; } Index: cfe/trunk/include/clang/AST/RecursiveASTVisitor.h =================================================================== --- cfe/trunk/include/clang/AST/RecursiveASTVisitor.h +++ cfe/trunk/include/clang/AST/RecursiveASTVisitor.h @@ -2711,6 +2711,7 @@ template bool RecursiveASTVisitor::VisitOMPIfClause(OMPIfClause *C) { + TRY_TO(VisitOMPClauseWithPreInit(C)); TRY_TO(TraverseStmt(C->getCondition())); return true; } Index: cfe/trunk/lib/AST/OpenMPClause.cpp =================================================================== --- cfe/trunk/lib/AST/OpenMPClause.cpp +++ cfe/trunk/lib/AST/OpenMPClause.cpp @@ -48,9 +48,10 @@ return static_cast(C); case OMPC_linear: return static_cast(C); + case OMPC_if: + return static_cast(C); case OMPC_default: case OMPC_proc_bind: - case OMPC_if: case OMPC_final: case OMPC_num_threads: case OMPC_safelen: Index: cfe/trunk/lib/AST/StmtProfile.cpp =================================================================== --- cfe/trunk/lib/AST/StmtProfile.cpp +++ cfe/trunk/lib/AST/StmtProfile.cpp @@ -283,6 +283,7 @@ } void OMPClauseProfiler::VisitOMPIfClause(const OMPIfClause *C) { + VistOMPClauseWithPreInit(C); if (C->getCondition()) Profiler->VisitStmt(C->getCondition()); } Index: cfe/trunk/lib/CodeGen/CGStmtOpenMP.cpp =================================================================== --- cfe/trunk/lib/CodeGen/CGStmtOpenMP.cpp +++ cfe/trunk/lib/CodeGen/CGStmtOpenMP.cpp @@ -26,7 +26,7 @@ namespace { /// Lexical scope for OpenMP executable constructs, that handles correct codegen /// for captured expressions. -class OMPLexicalScope final : public CodeGenFunction::LexicalScope { +class OMPLexicalScope : public CodeGenFunction::LexicalScope { void emitPreInitStmt(CodeGenFunction &CGF, const OMPExecutableDirective &S) { for (const auto *C : S.clauses()) { if (auto *CPI = OMPClauseWithPreInit::get(C)) { @@ -54,10 +54,11 @@ public: OMPLexicalScope(CodeGenFunction &CGF, const OMPExecutableDirective &S, - bool AsInlined = false) + bool AsInlined = false, bool EmitPreInitStmt = true) : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()), InlinedShareds(CGF) { - emitPreInitStmt(CGF, S); + if (EmitPreInitStmt) + emitPreInitStmt(CGF, S); if (AsInlined) { if (S.hasAssociatedStmt()) { auto *CS = cast(S.getAssociatedStmt()); @@ -81,6 +82,22 @@ } }; +/// Lexical scope for OpenMP parallel construct, that handles correct codegen +/// for captured expressions. +class OMPParallelScope final : public OMPLexicalScope { + bool EmitPreInitStmt(const OMPExecutableDirective &S) { + OpenMPDirectiveKind Kind = S.getDirectiveKind(); + return !isOpenMPTargetExecutionDirective(Kind) && + isOpenMPParallelDirective(Kind); + } + +public: + OMPParallelScope(CodeGenFunction &CGF, const OMPExecutableDirective &S) + : OMPLexicalScope(CGF, S, + /*AsInlined=*/false, + /*EmitPreInitStmt=*/EmitPreInitStmt(S)) {} +}; + /// Private scope for OpenMP loop-based directives, that supports capturing /// of used expression from loop statement. class OMPLoopScope : public CodeGenFunction::RunCleanupsScope { @@ -1237,7 +1254,7 @@ } } - OMPLexicalScope Scope(CGF, S); + OMPParallelScope Scope(CGF, S); llvm::SmallVector CapturedVars; CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars); CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getLocStart(), OutlinedFn, @@ -3409,17 +3426,17 @@ CodeGenModule &CGM = CGF.CGM; const CapturedStmt &CS = *cast(S.getAssociatedStmt()); - llvm::SmallVector CapturedVars; - CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); - llvm::Function *Fn = nullptr; llvm::Constant *FnID = nullptr; - // Check if we have any if clause associated with the directive. const Expr *IfCond = nullptr; - - if (auto *C = S.getSingleClause()) { - IfCond = C->getCondition(); + // Check for the at most one if clause associated with the target region. + for (const auto *C : S.getClausesOfKind()) { + if (C->getNameModifier() == OMPD_unknown || + C->getNameModifier() == OMPD_target) { + IfCond = C->getCondition(); + break; + } } // Check if we have any device clause associated with the directive. @@ -3456,6 +3473,8 @@ CGM.getOpenMPRuntime().emitTargetOutlinedFunction(S, ParentName, Fn, FnID, IsOffloadEntry, CodeGen); OMPLexicalScope Scope(CGF, S); + llvm::SmallVector CapturedVars; + CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); CGM.getOpenMPRuntime().emitTargetCall(CGF, S, Fn, FnID, IfCond, Device, CapturedVars); } Index: cfe/trunk/lib/Sema/SemaOpenMP.cpp =================================================================== --- cfe/trunk/lib/Sema/SemaOpenMP.cpp +++ cfe/trunk/lib/Sema/SemaOpenMP.cpp @@ -1863,6 +1863,7 @@ OMPOrderedClause *OC = nullptr; OMPScheduleClause *SC = nullptr; SmallVector LCs; + SmallVector PICs; // This is required for proper codegen. for (auto *Clause : Clauses) { if (isOpenMPPrivate(Clause->getClauseKind()) || @@ -1879,15 +1880,8 @@ } DSAStack->setForceVarCapturing(/*V=*/false); } else if (isParallelOrTaskRegion(DSAStack->getCurrentDirective())) { - // Mark all variables in private list clauses as used in inner region. - // Required for proper codegen of combined directives. - // TODO: add processing for other clauses. - if (auto *C = OMPClauseWithPreInit::get(Clause)) { - if (auto *DS = cast_or_null(C->getPreInitStmt())) { - for (auto *D : DS->decls()) - MarkVariableReferenced(D->getLocation(), cast(D)); - } - } + if (auto *C = OMPClauseWithPreInit::get(Clause)) + PICs.push_back(C); if (auto *C = OMPClauseWithPostUpdate::get(Clause)) { if (auto *E = C->getPostUpdateExpr()) MarkDeclarationsReferencedInExpr(E); @@ -1933,10 +1927,31 @@ return StmtError(); } StmtResult SR = S; - int ThisCaptureLevel = - getOpenMPCaptureLevels(DSAStack->getCurrentDirective()); - while (--ThisCaptureLevel >= 0) + SmallVector CaptureRegions; + getOpenMPCaptureRegions(CaptureRegions, DSAStack->getCurrentDirective()); + for (auto ThisCaptureRegion : llvm::reverse(CaptureRegions)) { + // Mark all variables in private list clauses as used in inner region. + // Required for proper codegen of combined directives. + // TODO: add processing for other clauses. + if (isParallelOrTaskRegion(DSAStack->getCurrentDirective())) { + for (auto *C : PICs) { + OpenMPDirectiveKind CaptureRegion = C->getCaptureRegion(); + // Find the particular capture region for the clause if the + // directive is a combined one with multiple capture regions. + // If the directive is not a combined one, the capture region + // associated with the clause is OMPD_unknown and is generated + // only once. + if (CaptureRegion == ThisCaptureRegion || + CaptureRegion == OMPD_unknown) { + if (auto *DS = cast_or_null(C->getPreInitStmt())) { + for (auto *D : DS->decls()) + MarkVariableReferenced(D->getLocation(), cast(D)); + } + } + } + } SR = ActOnCapturedRegionEnd(SR.get()); + } return SR; } @@ -6611,6 +6626,137 @@ return Res; } +// An OpenMP directive such as 'target parallel' has two captured regions: +// for the 'target' and 'parallel' respectively. This function returns +// the region in which to capture expressions associated with a clause. +// A return value of OMPD_unknown signifies that the expression should not +// be captured. +static OpenMPDirectiveKind +getOpenMPCaptureRegionForClause(OpenMPDirectiveKind DKind, + OpenMPClauseKind CKind, + OpenMPDirectiveKind NameModifier) { + OpenMPDirectiveKind CaptureRegion = OMPD_unknown; + + switch (CKind) { + case OMPC_if: + switch (DKind) { + case OMPD_target_parallel: + // If this clause applies to the nested 'parallel' region, capture within + // the 'target' region, otherwise do not capture. + if (NameModifier == OMPD_unknown || NameModifier == OMPD_parallel) + CaptureRegion = OMPD_target; + break; + case OMPD_cancel: + case OMPD_parallel: + case OMPD_parallel_sections: + case OMPD_parallel_for: + case OMPD_parallel_for_simd: + case OMPD_target: + case OMPD_target_simd: + case OMPD_target_parallel_for: + case OMPD_target_parallel_for_simd: + case OMPD_target_teams: + case OMPD_target_teams_distribute: + case OMPD_target_teams_distribute_simd: + case OMPD_target_teams_distribute_parallel_for: + case OMPD_target_teams_distribute_parallel_for_simd: + case OMPD_teams_distribute_parallel_for: + case OMPD_teams_distribute_parallel_for_simd: + case OMPD_distribute_parallel_for: + case OMPD_distribute_parallel_for_simd: + case OMPD_task: + case OMPD_taskloop: + case OMPD_taskloop_simd: + case OMPD_target_data: + case OMPD_target_enter_data: + case OMPD_target_exit_data: + case OMPD_target_update: + // Do not capture if-clause expressions. + break; + case OMPD_threadprivate: + case OMPD_taskyield: + case OMPD_barrier: + case OMPD_taskwait: + case OMPD_cancellation_point: + case OMPD_flush: + case OMPD_declare_reduction: + case OMPD_declare_simd: + case OMPD_declare_target: + case OMPD_end_declare_target: + case OMPD_teams: + case OMPD_simd: + case OMPD_for: + case OMPD_for_simd: + case OMPD_sections: + case OMPD_section: + case OMPD_single: + case OMPD_master: + case OMPD_critical: + case OMPD_taskgroup: + case OMPD_distribute: + case OMPD_ordered: + case OMPD_atomic: + case OMPD_distribute_simd: + case OMPD_teams_distribute: + case OMPD_teams_distribute_simd: + llvm_unreachable("Unexpected OpenMP directive with if-clause"); + case OMPD_unknown: + llvm_unreachable("Unknown OpenMP directive"); + } + break; + case OMPC_schedule: + case OMPC_dist_schedule: + case OMPC_firstprivate: + case OMPC_lastprivate: + case OMPC_reduction: + case OMPC_linear: + case OMPC_default: + case OMPC_proc_bind: + case OMPC_final: + case OMPC_num_threads: + case OMPC_safelen: + case OMPC_simdlen: + case OMPC_collapse: + case OMPC_private: + case OMPC_shared: + case OMPC_aligned: + case OMPC_copyin: + case OMPC_copyprivate: + case OMPC_ordered: + case OMPC_nowait: + case OMPC_untied: + case OMPC_mergeable: + case OMPC_threadprivate: + case OMPC_flush: + case OMPC_read: + case OMPC_write: + case OMPC_update: + case OMPC_capture: + case OMPC_seq_cst: + case OMPC_depend: + case OMPC_device: + case OMPC_threads: + case OMPC_simd: + case OMPC_map: + case OMPC_num_teams: + case OMPC_thread_limit: + case OMPC_priority: + case OMPC_grainsize: + case OMPC_nogroup: + case OMPC_num_tasks: + case OMPC_hint: + case OMPC_defaultmap: + case OMPC_unknown: + case OMPC_uniform: + case OMPC_to: + case OMPC_from: + case OMPC_use_device_ptr: + case OMPC_is_device_ptr: + llvm_unreachable("Unexpected OpenMP clause."); + } + return CaptureRegion; +} + OMPClause *Sema::ActOnOpenMPIfClause(OpenMPDirectiveKind NameModifier, Expr *Condition, SourceLocation StartLoc, SourceLocation LParenLoc, @@ -6618,6 +6764,8 @@ SourceLocation ColonLoc, SourceLocation EndLoc) { Expr *ValExpr = Condition; + Stmt *HelperValStmt = nullptr; + OpenMPDirectiveKind CaptureRegion = OMPD_unknown; if (!Condition->isValueDependent() && !Condition->isTypeDependent() && !Condition->isInstantiationDependent() && !Condition->containsUnexpandedParameterPack()) { @@ -6626,10 +6774,20 @@ return nullptr; ValExpr = MakeFullExpr(Val.get()).get(); + + OpenMPDirectiveKind DKind = DSAStack->getCurrentDirective(); + CaptureRegion = + getOpenMPCaptureRegionForClause(DKind, OMPC_if, NameModifier); + if (CaptureRegion != OMPD_unknown) { + llvm::MapVector Captures; + ValExpr = tryBuildCapture(*this, ValExpr, Captures).get(); + HelperValStmt = buildPreInits(Context, Captures); + } } - return new (Context) OMPIfClause(NameModifier, ValExpr, StartLoc, LParenLoc, - NameModifierLoc, ColonLoc, EndLoc); + return new (Context) + OMPIfClause(NameModifier, ValExpr, HelperValStmt, CaptureRegion, StartLoc, + LParenLoc, NameModifierLoc, ColonLoc, EndLoc); } OMPClause *Sema::ActOnOpenMPFinalClause(Expr *Condition, Index: cfe/trunk/lib/Serialization/ASTReaderStmt.cpp =================================================================== --- cfe/trunk/lib/Serialization/ASTReaderStmt.cpp +++ cfe/trunk/lib/Serialization/ASTReaderStmt.cpp @@ -1928,7 +1928,8 @@ } void OMPClauseReader::VisitOMPClauseWithPreInit(OMPClauseWithPreInit *C) { - C->setPreInitStmt(Reader->Record.readSubStmt()); + C->setPreInitStmt(Reader->Record.readSubStmt(), + static_cast(Reader->Record.readInt())); } void OMPClauseReader::VisitOMPClauseWithPostUpdate(OMPClauseWithPostUpdate *C) { @@ -1937,6 +1938,7 @@ } void OMPClauseReader::VisitOMPIfClause(OMPIfClause *C) { + VisitOMPClauseWithPreInit(C); C->setNameModifier(static_cast(Reader->Record.readInt())); C->setNameModifierLoc(Reader->ReadSourceLocation()); C->setColonLoc(Reader->ReadSourceLocation()); Index: cfe/trunk/lib/Serialization/ASTWriterStmt.cpp =================================================================== --- cfe/trunk/lib/Serialization/ASTWriterStmt.cpp +++ cfe/trunk/lib/Serialization/ASTWriterStmt.cpp @@ -1794,6 +1794,7 @@ } void OMPClauseWriter::VisitOMPClauseWithPreInit(OMPClauseWithPreInit *C) { + Record.push_back(C->getCaptureRegion()); Record.AddStmt(C->getPreInitStmt()); } @@ -1803,6 +1804,7 @@ } void OMPClauseWriter::VisitOMPIfClause(OMPIfClause *C) { + VisitOMPClauseWithPreInit(C); Record.push_back(C->getNameModifier()); Record.AddSourceLocation(C->getNameModifierLoc()); Record.AddSourceLocation(C->getColonLoc()); Index: cfe/trunk/test/OpenMP/target_parallel_if_codegen.cpp =================================================================== --- cfe/trunk/test/OpenMP/target_parallel_if_codegen.cpp +++ cfe/trunk/test/OpenMP/target_parallel_if_codegen.cpp @@ -0,0 +1,413 @@ +// Test host codegen. +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 + +// Test target codegen - host bc file has to be created first. +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-64 +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-64 +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-32 +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-32 + +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +// CHECK-DAG: %ident_t = type { i32, i32, i32, i32, i8* } +// CHECK-DAG: [[STR:@.+]] = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00" +// CHECK-DAG: [[DEF_LOC:@.+]] = private unnamed_addr constant %ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } + +// CHECK-DAG: [[S1:%.+]] = type { double } +// CHECK-DAG: [[ENTTY:%.+]] = type { i8*, i8*, i[[SZ:32|64]], i32, i32 } +// CHECK-DAG: [[DEVTY:%.+]] = type { i8*, i8*, [[ENTTY]]*, [[ENTTY]]* } +// CHECK-DAG: [[DSCTY:%.+]] = type { i32, [[DEVTY]]*, [[ENTTY]]*, [[ENTTY]]* } + +// TCHECK: [[ENTTY:%.+]] = type { i8*, i8*, i{{32|64}}, i32, i32 } + +// We have 6 target regions + +// CHECK-DAG: @{{.*}} = private constant i8 0 +// CHECK-DAG: @{{.*}} = private constant i8 0 +// CHECK-DAG: @{{.*}} = private constant i8 0 +// CHECK-DAG: @{{.*}} = private constant i8 0 +// CHECK-DAG: @{{.*}} = private constant i8 0 +// CHECK-DAG: @{{.*}} = private constant i8 0 + +// TCHECK: @{{.+}} = constant [[ENTTY]] +// TCHECK: @{{.+}} = constant [[ENTTY]] +// TCHECK: @{{.+}} = constant [[ENTTY]] +// TCHECK: @{{.+}} = constant [[ENTTY]] +// TCHECK: @{{.+}} = constant [[ENTTY]] +// TCHECK: @{{.+}} = constant [[ENTTY]] + +// Check if offloading descriptor is created. +// CHECK: [[ENTBEGIN:@.+]] = external constant [[ENTTY]] +// CHECK: [[ENTEND:@.+]] = external constant [[ENTTY]] +// CHECK: [[DEVBEGIN:@.+]] = external constant i8 +// CHECK: [[DEVEND:@.+]] = external constant i8 +// CHECK: [[IMAGES:@.+]] = internal unnamed_addr constant [1 x [[DEVTY]]] [{{.+}} { i8* [[DEVBEGIN]], i8* [[DEVEND]], [[ENTTY]]* [[ENTBEGIN]], [[ENTTY]]* [[ENTEND]] }] +// CHECK: [[DESC:@.+]] = internal constant [[DSCTY]] { i32 1, [[DEVTY]]* getelementptr inbounds ([1 x [[DEVTY]]], [1 x [[DEVTY]]]* [[IMAGES]], i32 0, i32 0), [[ENTTY]]* [[ENTBEGIN]], [[ENTTY]]* [[ENTEND]] } + +// Check target registration is registered as a Ctor. +// CHECK: appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 0, void ()* bitcast (void (i8*)* [[REGFN:@.+]] to void ()*), i8* null }] + + +template +tx ftemplate(int n) { + tx a = 0; + + #pragma omp target parallel if(parallel: 0) + { + a += 1; + } + + short b = 1; + #pragma omp target parallel if(parallel: 1) + { + a += b; + } + + return a; +} + +static +int fstatic(int n) { + + #pragma omp target parallel if(n>1) + { + } + + #pragma omp target parallel if(target: n-2>2) + { + } + + return n+1; +} + +struct S1 { + double a; + + int r1(int n){ + int b = 1; + + #pragma omp target parallel if(parallel: n>3) + { + this->a = (double)b + 1.5; + } + + #pragma omp target parallel if(target: n>4) if(parallel: n>5) + { + this->a = 2.5; + } + + return (int)a; + } +}; + +// CHECK: define {{.*}}@{{.*}}bar{{.*}} +int bar(int n){ + int a = 0; + + S1 S; + // CHECK: call {{.*}}i32 [[FS1:@.+]]([[S1]]* {{.*}}, i32 {{.*}}) + a += S.r1(n); + + // CHECK: call {{.*}}i32 [[FSTATIC:@.+]](i32 {{.*}}) + a += fstatic(n); + + // CHECK: call {{.*}}i32 [[FTEMPLATE:@.+]](i32 {{.*}}) + a += ftemplate(n); + + return a; +} + + + +// +// CHECK: define {{.*}}[[FS1]]([[S1]]* {{%.+}}, i32 {{[^%]*}}[[PARM:%.+]]) +// +// CHECK-DAG: store i32 [[PARM]], i32* [[N_ADDR:%.+]], align +// CHECK: [[NV:%.+]] = load i32, i32* [[N_ADDR]], align +// CHECK: [[CMP:%.+]] = icmp sgt i32 [[NV]], 3 +// CHECK: [[FB:%.+]] = zext i1 [[CMP]] to i8 +// CHECK: store i8 [[FB]], i8* [[CAPE_ADDR:%.+]], align +// CHECK: [[CAPE:%.+]] = load i8, i8* [[CAPE_ADDR]], align +// CHECK: [[TB:%.+]] = trunc i8 [[CAPE]] to i1 +// CHECK: [[CONV:%.+]] = bitcast i[[SZ]]* [[CAPEC_ADDR:%.+]] to i8* +// CHECK: [[FB:%.+]] = zext i1 [[TB]] to i8 +// CHECK: store i8 [[FB]], i8* [[CONV]], align +// CHECK: [[ARG:%.+]] = load i[[SZ]], i[[SZ]]* [[CAPEC_ADDR]], align +// +// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target(i32 -1, i8* @{{[^,]+}}, i32 3, +// CHECK: store i32 [[RET]], i32* [[RHV:%.+]], align +// CHECK: [[RET2:%.+]] = load i32, i32* [[RHV]], align +// CHECK: [[ERROR:%.+]] = icmp ne i32 [[RET2]], 0 +// CHECK: br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]] +// +// CHECK: [[FAIL]] +// CHECK: call void [[HVT1:@.+]]([[S1]]* {{%.+}}, i[[SZ]] {{%.+}}, i[[SZ]] [[ARG]]) +// CHECK: br label {{%?}}[[END]] +// CHECK: [[END]] +// +// +// +// CHECK: [[NV:%.+]] = load i32, i32* [[N_ADDR]], align +// CHECK: [[CMP:%.+]] = icmp sgt i32 [[NV]], 5 +// CHECK: [[FB:%.+]] = zext i1 [[CMP]] to i8 +// CHECK: store i8 [[FB]], i8* [[CAPE_ADDR:%.+]], align +// CHECK: [[CAPE:%.+]] = load i8, i8* [[CAPE_ADDR]], align +// CHECK: [[TB:%.+]] = trunc i8 [[CAPE]] to i1 +// CHECK: [[CONV:%.+]] = bitcast i[[SZ]]* [[CAPEC_ADDR:%.+]] to i8* +// CHECK: [[FB:%.+]] = zext i1 [[TB]] to i8 +// CHECK: store i8 [[FB]], i8* [[CONV]], align +// CHECK: [[ARG:%.+]] = load i[[SZ]], i[[SZ]]* [[CAPEC_ADDR]], align +// CHECK: [[NV:%.+]] = load i32, i32* [[N_ADDR]], align +// CHECK: [[CMP:%.+]] = icmp sgt i32 [[NV]], 4 +// CHECK: br i1 [[CMP]], label {{%?}}[[IF_THEN:.+]], label {{%?}}[[IF_ELSE:.+]] +// +// CHECK: [[IF_THEN]] +// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target(i32 -1, i8* @{{[^,]+}}, i32 2, +// CHECK: store i32 [[RET]], i32* [[RHV:%.+]], align +// CHECK: br label {{%?}}[[END:.+]] +// +// CHECK: [[IF_ELSE]] +// CHECK: store i32 -1, i32* [[RHV]], align +// CHECK: br label {{%?}}[[END]] +// +// CHECK: [[END]] +// CHECK: [[RET2:%.+]] = load i32, i32* [[RHV]], align +// CHECK: [[ERROR:%.+]] = icmp ne i32 [[RET2]], 0 +// CHECK: br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]] +// +// CHECK: [[FAIL]] +// CHECK: call void [[HVT2:@.+]]([[S1]]* {{%.+}}, i[[SZ]] [[ARG]]) +// CHECK: br label {{%?}}[[END]] +// CHECK: [[END]] + + + + + + +// +// CHECK: define {{.*}}[[FSTATIC]](i32 {{[^%]*}}[[PARM:%.+]]) +// +// CHECK-DAG: store i32 [[PARM]], i32* [[N_ADDR:%.+]], align +// CHECK: [[NV:%.+]] = load i32, i32* [[N_ADDR]], align +// CHECK: [[CMP:%.+]] = icmp sgt i32 [[NV]], 1 +// CHECK: [[FB:%.+]] = zext i1 [[CMP]] to i8 +// CHECK: store i8 [[FB]], i8* [[CAPE_ADDR:%.+]], align +// CHECK: [[CAPE:%.+]] = load i8, i8* [[CAPE_ADDR]], align +// CHECK: [[TB:%.+]] = trunc i8 [[CAPE]] to i1 +// CHECK: [[CONV:%.+]] = bitcast i[[SZ]]* [[CAPEC_ADDR:%.+]] to i8* +// CHECK: [[FB:%.+]] = zext i1 [[TB]] to i8 +// CHECK: store i8 [[FB]], i8* [[CONV]], align +// CHECK: [[ARG:%.+]] = load i[[SZ]], i[[SZ]]* [[CAPEC_ADDR]], align +// CHECK: [[CAPE2:%.+]] = load i8, i8* [[CAPE_ADDR]], align +// CHECK: [[TB:%.+]] = trunc i8 [[CAPE2]] to i1 +// CHECK: br i1 [[TB]], label {{%?}}[[IF_THEN:.+]], label {{%?}}[[IF_ELSE:.+]] +// +// CHECK: [[IF_THEN]] +// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target(i32 -1, i8* @{{[^,]+}}, i32 1, +// CHECK: store i32 [[RET]], i32* [[RHV:%.+]], align +// CHECK: br label {{%?}}[[END:.+]] +// +// CHECK: [[IF_ELSE]] +// CHECK: store i32 -1, i32* [[RHV]], align +// CHECK: br label {{%?}}[[END]] +// +// CHECK: [[END]] +// CHECK: [[RET2:%.+]] = load i32, i32* [[RHV]], align +// CHECK: [[ERROR:%.+]] = icmp ne i32 [[RET2]], 0 +// CHECK: br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]] +// +// CHECK: [[FAIL]] +// CHECK: call void [[HVT3:@.+]](i[[SZ]] [[ARG]]) +// CHECK: br label {{%?}}[[END]] +// CHECK: [[END]] +// +// +// +// CHECK-DAG: [[NV:%.+]] = load i32, i32* [[N_ADDR]], align +// CHECK: [[SUB:%.+]] = sub nsw i32 [[NV]], 2 +// CHECK: [[CMP:%.+]] = icmp sgt i32 [[SUB]], 2 +// CHECK: br i1 [[CMP]], label {{%?}}[[IF_THEN:.+]], label {{%?}}[[IF_ELSE:.+]] +// +// CHECK: [[IF_THEN]] +// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target(i32 -1, i8* @{{[^,]+}}, i32 0, +// CHECK: store i32 [[RET]], i32* [[RHV:%.+]], align +// CHECK: br label {{%?}}[[END:.+]] +// +// CHECK: [[IF_ELSE]] +// CHECK: store i32 -1, i32* [[RHV]], align +// CHECK: br label {{%?}}[[END]] +// +// CHECK: [[END]] +// CHECK: [[RET2:%.+]] = load i32, i32* [[RHV]], align +// CHECK: [[ERROR:%.+]] = icmp ne i32 [[RET2]], 0 +// CHECK: br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]] +// +// CHECK: [[FAIL]] +// CHECK: call void [[HVT4:@.+]]() +// CHECK: br label {{%?}}[[END]] +// CHECK: [[END]] + + + + + + +// +// CHECK: define {{.*}}[[FTEMPLATE]] +// +// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target(i32 -1, i8* @{{[^,]+}}, i32 1, +// CHECK-NEXT: store i32 [[RET]], i32* [[RHV:%.+]], align +// CHECK-NEXT: [[RET2:%.+]] = load i32, i32* [[RHV]], align +// CHECK-NEXT: [[ERROR:%.+]] = icmp ne i32 [[RET2]], 0 +// CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]] +// +// CHECK: [[FAIL]] +// CHECK: call void [[HVT5:@.+]]({{[^,]+}}) +// CHECK: br label {{%?}}[[END]] +// +// CHECK: [[END]] +// +// +// +// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target(i32 -1, i8* @{{[^,]+}}, i32 2, +// CHECK-NEXT: store i32 [[RET]], i32* [[RHV:%.+]], align +// CHECK-NEXT: [[RET2:%.+]] = load i32, i32* [[RHV]], align +// CHECK-NEXT: [[ERROR:%.+]] = icmp ne i32 [[RET2]], 0 +// CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]] +// +// CHECK: [[FAIL]] +// CHECK: call void [[HVT6:@.+]]({{[^,]+}}, {{[^,]+}}) +// CHECK: br label {{%?}}[[END]] +// CHECK: [[END]] + + + + + + +// Check that the offloading functions are emitted and that the parallel function +// is appropriately guarded. + +// CHECK: define internal void [[HVT1]]([[S1]]* {{%.+}}, i[[SZ]] [[PARM1:%.+]], i[[SZ]] [[PARM2:%.+]]) +// CHECK-DAG: store i[[SZ]] [[PARM1]], i[[SZ]]* [[B_ADDR:%.+]], align +// CHECK-DAG: store i[[SZ]] [[PARM2]], i[[SZ]]* [[CAPE_ADDR:%.+]], align +// CHECK-64: [[CONVB:%.+]] = bitcast i[[SZ]]* [[B_ADDR]] to i32* +// CHECK: [[CONV:%.+]] = bitcast i[[SZ]]* [[CAPE_ADDR]] to i8* +// CHECK-64: [[BV:%.+]] = load i32, i32* [[CONVB]], align +// CHECK-32: [[BV:%.+]] = load i32, i32* [[B_ADDR]], align +// CHECK-64: [[BC:%.+]] = bitcast i64* [[ARGA:%.+]] to i32* +// CHECK-64: store i32 [[BV]], i32* [[BC]], align +// CHECK-64: [[ARG:%.+]] = load i[[SZ]], i[[SZ]]* [[ARGA]], align +// CHECK-32: store i32 [[BV]], i32* [[ARGA:%.+]], align +// CHECK-32: [[ARG:%.+]] = load i[[SZ]], i[[SZ]]* [[ARGA]], align +// CHECK: [[IFC:%.+]] = load i8, i8* [[CONV]], align +// CHECK: [[TB:%.+]] = trunc i8 [[IFC]] to i1 +// CHECK: br i1 [[TB]], label {{%?}}[[IF_THEN:.+]], label {{%?}}[[IF_ELSE:.+]] +// +// CHECK: [[IF_THEN]] +// CHECK: call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%ident_t* [[DEF_LOC]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [[S1]]*, i[[SZ]])* [[OMP_OUTLINED3:@.+]] to void (i32*, i32*, ...)*), [[S1]]* {{.+}}, i[[SZ]] [[ARG]]) +// CHECK: br label {{%?}}[[END:.+]] +// +// CHECK: [[IF_ELSE]] +// CHECK: call void @__kmpc_serialized_parallel( +// CHECK: call void [[OMP_OUTLINED3]](i32* {{%.+}}, i32* {{%.+}}, [[S1]]* {{.+}}, i[[SZ]] [[ARG]]) +// CHECK: call void @__kmpc_end_serialized_parallel( +// CHECK: br label {{%?}}[[END]] +// +// CHECK: [[END]] +// +// + + +// CHECK: define internal void [[HVT2]]([[S1]]* {{%.+}}, i[[SZ]] [[PARM:%.+]]) +// CHECK-DAG: store i[[SZ]] [[PARM]], i[[SZ]]* [[CAPE_ADDR:%.+]], align +// CHECK: [[CONV:%.+]] = bitcast i[[SZ]]* [[CAPE_ADDR]] to i8* +// CHECK: [[IFC:%.+]] = load i8, i8* [[CONV]], align +// CHECK: [[TB:%.+]] = trunc i8 [[IFC]] to i1 +// CHECK: br i1 [[TB]], label {{%?}}[[IF_THEN:.+]], label {{%?}}[[IF_ELSE:.+]] +// +// CHECK: [[IF_THEN]] +// CHECK: call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%ident_t* [[DEF_LOC]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [[S1]]*)* [[OMP_OUTLINED4:@.+]] to void (i32*, i32*, ...)*), [[S1]]* {{.+}}) +// CHECK: br label {{%?}}[[END:.+]] +// +// CHECK: [[IF_ELSE]] +// CHECK: call void @__kmpc_serialized_parallel( +// CHECK: call void [[OMP_OUTLINED4]](i32* {{%.+}}, i32* {{%.+}}, [[S1]]* {{.+}}) +// CHECK: call void @__kmpc_end_serialized_parallel( +// CHECK: br label {{%?}}[[END]] +// +// CHECK: [[END]] +// +// + + + + + + + + +// CHECK: define internal void [[HVT3]](i[[SZ]] [[PARM:%.+]]) +// CHECK-DAG: store i[[SZ]] [[PARM]], i[[SZ]]* [[CAPE_ADDR:%.+]], align +// CHECK: [[CONV:%.+]] = bitcast i[[SZ]]* [[CAPE_ADDR]] to i8* +// CHECK: [[IFC:%.+]] = load i8, i8* [[CONV]], align +// CHECK: [[TB:%.+]] = trunc i8 [[IFC]] to i1 +// CHECK: br i1 [[TB]], label {{%?}}[[IF_THEN:.+]], label {{%?}}[[IF_ELSE:.+]] +// +// CHECK: [[IF_THEN]] +// CHECK: call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%ident_t* [[DEF_LOC]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* [[OMP_OUTLINED1:@.+]] to void (i32*, i32*, ...)*)) +// CHECK: br label {{%?}}[[END:.+]] +// +// CHECK: [[IF_ELSE]] +// CHECK: call void @__kmpc_serialized_parallel( +// CHECK: call void [[OMP_OUTLINED1]](i32* {{%.+}}, i32* {{%.+}}) +// CHECK: call void @__kmpc_end_serialized_parallel( +// CHECK: br label {{%?}}[[END]] +// +// CHECK: [[END]] +// +// +// CHECK: define internal void [[HVT4]]() +// CHECK: call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%ident_t* [[DEF_LOC]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* [[OMP_OUTLINED2:@.+]] to void (i32*, i32*, ...)*)) +// CHECK-NEXT: ret +// +// + + + + + +// CHECK: define internal void [[HVT5]]( +// CHECK-NOT: @__kmpc_fork_call +// CHECK: call void @__kmpc_serialized_parallel( +// CHECK: call void [[OMP_OUTLINED5:@.+]](i32* {{%.+}}, i32* {{%.+}}, i[[SZ]] {{.+}}) +// CHECK: call void @__kmpc_end_serialized_parallel( +// CHECK: ret +// +// + + +// CHECK: define internal void [[HVT6]]( +// CHECK-NOT: call void @__kmpc_serialized_parallel( +// CHECK-NOT: call void [[OMP_OUTLINED5:@.+]](i32* {{%.+}}, i32* {{%.+}}, i[[SZ]] {{.+}}) +// CHECK-NOT: call void @__kmpc_end_serialized_parallel( +// CHECK: call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%ident_t* [[DEF_LOC]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i[[SZ]], i[[SZ]])* [[OMP_OUTLINED5:@.+]] to void (i32*, i32*, ...)*), +// CHECK: ret +// +// + + + +#endif Index: cfe/trunk/tools/libclang/CIndex.cpp =================================================================== --- cfe/trunk/tools/libclang/CIndex.cpp +++ cfe/trunk/tools/libclang/CIndex.cpp @@ -2104,6 +2104,7 @@ } void OMPClauseEnqueue::VisitOMPIfClause(const OMPIfClause *C) { + VisitOMPClauseWithPreInit(C); Visitor->AddStmt(C->getCondition()); }