Index: clang/lib/CodeGen/CGStmt.cpp =================================================================== --- clang/lib/CodeGen/CGStmt.cpp +++ clang/lib/CodeGen/CGStmt.cpp @@ -428,7 +428,7 @@ llvm_unreachable("target parallel loop directive not supported yet."); break; case Stmt::OMPParallelMaskedDirectiveClass: - llvm_unreachable("parallel masked directive not supported yet."); + EmitOMPParallelMaskedDirective(cast(*S)); break; } } Index: clang/lib/CodeGen/CGStmtOpenMP.cpp =================================================================== --- clang/lib/CodeGen/CGStmtOpenMP.cpp +++ clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -4489,6 +4489,33 @@ checkForLastprivateConditionalUpdate(*this, S); } +void CodeGenFunction::EmitOMPParallelMaskedDirective( + const OMPParallelMaskedDirective &S) { + // Emit directive as a combined directive that consists of two implicit + // directives: 'parallel' with 'masked' directive. + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { + Action.Enter(CGF); + OMPPrivateScope PrivateScope(CGF); + emitOMPCopyinClause(CGF, S); + (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); + CGF.EmitOMPPrivateClause(S, PrivateScope); + CGF.EmitOMPReductionClauseInit(S, PrivateScope); + (void)PrivateScope.Privatize(); + emitMasked(CGF, S); + CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel); + }; + { + auto LPCRegion = + CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); + emitCommonOMPParallelDirective(*this, S, OMPD_masked, CodeGen, + emitEmptyBoundParameters); + emitPostUpdateForReductionClause(*this, S, + [](CodeGenFunction &) { return nullptr; }); + } + // Check for outer lastprivate conditional update. + checkForLastprivateConditionalUpdate(*this, S); +} + void CodeGenFunction::EmitOMPParallelSectionsDirective( const OMPParallelSectionsDirective &S) { // Emit directive as a combined directive that consists of two implicit Index: clang/lib/CodeGen/CodeGenFunction.h =================================================================== --- clang/lib/CodeGen/CodeGenFunction.h +++ clang/lib/CodeGen/CodeGenFunction.h @@ -3585,6 +3585,7 @@ const OMPTargetTeamsDistributeSimdDirective &S); void EmitOMPGenericLoopDirective(const OMPGenericLoopDirective &S); void EmitOMPInteropDirective(const OMPInteropDirective &S); + void EmitOMPParallelMaskedDirective(const OMPParallelMaskedDirective &S); /// Emit device code for the target directive. static void EmitOMPTargetDeviceFunction(CodeGenModule &CGM, Index: clang/lib/Parse/ParseOpenMP.cpp =================================================================== --- clang/lib/Parse/ParseOpenMP.cpp +++ clang/lib/Parse/ParseOpenMP.cpp @@ -2483,8 +2483,8 @@ /// simd' | 'teams distribute parallel for simd' | 'teams distribute /// parallel for' | 'target teams' | 'target teams distribute' | 'target /// teams distribute parallel for' | 'target teams distribute parallel -/// for simd' | 'target teams distribute simd' | 'masked' {clause} -/// annot_pragma_openmp_end +/// for simd' | 'target teams distribute simd' | 'masked' | +/// 'parallel masked' {clause} annot_pragma_openmp_end /// StmtResult Parser::ParseOpenMPDeclarativeOrExecutableDirective( ParsedStmtContext StmtCtx, bool ReadDirectiveWithinMetadirective) { Index: clang/test/OpenMP/parallel_masked.cpp =================================================================== --- /dev/null +++ clang/test/OpenMP/parallel_masked.cpp @@ -0,0 +1,109 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --include-generated-funcs --version 2 +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=52 -verify %s -emit-llvm -o - | FileCheck %s +// expected-no-diagnostics + +void foo(); + +void masked() { + #pragma omp parallel masked + { + foo(); + } +} + +void maskedFilter() { + const int tid = 1; + #pragma omp parallel masked filter(tid) + { + foo(); + } +} + +void master() { + #pragma omp parallel master + { + foo(); + } +} +// CHECK-LABEL: define dso_local void @_Z6maskedv +// CHECK-SAME: () #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 0, ptr @.omp_outlined.) +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define internal void @.omp_outlined. +// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_masked(ptr @[[GLOB1]], i32 [[TMP1]], i32 0) +// CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK: omp_if.then: +// CHECK-NEXT: call void @_Z3foov() +// CHECK-NEXT: call void @__kmpc_end_masked(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK-NEXT: br label [[OMP_IF_END]] +// CHECK: omp_if.end: +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define dso_local void @_Z12maskedFilterv +// CHECK-SAME: () #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TID:%.*]] = alloca i32, align 4 +// CHECK-NEXT: store i32 1, ptr [[TID]], align 4 +// CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..1) +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define internal void @.omp_outlined..1 +// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_masked(ptr @[[GLOB1]], i32 [[TMP1]], i32 1) +// CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK: omp_if.then: +// CHECK-NEXT: call void @_Z3foov() +// CHECK-NEXT: call void @__kmpc_end_masked(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK-NEXT: br label [[OMP_IF_END]] +// CHECK: omp_if.end: +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define dso_local void @_Z6masterv +// CHECK-SAME: () #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..2) +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define internal void @.omp_outlined..2 +// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK: omp_if.then: +// CHECK-NEXT: call void @_Z3foov() +// CHECK-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK-NEXT: br label [[OMP_IF_END]] +// CHECK: omp_if.end: +// CHECK-NEXT: ret void +// Index: clang/test/OpenMP/parallel_masked_target.cpp =================================================================== --- /dev/null +++ clang/test/OpenMP/parallel_masked_target.cpp @@ -0,0 +1,112 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --include-generated-funcs --version 2 +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=52 -fopenmp-targets=nvptx64 -offload-device-only -verify %s -emit-llvm -o - | FileCheck %s +// expected-no-diagnostics + +void foo(); + +void masked() { + #pragma target + #pragma omp parallel masked + { + foo(); + } +} + +void maskedFilter() { + const int tid = 1; + #pragma target + #pragma omp parallel masked filter(tid) + { + foo(); + } +} + +void master() { + #pragma target + #pragma omp parallel master + { + foo(); + } +} +// CHECK-LABEL: define dso_local void @_Z6maskedv +// CHECK-SAME: () #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 0, ptr @.omp_outlined.) +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define internal void @.omp_outlined. +// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_masked(ptr @[[GLOB1]], i32 [[TMP1]], i32 0) +// CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK: omp_if.then: +// CHECK-NEXT: call void @_Z3foov() +// CHECK-NEXT: call void @__kmpc_end_masked(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK-NEXT: br label [[OMP_IF_END]] +// CHECK: omp_if.end: +// CHECK-NEXT: ret void +// +//llvm-l +// CHECK-LABEL: define dso_local void @_Z12maskedFilterv +// CHECK-SAME: () #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TID:%.*]] = alloca i32, align 4 +// CHECK-NEXT: store i32 1, ptr [[TID]], align 4 +// CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..1) +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define internal void @.omp_outlined..1 +// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_masked(ptr @[[GLOB1]], i32 [[TMP1]], i32 1) +// CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK: omp_if.then: +// CHECK-NEXT: call void @_Z3foov() +// CHECK-NEXT: call void @__kmpc_end_masked(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK-NEXT: br label [[OMP_IF_END]] +// CHECK: omp_if.end: +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define dso_local void @_Z6masterv +// CHECK-SAME: () #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..2) +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define internal void @.omp_outlined..2 +// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK: omp_if.then: +// CHECK-NEXT: call void @_Z3foov() +// CHECK-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK-NEXT: br label [[OMP_IF_END]] +// CHECK: omp_if.end: +// CHECK-NEXT: ret void +// Index: openmp/libomptarget/DeviceRTL/include/Interface.h =================================================================== --- openmp/libomptarget/DeviceRTL/include/Interface.h +++ openmp/libomptarget/DeviceRTL/include/Interface.h @@ -260,6 +260,10 @@ void __kmpc_end_master(IdentTy *Loc, int32_t TId); +int32_t __kmpc_masked(IdentTy *Loc, int32_t TId, int32_t Filter); + +void __kmpc_end_masked(IdentTy *Loc, int32_t TId); + int32_t __kmpc_single(IdentTy *Loc, int32_t TId); void __kmpc_end_single(IdentTy *Loc, int32_t TId); Index: openmp/libomptarget/DeviceRTL/src/Synchronization.cpp =================================================================== --- openmp/libomptarget/DeviceRTL/src/Synchronization.cpp +++ openmp/libomptarget/DeviceRTL/src/Synchronization.cpp @@ -491,6 +491,13 @@ void __kmpc_end_master(IdentTy *Loc, int32_t TId) { FunctionTracingRAII(); } +int32_t __kmpc_masked(IdentTy *Loc, int32_t TId, int32_t Filter) { + FunctionTracingRAII(); + return omp_get_thread_num() == Filter; +} + +void __kmpc_end_masked(IdentTy *Loc, int32_t TId) { FunctionTracingRAII(); } + int32_t __kmpc_single(IdentTy *Loc, int32_t TId) { FunctionTracingRAII(); return __kmpc_master(Loc, TId);