diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -21,6 +21,7 @@ #include "clang/AST/Stmt.h" #include "clang/AST/StmtOpenMP.h" #include "clang/Basic/PrettyStackTrace.h" +#include "llvm/Frontend/OpenMP/OMPIRBuilder.h" using namespace clang; using namespace CodeGen; using namespace llvm::omp; @@ -1318,6 +1319,87 @@ llvm::SmallVectorImpl &) {} void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) { + + if (llvm::OpenMPIRBuilder *OMPBuilder = CGM.getOpenMPIRBuilder()) { + // Check if we have any if clause associated with the directive. + llvm::Value *IfCond = nullptr; + if (const auto *C = S.getSingleClause()) + IfCond = EmitScalarExpr(C->getCondition(), + /*IgnoreResultAssign=*/true); + + llvm::Value *NumThreads = nullptr; + if (const auto *NumThreadsClause = S.getSingleClause()) + NumThreads = EmitScalarExpr(NumThreadsClause->getNumThreads(), + /*IgnoreResultAssign=*/true); + + ProcBindKind ProcBind = OMP_PROC_BIND_default; + if (const auto *ProcBindClause = S.getSingleClause()) + ProcBind = ProcBindClause->getProcBindKind(); + + using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; + + // The cleanup callback that finalizes all variabels at the given location, + // thus calls destructors etc. + auto FiniCB = [this](InsertPointTy IP) { + CGBuilderTy::InsertPointGuard IPG(Builder); + assert(IP.getBlock()->end() != IP.getPoint() && + "OpenMP IR Builder should cause terminated block!"); + llvm::BasicBlock *IPBB = IP.getBlock(); + llvm::BasicBlock *DestBB = IPBB->splitBasicBlock(IP.getPoint()); + IPBB->getTerminator()->eraseFromParent(); + Builder.SetInsertPoint(IPBB); + CodeGenFunction::JumpDest Dest = getJumpDestInCurrentScope(DestBB); + EmitBranchThroughCleanup(Dest); + }; + + // Privatization callback that performs appropriate action for + // shared/private/firstprivate/lastprivate/copyin/... variables. + // + // TODO: This defaults to shared right now. + auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, + llvm::Value &Val, llvm::Value *&ReplVal) { + // The next line is appropriate only for variables (Val) with the + // data-sharing attribute "shared". + ReplVal = &Val; + + return CodeGenIP; + }; + + const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel); + const Stmt *ParallelRegionBodyStmt = CS->getCapturedStmt(); + + auto BodyGenCB = [ParallelRegionBodyStmt, + this](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, + llvm::BasicBlock &ContinuationBB) { + auto OldAllocaIP = AllocaInsertPt; + AllocaInsertPt = &*AllocaIP.getPoint(); + + auto OldReturnBlock = ReturnBlock; + ReturnBlock = getJumpDestInCurrentScope(&ContinuationBB); + + llvm::BasicBlock *CodeGenIPBB = CodeGenIP.getBlock(); + CodeGenIPBB->splitBasicBlock(CodeGenIP.getPoint()); + llvm::Instruction *CodeGenIPBBTI = CodeGenIPBB->getTerminator(); + CodeGenIPBBTI->removeFromParent(); + + Builder.SetInsertPoint(CodeGenIPBB); + + EmitStmt(ParallelRegionBodyStmt); + + Builder.Insert(CodeGenIPBBTI); + + AllocaInsertPt = OldAllocaIP; + ReturnBlock = OldReturnBlock; + }; + + CGCapturedStmtInfo CGSI(*CS, CR_OpenMP); + CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI); + Builder.restoreIP(OMPBuilder->CreateParallel(Builder, BodyGenCB, PrivCB, + FiniCB, IfCond, NumThreads, + ProcBind, S.hasCancel())); + return; + } + // Emit parallel region as a standalone region. auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { Action.Enter(CGF); @@ -4747,6 +4829,19 @@ break; } } + if (llvm::OpenMPIRBuilder *OMPBuilder = CGM.getOpenMPIRBuilder()) { + // TODO: This check is necessary as we only generate `omp parallel` through + // the OpenMPIRBuilder for now. + if (S.getCancelRegion() == OMPD_parallel) { + llvm::Value *IfCondition = nullptr; + if (IfCond) + IfCondition = EmitScalarExpr(IfCond, + /*IgnoreResultAssign=*/true); + return Builder.restoreIP( + OMPBuilder->CreateCancel(Builder, IfCondition, S.getCancelRegion())); + } + } + CGM.getOpenMPRuntime().emitCancelCall(*this, S.getBeginLoc(), IfCond, S.getCancelRegion()); } diff --git a/clang/test/OpenMP/cancel_codegen.cpp b/clang/test/OpenMP/cancel_codegen.cpp --- a/clang/test/OpenMP/cancel_codegen.cpp +++ b/clang/test/OpenMP/cancel_codegen.cpp @@ -1,10 +1,10 @@ -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -triple x86_64-apple-darwin13.4.0 -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -triple x86_64-apple-darwin13.4.0 -emit-llvm -o - %s | FileCheck %s --check-prefixes=ALL,CHECK // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple x86_64-apple-darwin13.4.0 -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -std=c++11 -include-pch %t -fsyntax-only -verify %s -triple x86_64-apple-darwin13.4.0 -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -std=c++11 -include-pch %t -fsyntax-only -verify %s -triple x86_64-apple-darwin13.4.0 -emit-llvm -o - | FileCheck %s --check-prefixes=ALL,CHECK -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-enable-irbuilder -triple x86_64-apple-darwin13.4.0 -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-enable-irbuilder -triple x86_64-apple-darwin13.4.0 -emit-llvm -o - %s | FileCheck %s --check-prefixes=ALL,IRBUILDER // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-enable-irbuilder -x c++ -std=c++11 -triple x86_64-apple-darwin13.4.0 -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-enable-irbuilder -std=c++11 -include-pch %t -fsyntax-only -verify %s -triple x86_64-apple-darwin13.4.0 -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-enable-irbuilder -std=c++11 -include-pch %t -fsyntax-only -verify %s -triple x86_64-apple-darwin13.4.0 -emit-llvm -o - | FileCheck %s --check-prefixes=ALL,IRBUILDER // RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -triple x86_64-apple-darwin13.4.0 -emit-llvm -o - %s | FileCheck --check-prefix SIMD-ONLY0 %s // RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple x86_64-apple-darwin13.4.0 -emit-pch -o %t %s @@ -16,7 +16,7 @@ float flag; int main (int argc, char **argv) { -// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num( +// ALL: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num( #pragma omp parallel { #pragma omp cancel parallel if(flag) @@ -24,15 +24,15 @@ #pragma omp barrier argv[0][0] += argc; } -// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call( +// ALL: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call( #pragma omp sections { #pragma omp cancel sections } -// CHECK: call void @__kmpc_for_static_init_4( -// CHECK: call i32 @__kmpc_cancel( -// CHECK: call void @__kmpc_for_static_fini( -// CHECK: call void @__kmpc_barrier(%struct.ident_t* +// ALL: call void @__kmpc_for_static_init_4( +// ALL: call i32 @__kmpc_cancel( +// ALL: call void @__kmpc_for_static_fini( +// ALL: call void @__kmpc_barrier(%struct.ident_t* #pragma omp sections { #pragma omp cancel sections @@ -41,53 +41,53 @@ #pragma omp cancel sections } } -// CHECK: call void @__kmpc_for_static_init_4( -// CHECK: [[RES:%.+]] = call i32 @__kmpc_cancel(%struct.ident_t* {{[^,]+}}, i32 [[GTID]], i32 3) -// CHECK: [[CMP:%.+]] = icmp ne i32 [[RES]], 0 -// CHECK: br i1 [[CMP]], label %[[EXIT:[^,].+]], label %[[CONTINUE:.+]] -// CHECK: [[EXIT]] -// CHECK: br label -// CHECK: [[CONTINUE]] -// CHECK: br label -// CHECK: [[RES:%.+]] = call i32 @__kmpc_cancel(%struct.ident_t* {{[^,]+}}, i32 [[GTID]], i32 3) -// CHECK: [[CMP:%.+]] = icmp ne i32 [[RES]], 0 -// CHECK: br i1 [[CMP]], label %[[EXIT:[^,].+]], label %[[CONTINUE:.+]] -// CHECK: [[EXIT]] -// CHECK: br label -// CHECK: [[CONTINUE]] -// CHECK: br label -// CHECK: call void @__kmpc_for_static_fini( +// ALL: call void @__kmpc_for_static_init_4( +// ALL: [[RES:%.+]] = call i32 @__kmpc_cancel(%struct.ident_t* {{[^,]+}}, i32 [[GTID]], i32 3) +// ALL: [[CMP:%.+]] = icmp ne i32 [[RES]], 0 +// ALL: br i1 [[CMP]], label %[[EXIT:[^,].+]], label %[[CONTINUE:.+]] +// ALL: [[EXIT]] +// ALL: br label +// ALL: [[CONTINUE]] +// ALL: br label +// ALL: [[RES:%.+]] = call i32 @__kmpc_cancel(%struct.ident_t* {{[^,]+}}, i32 [[GTID]], i32 3) +// ALL: [[CMP:%.+]] = icmp ne i32 [[RES]], 0 +// ALL: br i1 [[CMP]], label %[[EXIT:[^,].+]], label %[[CONTINUE:.+]] +// ALL: [[EXIT]] +// ALL: br label +// ALL: [[CONTINUE]] +// ALL: br label +// ALL: call void @__kmpc_for_static_fini( #pragma omp for for (int i = 0; i < argc; ++i) { #pragma omp cancel for if(cancel: flag) } -// CHECK: call void @__kmpc_for_static_init_4( -// CHECK: [[FLAG:%.+]] = load float, float* @{{.+}}, -// CHECK: [[BOOL:%.+]] = fcmp une float [[FLAG]], 0.000000e+00 -// CHECK: br i1 [[BOOL]], label %[[THEN:[^,]+]], label %[[ELSE:[^,]+]] -// CHECK: [[THEN]] -// CHECK: [[RES:%.+]] = call i32 @__kmpc_cancel(%struct.ident_t* {{[^,]+}}, i32 [[GTID]], i32 2) -// CHECK: [[CMP:%.+]] = icmp ne i32 [[RES]], 0 -// CHECK: br i1 [[CMP]], label %[[EXIT:[^,].+]], label %[[CONTINUE:.+]] -// CHECK: [[EXIT]] -// CHECK: br label -// CHECK: [[CONTINUE]] -// CHECK: br label -// CHECK: [[ELSE]] -// CHECK: br label -// CHECK: call void @__kmpc_for_static_fini( -// CHECK: call void @__kmpc_barrier(%struct.ident_t* +// ALL: call void @__kmpc_for_static_init_4( +// ALL: [[FLAG:%.+]] = load float, float* @{{.+}}, +// ALL: [[BOOL:%.+]] = fcmp une float [[FLAG]], 0.000000e+00 +// ALL: br i1 [[BOOL]], label %[[THEN:[^,]+]], label %[[ELSE:[^,]+]] +// ALL: [[THEN]] +// ALL: [[RES:%.+]] = call i32 @__kmpc_cancel(%struct.ident_t* {{[^,]+}}, i32 [[GTID]], i32 2) +// ALL: [[CMP:%.+]] = icmp ne i32 [[RES]], 0 +// ALL: br i1 [[CMP]], label %[[EXIT:[^,].+]], label %[[CONTINUE:.+]] +// ALL: [[EXIT]] +// ALL: br label +// ALL: [[CONTINUE]] +// ALL: br label +// ALL: [[ELSE]] +// ALL: br label +// ALL: call void @__kmpc_for_static_fini( +// ALL: call void @__kmpc_barrier(%struct.ident_t* #pragma omp task { #pragma omp cancel taskgroup } -// CHECK: call i8* @__kmpc_omp_task_alloc( -// CHECK: call i32 @__kmpc_omp_task( +// ALL: call i8* @__kmpc_omp_task_alloc( +// ALL: call i32 @__kmpc_omp_task( #pragma omp parallel sections { #pragma omp cancel sections } -// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call( +// ALL: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call( #pragma omp parallel sections { #pragma omp cancel sections @@ -96,14 +96,14 @@ #pragma omp cancel sections } } -// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call( +// ALL: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call( int r = 0; #pragma omp parallel for reduction(+: r) for (int i = 0; i < argc; ++i) { #pragma omp cancel for r += i; } -// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call( +// ALL: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call( return argc; } @@ -173,4 +173,29 @@ // CHECK: call void @__kmpc_for_static_fini( // CHECK: ret void +// IRBUILDER: define internal void @main + +// IRBUILDER: [[RETURN:omp.par.exit[^:]*]] +// IRBUILDER-NEXT: ret void +// IRBUILDER: [[FLAG:%.+]] = load float, float* @{{.+}}, + +// IRBUILDER: [[BOOL:%.+]] = fcmp une float [[FLAG]], 0.000000e+00 +// IRBUILDER: br i1 [[BOOL]], label %[[THEN:[^,]+]], label %[[ELSE:[^,]+]] +// IRBUILDER: [[ELSE]] +// IRBUILDER-NEXT: br label %[[ELSE2:.*]] +// IRBUILDER: [[ELSE2]] +// The barrier directive should now call __kmpc_cancel_barrier +// IRBUILDER: call i32 @__kmpc_cancel_barrier(%struct.ident_t* +// IRBUILDER: br label +// IRBUILDER: [[THEN]] +// IRBUILDER: [[RES:%.+]] = call i32 @__kmpc_cancel(%struct.ident_t* {{[^,]+}}, i32 {{[^,]+}}, i32 1) +// IRBUILDER: [[CMP:%.+]] = icmp eq i32 [[RES]], 0 +// IRBUILDER: br i1 [[CMP]], label %[[CONTINUE:[^,].+]], label %[[EXIT:.+]] +// IRBUILDER: [[EXIT]] +// IRBUILDER: br label %[[EXIT2:.+]] +// IRBUILDER: [[EXIT2]] +// IRBUILDER: br label %[[RETURN]] +// IRBUILDER: [[CONTINUE]] +// IRBUILDER: br label %[[ELSE:.+]] + #endif diff --git a/clang/test/OpenMP/parallel_codegen.cpp b/clang/test/OpenMP/parallel_codegen.cpp --- a/clang/test/OpenMP/parallel_codegen.cpp +++ b/clang/test/OpenMP/parallel_codegen.cpp @@ -1,22 +1,31 @@ -// RUN: %clang_cc1 -verify -fopenmp -x c++ -emit-llvm %s -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -o - | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp -x c++ -emit-llvm %s -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -o - | FileCheck %s --check-prefixes=ALL,CHECK // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix=CHECK-DEBUG %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefixes=ALL-DEBUG,CHECK-DEBUG %s +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-enable-irbuilder -x c++ -emit-llvm %s -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -o - | FileCheck %s --check-prefixes=ALL,IRBUILDER +// RUN: %clang_cc1 -fopenmp -fopenmp-enable-irbuilder -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-enable-irbuilder -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefixes=ALL-DEBUG,IRBUILDER-DEBUG %s // RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -emit-llvm %s -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s // RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s // RUN: %clang_cc1 -fopenmp-simd -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-enable-irbuilder -x c++ -emit-llvm %s -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-enable-irbuilder -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-enable-irbuilder -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s // SIMD-ONLY0-NOT: {{__kmpc|__tgt}} // expected-no-diagnostics #ifndef HEADER #define HEADER -// CHECK-DAG: %struct.ident_t = type { i32, i32, i32, i32, i8* } -// CHECK-DAG: [[STR:@.+]] = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00" -// CHECK-DAG: [[DEF_LOC_2:@.+]] = private unnamed_addr global %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } +// ALL-DAG: %struct.ident_t = type { i32, i32, i32, i32, i8* } +// ALL-DAG: [[STR:@.+]] = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00" +// ALL-DAG: [[DEF_LOC_2:@.+]] = private unnamed_addr global %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } // CHECK-DEBUG-DAG: %struct.ident_t = type { i32, i32, i32, i32, i8* } // CHECK-DEBUG-DAG: [[STR:@.+]] = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00" // CHECK-DEBUG-DAG: [[DEF_LOC_2:@.+]] = private unnamed_addr global %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } -// CHECK-DEBUG-DAG: [[LOC1:@.+]] = private unnamed_addr constant [{{.+}} x i8] c";{{.*}}parallel_codegen.cpp;main;[[@LINE+19]];1;;\00" -// CHECK-DEBUG-DAG: [[LOC2:@.+]] = private unnamed_addr constant [{{.+}} x i8] c";{{.*}}parallel_codegen.cpp;tmain;[[@LINE+8]];1;;\00" +// CHECK-DEBUG-DAG: [[LOC1:@.+]] = private unnamed_addr constant [{{.+}} x i8] c";{{.*}}parallel_codegen.cpp;main;[[@LINE+22]];1;;\00" +// CHECK-DEBUG-DAG: [[LOC2:@.+]] = private unnamed_addr constant [{{.+}} x i8] c";{{.*}}parallel_codegen.cpp;tmain;[[@LINE+11]];1;;\00" +// IRBUILDER-DEBUG-DAG: %struct.ident_t = type { i32, i32, i32, i32, i8* } +// IRBUILDER-DEBUG-DAG: [[LOC1:@.+]] = private unnamed_addr constant [{{.+}} x i8] c";{{.*}}parallel_codegen.cpp;main;[[@LINE+19]];0;;\00" +// IRBUILDER-DEBUG-DAG: [[LOC2:@.+]] = private unnamed_addr constant [{{.+}} x i8] c";{{.*}}parallel_codegen.cpp;tmain;[[@LINE+8]];0;;\00" template void foo(T argc) {} @@ -39,65 +48,74 @@ return tmain(argv); } -// CHECK-LABEL: define {{[a-z\_\b]*[ ]?i32}} @main({{i32[ ]?[a-z]*}} %argc, i8** %argv) -// CHECK: store i32 %argc, i32* [[ARGC_ADDR:%.+]], -// CHECK: [[VLA:%.+]] = alloca i32, i{{[0-9]+}} [[VLA_SIZE:%[^,]+]], +// ALL-LABEL: define {{[a-z\_\b]*[ ]?i32}} @main({{i32[ ]?[a-z]*}} %argc, i8** %argv) +// ALL: store i32 %argc, i32* [[ARGC_ADDR:%.+]], +// ALL: [[VLA:%.+]] = alloca i32, i{{[0-9]+}} [[VLA_SIZE:%[^,]+]], // CHECK: call {{.*}}void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[DEF_LOC_2]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i{{[0-9]+}}, i32*)* [[OMP_OUTLINED:@.+]] to void (i32*, i32*, ...)*), i{{[0-9]+}} [[VLA_SIZE]], i32* [[VLA]]) -// CHECK-NEXT: [[ARGV:%.+]] = load i8**, i8*** {{%[a-z0-9.]+}} -// CHECK-NEXT: [[RET:%.+]] = call {{[a-z\_\b]*[ ]?i32}} [[TMAIN:@.+tmain.+]](i8** [[ARGV]]) -// CHECK: ret i32 -// CHECK-NEXT: } -// CHECK-DEBUG-LABEL: define i32 @main(i32 %argc, i8** %argv) +// IRBUILDER: call {{.*}}void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[DEF_LOC_2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* [[OMP_OUTLINED:@.+]] to void (i32*, i32*, ...)*), i32* [[VLA]]) +// ALL: [[ARGV:%.+]] = load i8**, i8*** {{%[a-z0-9.]+}} +// ALL-NEXT: [[RET:%.+]] = call {{[a-z\_\b]*[ ]?i32}} [[TMAIN:@.+tmain.+]](i8** [[ARGV]]) +// ALL: ret i32 +// ALL-NEXT: } +// ALL-DEBUG-LABEL: define i32 @main(i32 %argc, i8** %argv) // CHECK-DEBUG: [[LOC_2_ADDR:%.+]] = alloca %struct.ident_t // CHECK-DEBUG: [[KMPC_LOC_VOIDPTR:%.+]] = bitcast %struct.ident_t* [[LOC_2_ADDR]] to i8* // CHECK-DEBUG-NEXT: [[KMPC_DEFAULT_LOC_VOIDPTR:%.+]] = bitcast %struct.ident_t* [[DEF_LOC_2]] to i8* // CHECK-DEBUG-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[KMPC_LOC_VOIDPTR]], i8* align 8 [[KMPC_DEFAULT_LOC_VOIDPTR]], i64 24, i1 false) -// CHECK-DEBUG: store i32 %argc, i32* [[ARGC_ADDR:%.+]], -// CHECK-DEBUG: [[VLA:%.+]] = alloca i32, i64 [[VLA_SIZE:%[^,]+]], +// ALL-DEBUG: store i32 %argc, i32* [[ARGC_ADDR:%.+]], +// ALL-DEBUG: [[VLA:%.+]] = alloca i32, i64 [[VLA_SIZE:%[^,]+]], // CHECK-DEBUG: [[KMPC_LOC_PSOURCE_REF:%.+]] = getelementptr inbounds %struct.ident_t, %struct.ident_t* [[LOC_2_ADDR]], i32 0, i32 4 // CHECK-DEBUG-NEXT: store i8* getelementptr inbounds ([{{.+}} x i8], [{{.+}} x i8]* [[LOC1]], i32 0, i32 0), i8** [[KMPC_LOC_PSOURCE_REF]] // CHECK-DEBUG: call {{.*}}void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[LOC_2_ADDR]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i32*)* [[OMP_OUTLINED:@.+]] to void (i32*, i32*, ...)*), i64 [[VLA_SIZE]], i32* [[VLA]]) -// CHECK-DEBUG-NEXT: [[ARGV:%.+]] = load i8**, i8*** {{%[a-z0-9.]+}} -// CHECK-DEBUG-NEXT: [[RET:%.+]] = call i32 [[TMAIN:@.+tmain.+]](i8** [[ARGV]]) -// CHECK-DEBUG: ret i32 -// CHECK-DEBUG-NEXT: } +// IRBUILDER-DEBUG: call {{.*}}void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @{{.*}}, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* [[OMP_OUTLINED:@.+]] to void (i32*, i32*, ...)*), i32* [[VLA]]) +// ALL-DEBUG: [[ARGV:%.+]] = load i8**, i8*** {{%[a-z0-9.]+}} +// ALL-DEBUG: [[RET:%.+]] = call i32 [[TMAIN:@.+tmain.+]](i8** [[ARGV]]) +// ALL-DEBUG: ret i32 +// ALL-DEBUG-NEXT: } -// CHECK: define internal {{.*}}void [[OMP_OUTLINED]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i{{[0-9]+}}{{.*}} [[VLA_SIZE:%.+]], i32* {{.+}} [[VLA_ADDR:%[^)]+]]) -// CHECK-SAME: #[[FN_ATTRS:[0-9]+]] +// CHECK: define internal {{.*}}void [[OMP_OUTLINED]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i{{[0-9]+}}{{.*}} [[VLA_SIZE:%.+]], i32* {{.+}} [[VLA_ADDR:%[^)]+]]) +// CHECK-SAME: #[[FN_ATTRS:[0-9]+]] +// IRBUILDER: define internal {{.*}}void [[OMP_OUTLINED]](i32* noalias %{{.*}}, i32* noalias %{{.*}}, i32* [[VLA_REF:%[^)]+]]) +// IRBUILDER-SAME: #[[FN_ATTRS:[0-9]+]] // CHECK: store i32* [[VLA_ADDR]], i32** [[VLA_PTR_ADDR:%.+]], // CHECK: [[VLA_REF:%.+]] = load i32*, i32** [[VLA_PTR_ADDR]] -// CHECK: [[VLA_ELEM_REF:%.+]] = getelementptr inbounds i32, i32* [[VLA_REF]], i{{[0-9]+}} 1 -// CHECK-NEXT: [[VLA_ELEM:%.+]] = load i32, i32* [[VLA_ELEM_REF]] +// ALL: [[VLA_ELEM_REF:%.+]] = getelementptr inbounds i32, i32* [[VLA_REF]], i{{[0-9]+}} 1 +// ALL-NEXT: [[VLA_ELEM:%.+]] = load i32, i32* [[VLA_ELEM_REF]] // CHECK-NEXT: invoke {{.*}}void [[FOO:@.+foo.+]](i32{{[ ]?[a-z]*}} [[VLA_ELEM]]) +// IRBUILDER: call {{.*}}void [[FOO:@.+foo.+]](i32{{[ ]?[a-z]*}} [[VLA_ELEM]]) // CHECK: ret void // CHECK: call {{.*}}void @{{.+terminate.*|abort}}( // CHECK-NEXT: unreachable // CHECK-NEXT: } -// CHECK-DEBUG: define internal void [[OMP_OUTLINED_DEBUG:@.+]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i64 [[VLA_SIZE:%.+]], i32* {{.+}} [[VLA_ADDR:%[^)]+]]) -// CHECK-DEBUG-SAME: #[[FN_ATTRS:[0-9]+]] +// CHECK-DEBUG: define internal void [[OMP_OUTLINED_DEBUG:@.+]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i64 [[VLA_SIZE:%.+]], i32* {{.+}} [[VLA_ADDR:%[^)]+]]) +// CHECK-DEBUG-SAME: #[[FN_ATTRS:[0-9]+]] +// IRBUILDER-DEBUG: define internal void [[OMP_OUTLINED_DEBUG:@.+]](i32* noalias %{{.*}}, i32* noalias %{{.*}}, i32* [[VLA_REF:%[^)]+]]) +// IRBUILDER-DEBUG-SAME: #[[FN_ATTRS:[0-9]+]] // CHECK-DEBUG: store i32* [[VLA_ADDR]], i32** [[VLA_PTR_ADDR:%.+]], // CHECK-DEBUG: [[VLA_REF:%.+]] = load i32*, i32** [[VLA_PTR_ADDR]] -// CHECK-DEBUG: [[VLA_ELEM_REF:%.+]] = getelementptr inbounds i32, i32* [[VLA_REF]], i64 1 -// CHECK-DEBUG-NEXT: [[VLA_ELEM:%.+]] = load i32, i32* [[VLA_ELEM_REF]] +// ALL-DEBUG: [[VLA_ELEM_REF:%.+]] = getelementptr inbounds i32, i32* [[VLA_REF]], i64 1 +// ALL-DEBUG-NEXT: [[VLA_ELEM:%.+]] = load i32, i32* [[VLA_ELEM_REF]] // CHECK-DEBUG-NEXT: invoke void [[FOO:@.+foo.+]](i32 [[VLA_ELEM]]) +// IRBUILDER-DEBUG-NEXT: call void [[FOO:@.+foo.+]](i32 [[VLA_ELEM]]) // CHECK-DEBUG: ret void // CHECK-DEBUG: call void @{{.+terminate.*|abort}}( // CHECK-DEBUG-NEXT: unreachable // CHECK-DEBUG-NEXT: } -// CHECK-DAG: define linkonce_odr {{.*}}void [[FOO]]({{i32[ ]?[a-z]*}} %argc) -// CHECK-DAG: declare !callback ![[cbid:[0-9]+]] {{.*}}void @__kmpc_fork_call(%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) -// CHECK-DEBUG-DAG: define linkonce_odr void [[FOO]](i32 %argc) -// CHECK-DEBUG-DAG: declare !callback ![[cbid:[0-9]+]] void @__kmpc_fork_call(%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) +// ALL-DAG: define linkonce_odr {{.*}}void [[FOO]]({{i32[ ]?[a-z]*}} %argc) +// ALL-DAG: declare !callback ![[cbid:[0-9]+]] {{.*}}void @__kmpc_fork_call(%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) +// ALL-DEBUG-DAG: define linkonce_odr void [[FOO]](i32 %argc) +// ALL-DEBUG-DAG: declare !callback ![[cbid:[0-9]+]] void @__kmpc_fork_call(%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) // CHECK-DEBUG-DAG: define internal void [[OMP_OUTLINED]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i64 [[VLA_SIZE:%.+]], i32* {{.+}} [[VLA_ADDR:%[^)]+]]) // CHECK-DEBUG-DAG: call void [[OMP_OUTLINED_DEBUG]] -// CHECK: define linkonce_odr {{[a-z\_\b]*[ ]?i32}} [[TMAIN]](i8** %argc) -// CHECK: store i8** %argc, i8*** [[ARGC_ADDR:%.+]], +// ALL: define linkonce_odr {{[a-z\_\b]*[ ]?i32}} [[TMAIN]](i8** %argc) +// ALL: store i8** %argc, i8*** [[ARGC_ADDR:%.+]], // CHECK: call {{.*}}void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[DEF_LOC_2]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i8***, i{{64|32}})* [[OMP_OUTLINED:@.+]] to void (i32*, i32*, ...)*), i8*** [[ARGC_ADDR]], i{{64|32}} %{{.+}}) -// CHECK-NEXT: ret i32 0 -// CHECK-NEXT: } -// CHECK-DEBUG: define linkonce_odr i32 [[TMAIN]](i8** %argc) +// IRBUILDER: call {{.*}}void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[DEF_LOC_2]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i8***, i64)* [[OMP_OUTLINED:@.+]] to void (i32*, i32*, ...)*), i8*** [[ARGC_ADDR]], i64 %{{.+}}) +// ALL: ret i32 0 +// ALL-NEXT: } +// ALL-DEBUG: define linkonce_odr i32 [[TMAIN]](i8** %argc) // CHECK-DEBUG-DAG: [[LOC_2_ADDR:%.+]] = alloca %struct.ident_t // CHECK-DEBUG: [[KMPC_LOC_VOIDPTR:%.+]] = bitcast %struct.ident_t* [[LOC_2_ADDR]] to i8* // CHECK-DEBUG-NEXT: [[KMPC_DEFAULT_LOC_VOIDPTR:%.+]] = bitcast %struct.ident_t* [[DEF_LOC_2]] to i8* @@ -106,35 +124,40 @@ // CHECK-DEBUG: [[KMPC_LOC_PSOURCE_REF:%.+]] = getelementptr inbounds %struct.ident_t, %struct.ident_t* [[LOC_2_ADDR]], i32 0, i32 4 // CHECK-DEBUG-NEXT: store i8* getelementptr inbounds ([{{.+}} x i8], [{{.+}} x i8]* [[LOC2]], i32 0, i32 0), i8** [[KMPC_LOC_PSOURCE_REF]] // CHECK-DEBUG-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[LOC_2_ADDR]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i8***, i64)* [[OMP_OUTLINED:@.+]] to void (i32*, i32*, ...)*), i8*** [[ARGC_ADDR]], i64 %{{.+}}) -// CHECK-DEBUG-NEXT: ret i32 0 -// CHECK-DEBUG-NEXT: } +// IRBUILDER-DEBUG: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @{{.*}}, i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i8***, i64)* [[OMP_OUTLINED:@.+]] to void (i32*, i32*, ...)*), i8*** [[ARGC_ADDR]], i64 %{{.+}}) +// ALL-DEBUG: ret i32 0 +// ALL-DEBUG-NEXT: } // CHECK: define internal {{.*}}void [[OMP_OUTLINED]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i8*** dereferenceable({{4|8}}) %argc, i{{64|32}}{{.*}} %{{.+}}) +// IRBUILDER: define internal {{.*}}void [[OMP_OUTLINED]](i32* noalias %{{.*}}, i32* noalias %{{.*}}, i8*** [[ARGC_REF:%.*]], i{{64|32}}{{.*}} %{{.+}}) // CHECK: store i8*** %argc, i8**** [[ARGC_PTR_ADDR:%.+]], // CHECK: [[ARGC_REF:%.+]] = load i8***, i8**** [[ARGC_PTR_ADDR]] -// CHECK: [[ARGC:%.+]] = load i8**, i8*** [[ARGC_REF]] +// ALL: [[ARGC:%.+]] = load i8**, i8*** [[ARGC_REF]] // CHECK-NEXT: invoke {{.*}}void [[FOO1:@.+foo.+]](i8** [[ARGC]]) +// IRBUILDER-NEXT: call {{.*}}void [[FOO1:@.+foo.+]](i8** [[ARGC]]) // CHECK: ret void // CHECK: call {{.*}}void @{{.+terminate.*|abort}}( // CHECK-NEXT: unreachable // CHECK-NEXT: } // CHECK-DEBUG: define internal void [[OMP_OUTLINED_DEBUG:@.+]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i8*** dereferenceable({{4|8}}) %argc, i64 %{{.+}}) +// IRBUILDER-DEBUG: define internal void [[OMP_OUTLINED_DEBUG:@.+]](i32* noalias %{{.*}}, i32* noalias %{{.*}}, i8*** [[ARGC_REF:%.*]], i64 %{{.+}}) // CHECK-DEBUG: store i8*** %argc, i8**** [[ARGC_PTR_ADDR:%.+]], // CHECK-DEBUG: [[ARGC_REF:%.+]] = load i8***, i8**** [[ARGC_PTR_ADDR]] -// CHECK-DEBUG: [[ARGC:%.+]] = load i8**, i8*** [[ARGC_REF]] +// ALL-DEBUG: [[ARGC:%.+]] = load i8**, i8*** [[ARGC_REF]] // CHECK-DEBUG-NEXT: invoke void [[FOO1:@.+foo.+]](i8** [[ARGC]]) +// IRBUILDER-DEBUG-NEXT: call void [[FOO1:@.+foo.+]](i8** [[ARGC]]) // CHECK-DEBUG: ret void // CHECK-DEBUG: call void @{{.+terminate.*|abort}}( // CHECK-DEBUG-NEXT: unreachable // CHECK-DEBUG-NEXT: } -// CHECK: define linkonce_odr {{.*}}void [[FOO1]](i8** %argc) +// ALL: define linkonce_odr {{.*}}void [[FOO1]](i8** %argc) // CHECK-DEBUG-DAG: define linkonce_odr void [[FOO1]](i8** %argc) // CHECK-DEBUG-DAG: define internal void [[OMP_OUTLINED]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i8*** dereferenceable({{4|8}}) %argc, i64 %{{.+}}) // CHECK-DEBUG-DAG: call void [[OMP_OUTLINED_DEBUG]]({{[^)]+}}){{[^,]*}}, !dbg -// CHECK: attributes #[[FN_ATTRS]] = {{.+}} nounwind -// CHECK-DEBUG: attributes #[[FN_ATTRS]] = {{.+}} nounwind -// CHECK: ![[cbid]] = !{![[cbidb:[0-9]+]]} -// CHECK: ![[cbidb]] = !{i64 2, i64 -1, i64 -1, i1 true} +// ALL: attributes #[[FN_ATTRS]] = {{.+}} nounwind +// ALL-DEBUG: attributes #[[FN_ATTRS]] = {{.+}} nounwind +// ALL: ![[cbid]] = !{![[cbidb:[0-9]+]]} +// ALL: ![[cbidb]] = !{i64 2, i64 -1, i64 -1, i1 true} #endif diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def --- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def +++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def @@ -204,6 +204,9 @@ #define __OMP_RTL_ATTRS(Name, FnAttrSet, RetAttrSet, ArgAttrSets) \ OMP_RTL_ATTRS(OMPRTL_##Name, FnAttrSet, RetAttrSet, ArgAttrSets) +__OMP_RTL_ATTRS(__kmpc_fork_call, AttributeSet(EnumAttr(NoUnwind)), + AttributeSet(), {}) + __OMP_RTL_ATTRS(__kmpc_global_thread_num, GetterAttrs, AttributeSet(), {}) __OMP_RTL_ATTRS(omp_get_thread_num, GetterAttrs, AttributeSet(), {}) diff --git a/llvm/lib/Frontend/OpenMP/OMPConstants.cpp b/llvm/lib/Frontend/OpenMP/OMPConstants.cpp --- a/llvm/lib/Frontend/OpenMP/OMPConstants.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPConstants.cpp @@ -65,7 +65,7 @@ #define OMP_TYPE(VarName, InitValue) VarName = InitValue; #define OMP_FUNCTION_TYPE(VarName, IsVarArg, ReturnType, ...) \ VarName = FunctionType::get(ReturnType, {__VA_ARGS__}, IsVarArg); \ - VarName##Ptr = PointerType::getUnqual(T); + VarName##Ptr = PointerType::getUnqual(VarName); #define OMP_STRUCT_TYPE(VarName, StructName, ...) \ T = M.getTypeByName(StructName); \ if (!T) \ diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -18,6 +18,7 @@ #include "llvm/ADT/StringSwitch.h" #include "llvm/IR/CFG.h" #include "llvm/IR/DebugInfo.h" +#include "llvm/IR/MDBuilder.h" #include "llvm/IR/IRBuilder.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Error.h" @@ -510,10 +511,21 @@ dbgs() << " PBR: " << BB->getName() << "\n"; }); + // Add some known attributes to the outlined function. Function *OutlinedFn = Extractor.extractCodeRegion(CEAC); + OutlinedFn->addParamAttr(0, Attribute::NoAlias); + OutlinedFn->addParamAttr(1, Attribute::NoAlias); + OutlinedFn->addFnAttr(Attribute::NoUnwind); + OutlinedFn->addFnAttr(Attribute::NoRecurse); + LLVM_DEBUG(dbgs() << "After outlining: " << *UI->getFunction() << "\n"); LLVM_DEBUG(dbgs() << " Outlined function: " << *OutlinedFn << "\n"); + // For compability with the clang CG we move the outlined function after the + // one with the parallel region. + OutlinedFn->removeFromParent(); + M.getFunctionList().insertAfter(OuterFn->getIterator(), OutlinedFn); + // Remove the artificial entry introduced by the extractor right away, we // made our own entry block after all. { @@ -544,6 +556,23 @@ RealArgs.append(CI->arg_begin() + /* tid & bound tid */ 2, CI->arg_end()); FunctionCallee RTLFn = getOrCreateRuntimeFunction(OMPRTL___kmpc_fork_call); + if (auto *F = dyn_cast(RTLFn.getCallee())) { + if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) { + llvm::LLVMContext &Ctx = F->getContext(); + MDBuilder MDB(Ctx); + // Annotate the callback behavior of the __kmpc_fork_call: + // - The callback callee is argument number 2 (microtask). + // - The first two arguments of the callback callee are unknown (-1). + // - All variadic arguments to the __kmpc_fork_call are passed to the + // callback callee. + F->addMetadata( + llvm::LLVMContext::MD_callback, + *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding( + 2, {-1, -1}, + /* VarArgsArePassed */ true)})); + } + } + Builder.CreateCall(RTLFn, RealArgs); LLVM_DEBUG(dbgs() << "With fork_call placed: " diff --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp --- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp +++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp @@ -376,6 +376,10 @@ Function *OutlinedFn = PrivAI->getFunction(); EXPECT_NE(F, OutlinedFn); EXPECT_FALSE(verifyModule(*M)); + EXPECT_TRUE(OutlinedFn->hasFnAttribute(Attribute::NoUnwind)); + EXPECT_TRUE(OutlinedFn->hasFnAttribute(Attribute::NoRecurse)); + EXPECT_TRUE(OutlinedFn->hasParamAttribute(0, Attribute::NoAlias)); + EXPECT_TRUE(OutlinedFn->hasParamAttribute(1, Attribute::NoAlias)); EXPECT_TRUE(OutlinedFn->hasInternalLinkage()); EXPECT_EQ(OutlinedFn->arg_size(), 3U);