diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -2098,6 +2098,14 @@ OutlinedFnArgs.push_back(ThreadIDAddr.getPointer()); OutlinedFnArgs.push_back(ZeroAddrBound.getPointer()); OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end()); + + // Ensure we do not inline the function. This is trivially true for the ones + // passed to __kmpc_fork_call but the ones calles in serialized regions + // could be inlined. This is not a perfect but it is closer to the invariant + // we want, namely, every data environment starts with a new function. + // TODO: We should pass the if condition to the runtime function and do the + // handling there. Much cleaner code. + OutlinedFn->addFnAttr(llvm::Attribute::NoInline); RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs); // __kmpc_end_serialized_parallel(&Loc, GTid); diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp --- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp @@ -1576,11 +1576,6 @@ auto *OutlinedFun = cast(CGOpenMPRuntime::emitParallelOutlinedFunction( D, ThreadIDVar, InnermostKind, CodeGen)); - if (CGM.getLangOpts().Optimize) { - OutlinedFun->removeFnAttr(llvm::Attribute::NoInline); - OutlinedFun->removeFnAttr(llvm::Attribute::OptimizeNone); - OutlinedFun->addFnAttr(llvm::Attribute::AlwaysInline); - } IsInTargetMasterThreadRegion = PrevIsInTargetMasterThreadRegion; IsInTTDRegion = PrevIsInTTDRegion; if (getExecutionMode() != CGOpenMPRuntimeGPU::EM_SPMD && @@ -1698,11 +1693,6 @@ CodeGen.setAction(Action); llvm::Function *OutlinedFun = CGOpenMPRuntime::emitTeamsOutlinedFunction( D, ThreadIDVar, InnermostKind, CodeGen); - if (CGM.getLangOpts().Optimize) { - OutlinedFun->removeFnAttr(llvm::Attribute::NoInline); - OutlinedFun->removeFnAttr(llvm::Attribute::OptimizeNone); - OutlinedFun->addFnAttr(llvm::Attribute::AlwaysInline); - } return OutlinedFun; } @@ -2102,6 +2092,14 @@ // Force inline this outlined function at its call site. Fn->setLinkage(llvm::GlobalValue::InternalLinkage); + // Ensure we do not inline the function. This is trivially true for the ones + // passed to __kmpc_fork_call but the ones calles in serialized regions + // could be inlined. This is not a perfect but it is closer to the invariant + // we want, namely, every data environment starts with a new function. + // TODO: We should pass the if condition to the runtime function and do the + // handling there. Much cleaner code. + cast(OutlinedFn)->addFnAttr(llvm::Attribute::NoInline); + Address ZeroAddr = CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty, /*Name=*/".zero.addr"); CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0)); @@ -3134,11 +3132,6 @@ "_omp_reduction_shuffle_and_reduce_func", &CGM.getModule()); CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI); Fn->setDoesNotRecurse(); - if (CGM.getLangOpts().Optimize) { - Fn->removeFnAttr(llvm::Attribute::NoInline); - Fn->removeFnAttr(llvm::Attribute::OptimizeNone); - Fn->addFnAttr(llvm::Attribute::AlwaysInline); - } CodeGenFunction CGF(CGM); CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc); diff --git a/clang/test/OpenMP/parallel_if_codegen.cpp b/clang/test/OpenMP/parallel_if_codegen.cpp --- a/clang/test/OpenMP/parallel_if_codegen.cpp +++ b/clang/test/OpenMP/parallel_if_codegen.cpp @@ -7,6 +7,7 @@ // RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -triple %itanium_abi_triple -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s // SIMD-ONLY0-NOT: {{__kmpc|__tgt}} +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple %itanium_abi_triple -emit-llvm %s -disable-O0-optnone -o - | FileCheck %s --check-prefix=WOPT // RUN: %clang_cc1 -verify -fopenmp -x c++ -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck %s // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple %itanium_abi_triple -emit-pch -o %t %s // RUN: %clang_cc1 -fopenmp -x c++ -triple %itanium_abi_triple -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s @@ -96,14 +97,20 @@ return tmain(Arg); } +// WOPT: noinline +// WOPT-NOT: optnone // CHECK: define internal {{.*}}void [[CAP_FN4]] // CHECK: call {{.*}}void @{{.+}}fn4 // CHECK: ret void +// WOPT: noinline +// WOPT-NOT: optnone // CHECK: define internal {{.*}}void [[CAP_FN5]] // CHECK: call {{.*}}void @{{.+}}fn5 // CHECK: ret void +// WOPT: noinline +// WOPT-NOT: optnone // CHECK: define internal {{.*}}void [[CAP_FN6]] // CHECK: call {{.*}}void @{{.+}}fn6 // CHECK: ret void @@ -129,14 +136,20 @@ // CHECK: br label %[[OMP_END]] // CHECK: [[OMP_END]] +// WOPT: noinline +// WOPT-NOT: optnone // CHECK: define internal {{.*}}void [[CAP_FN1]] // CHECK: call {{.*}}void @{{.+}}fn1 // CHECK: ret void +// WOPT: noinline +// WOPT-NOT: optnone // CHECK: define internal {{.*}}void [[CAP_FN2]] // CHECK: call {{.*}}void @{{.+}}fn2 // CHECK: ret void +// WOPT: noinline +// WOPT-NOT: optnone // CHECK: define internal {{.*}}void [[CAP_FN3]] // CHECK: call {{.*}}void @{{.+}}fn3 // CHECK: ret void