Add a hidden option to disable function internalization in OpenMPOpt.

Adds the hidden flag -openmp-opt-disable-internalization (default: false).
When it is set, the OpenMPOpt loop over the functions of a device module no
longer calls Attributor::internalizeFunction. Both device runtime bitcode
builds (amdgcn and nvptx) pass the flag to clang through -mllvm.

NOTE(review): this patch was rebuilt from a copy whose newlines, indentation
and template arguments had been stripped. The <bool> and <const Function *>
arguments and the indentation of context lines are reconstructed -- confirm
them against the tree, or apply with `git apply --ignore-whitespace`.

diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
--- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
+++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
@@ -56,6 +56,11 @@
     cl::desc("Enable the OpenMP region merging optimization."), cl::Hidden,
     cl::init(false));
 
+static cl::opt<bool>
+    DisableInternalization("openmp-opt-disable-internalization", cl::ZeroOrMore,
+                           cl::desc("Disable function internalization."),
+                           cl::Hidden, cl::init(false));
+
 static cl::opt<bool> PrintICVValues("openmp-print-icv-values", cl::init(false),
                                     cl::Hidden);
 static cl::opt<bool> PrintOpenMPKernels("openmp-print-gpu-kernels",
@@ -3824,7 +3829,8 @@
   DenseSet<const Function *> InternalizedFuncs;
   if (isOpenMPDevice(M))
     for (Function &F : M)
-      if (!F.isDeclaration() && !Kernels.contains(&F) && IsCalled(F)) {
+      if (!F.isDeclaration() && !Kernels.contains(&F) && IsCalled(F) &&
+          !DisableInternalization) {
         if (Attributor::internalizeFunction(F, /* Force */ true)) {
           InternalizedFuncs.insert(&F);
         } else if (!F.hasLocalLinkage() && !F.hasFnAttribute(Attribute::Cold)) {
diff --git a/openmp/libomptarget/deviceRTLs/amdgcn/CMakeLists.txt b/openmp/libomptarget/deviceRTLs/amdgcn/CMakeLists.txt
--- a/openmp/libomptarget/deviceRTLs/amdgcn/CMakeLists.txt
+++ b/openmp/libomptarget/deviceRTLs/amdgcn/CMakeLists.txt
@@ -107,6 +107,7 @@
   set(cu_cmd ${CLANG_TOOL}
     -xc++
     -c
+    -mllvm -openmp-opt-disable-internalization
     -std=c++14
     -ffreestanding
     -target amdgcn-amd-amdhsa
diff --git a/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt b/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
--- a/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
+++ b/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
@@ -153,6 +153,7 @@
 
 # Set flags for LLVM Bitcode compilation.
 set(bc_flags -S -x c++ -O1 -std=c++14
+             -mllvm -openmp-opt-disable-internalization
              -target nvptx64
              -Xclang -emit-llvm-bc
              -Xclang -aux-triple -Xclang ${aux_triple}