Index: libomptarget/deviceRTLs/nvptx/src/interface.h =================================================================== --- libomptarget/deviceRTLs/nvptx/src/interface.h +++ libomptarget/deviceRTLs/nvptx/src/interface.h @@ -470,9 +470,8 @@ int16_t RequiresDataSharing); EXTERN void __kmpc_spmd_kernel_deinit(); EXTERN void __kmpc_kernel_prepare_parallel(void *WorkFn, - void ***SharedArgs, int32_t nArgs, int16_t IsOMPRuntimeInitialized); -EXTERN bool __kmpc_kernel_parallel(void **WorkFn, void ***SharedArgs, +EXTERN bool __kmpc_kernel_parallel(void **WorkFn, int16_t IsOMPRuntimeInitialized); EXTERN void __kmpc_kernel_end_parallel(); EXTERN bool __kmpc_kernel_convergent_parallel(void *buffer, uint32_t Mask, Index: libomptarget/deviceRTLs/nvptx/src/omp_data.cu =================================================================== --- libomptarget/deviceRTLs/nvptx/src/omp_data.cu +++ libomptarget/deviceRTLs/nvptx/src/omp_data.cu @@ -46,8 +46,3 @@ // Scratchpad for teams reduction. //////////////////////////////////////////////////////////////////////////////// __device__ __shared__ void *ReductionScratchpadPtr; - -//////////////////////////////////////////////////////////////////////////////// -// Data sharing related variables. -//////////////////////////////////////////////////////////////////////////////// -__device__ __shared__ omptarget_nvptx_SharedArgs omptarget_nvptx_sharedArgs; Index: libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h =================================================================== --- libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h +++ libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h @@ -62,46 +62,6 @@ #define __ACTIVEMASK() __ballot(1) #endif -// arguments needed for L0 parallelism only. -class omptarget_nvptx_SharedArgs { -public: - // All these methods must be called by the master thread only. - INLINE void Init() { - args = buffer; - nArgs = MAX_SHARED_ARGS; - } - INLINE void DeInit() { - // Free any memory allocated for outlined parallel function with a large - // number of arguments. - if (nArgs > MAX_SHARED_ARGS) { - SafeFree(args, (char *)"new extended args"); - Init(); - } - } - INLINE void EnsureSize(int size) { - if (size > nArgs) { - if (nArgs > MAX_SHARED_ARGS) { - SafeFree(args, (char *)"new extended args"); - } - args = (void **) SafeMalloc(size * sizeof(void *), - (char *)"new extended args"); - nArgs = size; - } - } - // Called by all threads. - INLINE void **GetArgs() { return args; }; -private: - // buffer of pre-allocated arguments. - void *buffer[MAX_SHARED_ARGS]; - // pointer to arguments buffer. - // starts off as a pointer to 'buffer' but can be dynamically allocated. - void **args; - // starts off as MAX_SHARED_ARGS but can increase in size. - uint32_t nArgs; -}; - -extern __device__ __shared__ omptarget_nvptx_SharedArgs omptarget_nvptx_sharedArgs; - // Data sharing related quantities, need to match what is used in the compiler. enum DATA_SHARING_SIZES { // The maximum number of workers in a kernel. Index: libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.cu =================================================================== --- libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.cu +++ libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.cu @@ -54,9 +54,6 @@ PRINT(LD_IO, "call to __kmpc_kernel_init with version %f\n", OMPTARGET_NVPTX_VERSION); - // init parallel work arguments - omptarget_nvptx_sharedArgs.Init(); - if (!RequiresOMPRuntime) { // If OMP runtime is not required don't initialize OMP state. setExecutionParameters(Generic, RuntimeUninitialized); @@ -110,9 +107,6 @@ } // Done with work. Kill the workers. omptarget_nvptx_workFn = 0; - - // Deinit parallel work arguments - omptarget_nvptx_sharedArgs.DeInit(); } EXTERN void __kmpc_spmd_kernel_init(int ThreadLimit, int16_t RequiresOMPRuntime, Index: libomptarget/deviceRTLs/nvptx/src/option.h =================================================================== --- libomptarget/deviceRTLs/nvptx/src/option.h +++ libomptarget/deviceRTLs/nvptx/src/option.h @@ -28,10 +28,6 @@ // region to synchronize with each other. #define L1_BARRIER (1) -// Maximum number of preallocated arguments to an outlined parallel/simd function. -// Anything more requires dynamic memory allocation. -#define MAX_SHARED_ARGS 20 - // Maximum number of omp state objects per SM allocated statically in global // memory. #if __CUDA_ARCH__ >= 600 Index: libomptarget/deviceRTLs/nvptx/src/parallel.cu =================================================================== --- libomptarget/deviceRTLs/nvptx/src/parallel.cu +++ libomptarget/deviceRTLs/nvptx/src/parallel.cu @@ -214,16 +214,10 @@ // // This routine is always called by the team master.. EXTERN void __kmpc_kernel_prepare_parallel(void *WorkFn, - void ***SharedArgs, int32_t nArgs, int16_t IsOMPRuntimeInitialized) { PRINT0(LD_IO, "call to __kmpc_kernel_prepare_parallel\n"); omptarget_nvptx_workFn = WorkFn; - if (nArgs > 0) { - omptarget_nvptx_sharedArgs.EnsureSize(nArgs); - *SharedArgs = omptarget_nvptx_sharedArgs.GetArgs(); - } - if (!IsOMPRuntimeInitialized) return; @@ -323,13 +317,11 @@ // // Only the worker threads call this routine. EXTERN bool __kmpc_kernel_parallel(void **WorkFn, - void ***SharedArgs, int16_t IsOMPRuntimeInitialized) { PRINT0(LD_IO | LD_PAR, "call to __kmpc_kernel_parallel\n"); // Work function and arguments for L1 parallel region. *WorkFn = omptarget_nvptx_workFn; - *SharedArgs = omptarget_nvptx_sharedArgs.GetArgs(); if (!IsOMPRuntimeInitialized) return true;