This is an archive of the discontinued LLVM Phabricator instance.

[OpenMP][PoC][WIP] An OpenMP-based OpenMP device runtime [NOT FOR COMMIT]
AbandonedPublic

Authored by jdoerfert on Apr 4 2020, 11:49 AM.

Download Raw Diff

Details

Reviewers

Summary

In order to make the device runtime truly device independent and thereby
reusable, e.g., for testing on the host, we want to eliminate the
remaining non-portable code. This basically boils down to `__device__`,
`__shared__`, and some other attributes, e.g., `__forceinline__`.

NOTE: This does not yet create a valid .bc file because the IR files we get from the C++ compilation are not directly usable by llvm-link, opt, etc. I did not investigate how to extract the pure IR. The entire NVPTX compilation process should be re-investigated, as we might not need to link in anything from CUDA.

Diff Detail

Repository: rG LLVM Github Monorepo

Unit TestsFailed

	Time	Test
	70 ms	Clang.SemaObjC::Unknown Unit Message ("")
	160 ms	lldb-unit.Host/_/HostTests::Unknown Unit Message ("")

Event Timeline

jdoerfert created this revision.Apr 4 2020, 11:49 AM

Herald added a project: Restricted Project. · View Herald TranscriptApr 4 2020, 11:49 AM

Herald added subscribers: guansong, bollu, yaxunl, mgorny. · View Herald Transcript

JonChesterfield added a subscriber: JonChesterfield.Apr 4 2020, 12:02 PM

Harbormaster failed remote builds in B51790: Diff 255069!Apr 4 2020, 12:14 PM

jdoerfert edited the summary of this revision. (Show Details)Apr 4 2020, 5:24 PM

Use pragma syntax for allocators (e.g., to replace `__shared__`)

Harbormaster failed remote builds in B51800: Diff 255091!Apr 4 2020, 6:38 PM

grokos added a subscriber: grokos.Jul 2 2020, 1:13 PM

Herald added a subscriber: sstefan1. · View Herald TranscriptJul 2 2020, 1:13 PM

tianshilei1992 added a subscriber: tianshilei1992.Jul 7 2020, 2:48 PM

JonChesterfield added a subscriber: saiislam.Jul 9 2020, 10:48 AM

jdoerfert mentioned this in D94698: [OpenMP][WIP][POC] Compile the device runtime as C++.Jan 14 2021, 9:17 AM

We moved to a CUDA-free runtime now.

Herald added a reviewer: bollu. · View Herald TranscriptJan 28 2021, 8:16 AM

Revision Contents

Path

Size

openmp/

libomptarget/

deviceRTLs/

common/

device_environment.h

4 lines

src/

cancel.cpp

33 lines

cancel.cu

	critical.cpp
	critical.cu

17 lines

critical.cu

support.h

2 lines

target.h

45 lines

nvptx/

CMakeLists.txt

31 lines

src/

nvptx_interface.h

5 lines

Diff 255091

openmp/libomptarget/deviceRTLs/common/device_environment.h

	//===---- device_environment.h - OpenMP GPU device environment --- CUDA -*-===//			//===---- device_environment.h - OpenMP GPU device environment --- CUDA -*-===//
	//			//
	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.			// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
	// See https://llvm.org/LICENSE.txt for license information.			// See https://llvm.org/LICENSE.txt for license information.
	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception			// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
	//			//
	//===----------------------------------------------------------------------===//			//===----------------------------------------------------------------------===//
	//			//
	// Global device environment			// Global device environment
	//			//
	//===----------------------------------------------------------------------===//			//===----------------------------------------------------------------------===//

	#ifndef _OMPTARGET_DEVICE_ENVIRONMENT_H_			#ifndef _OMPTARGET_DEVICE_ENVIRONMENT_H_
	#define _OMPTARGET_DEVICE_ENVIRONMENT_H_			#define _OMPTARGET_DEVICE_ENVIRONMENT_H_

	#include "target_impl.h"			#include "common/target.h"

	struct omptarget_device_environmentTy {			struct omptarget_device_environmentTy {
	int32_t debug_level;			int32_t debug_level;
	};			};

	extern DEVICE omptarget_device_environmentTy omptarget_device_environment;			__LEAGUE_VAR(extern omptarget_device_environmentTy, omptarget_device_environment)

	#endif			#endif

openmp/libomptarget/deviceRTLs/common/src/cancel.cpp

This file was added.

				//===------ cancel.cpp - NVPTX OpenMP cancel interface ------------ c++ -*-===//
				//
				// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
				// See https://llvm.org/LICENSE.txt for license information.
				// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
				//
				//===----------------------------------------------------------------------===//
				//
				// Interface to be used in the implementation of OpenMP cancel.
				//
				//===----------------------------------------------------------------------===//

				//#include "common/debug.h"
				#include "common/target.h"

				struct kmp_Ident;

				__DEVICE_SCOPE_BEGIN()

				// Cancellation-point entry point of the KMPC interface.
				//
				// Cancellation is disabled in this runtime, so all three arguments are
				// ignored and no cancellation request is ever reported.
				//
				// Returns 0 unconditionally, matching the cancel.cu implementation this
				// file replaces. A non-zero result would report an active cancellation to
				// the caller even though none can have been requested.
				int32_t __kmpc_cancellationpoint(kmp_Ident *loc, int32_t global_tid,
				int32_t cancelVal) {
				//PRINT(LD_IO, "call kmpc_cancellationpoint(cancel val %d)\n", (int)cancelVal);
				// disabled
				return 0;
				}

				// Cancel-request entry point of the KMPC interface.
				//
				// Cancellation is disabled in this runtime: the arguments are ignored and
				// the result is always 0.
				int32_t __kmpc_cancel(kmp_Ident *loc,
				int32_t global_tid,
				int32_t cancelVal) {
				// Debug tracing stays off while the common/debug.h include is commented out:
				//PRINT(LD_IO, "call kmpc_cancel(cancel val %d)\n", (int)cancelVal);
				const int32_t NotCancelled = 0;
				return NotCancelled;
				}

				__DEVICE_SCOPE_END()

openmp/libomptarget/deviceRTLs/common/src/cancel.cu

This file was deleted.

	//===------ cancel.cu - NVPTX OpenMP cancel interface ------------ CUDA -*-===//
	//
	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
	// See https://llvm.org/LICENSE.txt for license information.
	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
	//
	//===----------------------------------------------------------------------===//
	//
	// Interface to be used in the implementation of OpenMP cancel.
	//
	//===----------------------------------------------------------------------===//

	#include "interface.h"
	#include "common/debug.h"

	EXTERN int32_t __kmpc_cancellationpoint(kmp_Ident *loc, int32_t global_tid,
	int32_t cancelVal) {
	PRINT(LD_IO, "call kmpc_cancellationpoint(cancel val %d)\n", (int)cancelVal);
	// disabled
	return 0;
	}

	EXTERN int32_t __kmpc_cancel(kmp_Ident *loc, int32_t global_tid,
	int32_t cancelVal) {
	PRINT(LD_IO, "call kmpc_cancel(cancel val %d)\n", (int)cancelVal);
	// disabled
	return 0;
	}

openmp/libomptarget/deviceRTLs/common/src/critical.cpp

This file was moved from openmp/libomptarget/deviceRTLs/common/src/critical.cu.

	//===------ critical.cu - NVPTX OpenMP critical ------------------ CUDA -*-===//			//===------ critical.cpp - NVPTX OpenMP critical ------------------ c++ -*-===//
	//			//
	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.			// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
	// See https://llvm.org/LICENSE.txt for license information.			// See https://llvm.org/LICENSE.txt for license information.
	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception			// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
	//			//
	//===----------------------------------------------------------------------===//			//===----------------------------------------------------------------------===//
	//			//
	// This file contains the implementation of critical with KMPC interface			// This file contains the implementation of critical with KMPC interface
	//			//
	//===----------------------------------------------------------------------===//			//===----------------------------------------------------------------------===//

				//#include "common/debug.h"
				#include "nvptx_interface.h"
	#include "interface.h"			#include "interface.h"
	#include "common/debug.h"

	EXTERN			#include "common/target.h"

				__DEVICE_SCOPE_BEGIN()

	void __kmpc_critical(kmp_Ident *loc, int32_t global_tid,			void __kmpc_critical(kmp_Ident *loc, int32_t global_tid,
	kmp_CriticalName *lck) {			kmp_CriticalName *lck) {
	PRINT0(LD_IO, "call to kmpc_critical()\n");			//PRINT0(LD_IO, "call to kmpc_critical()\n");
	omp_set_lock((omp_lock_t *)lck);			omp_set_lock((omp_lock_t *)lck);
	}			}

	EXTERN
	void __kmpc_end_critical(kmp_Ident *loc, int32_t global_tid,			void __kmpc_end_critical(kmp_Ident *loc, int32_t global_tid,
	kmp_CriticalName *lck) {			kmp_CriticalName *lck) {
	PRINT0(LD_IO, "call to kmpc_end_critical()\n");			//PRINT0(LD_IO, "call to kmpc_end_critical()\n");
	omp_unset_lock((omp_lock_t *)lck);			omp_unset_lock((omp_lock_t *)lck);
	}			}

				__DEVICE_SCOPE_END()

openmp/libomptarget/deviceRTLs/common/src/critical.cu

This file was moved to openmp/libomptarget/deviceRTLs/common/src/critical.cpp.

openmp/libomptarget/deviceRTLs/common/support.h

	//===--------- support.h - OpenMP GPU support functions ---------- CUDA -*-===//			//===--------- support.h - OpenMP GPU support functions ---------- CUDA -*-===//
	//			//
	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.			// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
	// See https://llvm.org/LICENSE.txt for license information.			// See https://llvm.org/LICENSE.txt for license information.
	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception			// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
	//			//
	//===----------------------------------------------------------------------===//			//===----------------------------------------------------------------------===//
	//			//
	// Wrapper to some functions natively supported by the GPU.			// Wrapper to some functions natively supported by the GPU.
	//			//
	//===----------------------------------------------------------------------===//			//===----------------------------------------------------------------------===//

	#ifndef OMPTARGET_SUPPORT_H			#ifndef OMPTARGET_SUPPORT_H
	#define OMPTARGET_SUPPORT_H			#define OMPTARGET_SUPPORT_H

	#include "interface.h"
	#include "target_impl.h"			#include "target_impl.h"
				#include "interface.h"

	////////////////////////////////////////////////////////////////////////////////			////////////////////////////////////////////////////////////////////////////////
	// Execution Parameters			// Execution Parameters
	////////////////////////////////////////////////////////////////////////////////			////////////////////////////////////////////////////////////////////////////////
	enum ExecutionMode {			enum ExecutionMode {
	Spmd = 0x00u,			Spmd = 0x00u,
	Generic = 0x01u,			Generic = 0x01u,
	ModeMask = 0x01u,			ModeMask = 0x01u,
	▲ Show 20 Lines • Show All 74 Lines • Show Last 20 Lines

openmp/libomptarget/deviceRTLs/common/target.h

This file was added.

				//===---- target.h - OpenMP defines and helpers for target code --- C++ -*-===//
				//
				// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
				// See https://llvm.org/LICENSE.txt for license information.
				// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
				//
				//===----------------------------------------------------------------------===//
				//
				// Defines and helpers for target code.
				//
				//===----------------------------------------------------------------------===//

				#ifndef OMPTARGET_TARGET_H
				#define OMPTARGET_TARGET_H

				#include <cstdint>

				// Emit STR, which must already be a string literal, as a pragma.
				#define __p(STR) _Pragma(STR)
				// Stringize the tokens of STR and emit the result as a pragma. The variable
				// macros below use this to splice their NAME argument into directive text.
				#define __p2(STR) __p(#STR)

				// Open an extern "C" block followed by an `omp declare target` region.
				// __DEVICE_SCOPE_END() closes both again.
				#define __DEVICE_SCOPE_BEGIN() \
				extern "C" { \
				__p("omp declare target")

				// Close the `omp declare target` region and the extern "C" block opened by
				// __DEVICE_SCOPE_BEGIN().
				#define __DEVICE_SCOPE_END() \
				__p("omp end declare target") \
				} /* extern "C" */

				// Declaration prefix: `static constexpr` with the nothrow and always_inline
				// attributes.
				#define __CONSTEXPR static constexpr __attribute__((nothrow, always_inline))

				// Declare NAME with type TYPE, tagged [[clang::loader_uninitialized]], and
				// list it in an `omp declare target to(...)` directive.
				// NOTE(review): device_environment.h passes `extern ...` as TYPE; clang
				// appears to reject loader_uninitialized on extern declarations -- confirm.
				#define __LEAGUE_VAR(TYPE, NAME) \
				TYPE NAME [[clang::loader_uninitialized]]; \
				__p2(omp declare target to(NAME))

				// Like __LEAGUE_VAR, but additionally emits an `omp allocate` directive
				// selecting the omp_pteam_mem_alloc allocator for NAME.
				// NOTE(review): presumably the replacement for CUDA __shared__ -- confirm.
				#define __TEAM_VAR(TYPE, NAME) \
				TYPE NAME [[clang::loader_uninitialized]]; \
				__p2(omp allocate(NAME) allocator(omp_pteam_mem_alloc)) \
				__p2(omp declare target to(NAME))

				// Like __LEAGUE_VAR, but additionally emits an `omp allocate` directive
				// selecting the omp_thread_mem_alloc allocator for NAME.
				#define __THREAD_VAR(TYPE, NAME) \
				TYPE NAME [[clang::loader_uninitialized]]; \
				__p2(omp allocate(NAME) allocator(omp_thread_mem_alloc)) \
				__p2(omp declare target to(NAME))

				#endif

openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt

Show First 20 Lines • Show All 44 Lines • ▼ Show 20 Lines

if(LIBOMPTARGET_DEP_CUDA_FOUND)		if(LIBOMPTARGET_DEP_CUDA_FOUND)
libomptarget_say("Building CUDA offloading device RTL.")		libomptarget_say("Building CUDA offloading device RTL.")

# We really don't have any host code, so we don't need to care about		# We really don't have any host code, so we don't need to care about
# propagating host flags.		# propagating host flags.
set(CUDA_PROPAGATE_HOST_FLAGS OFF)		set(CUDA_PROPAGATE_HOST_FLAGS OFF)

		set(cxx_src_files
		${devicertl_common_directory}/src/cancel.cpp
		${devicertl_common_directory}/src/critical.cpp
		)

set(cuda_src_files		set(cuda_src_files
${devicertl_common_directory}/src/cancel.cu
${devicertl_common_directory}/src/critical.cu
${devicertl_common_directory}/src/data_sharing.cu		${devicertl_common_directory}/src/data_sharing.cu
${devicertl_common_directory}/src/libcall.cu		${devicertl_common_directory}/src/libcall.cu
${devicertl_common_directory}/src/loop.cu		${devicertl_common_directory}/src/loop.cu
${devicertl_common_directory}/src/omptarget.cu		${devicertl_common_directory}/src/omptarget.cu
${devicertl_common_directory}/src/parallel.cu		${devicertl_common_directory}/src/parallel.cu
${devicertl_common_directory}/src/reduction.cu		${devicertl_common_directory}/src/reduction.cu
${devicertl_common_directory}/src/support.cu		${devicertl_common_directory}/src/support.cu
${devicertl_common_directory}/src/sync.cu		${devicertl_common_directory}/src/sync.cu
Show All 24 Lines	if(LIBOMPTARGET_DEP_CUDA_FOUND)
if(${LIBOMPTARGET_NVPTX_DEBUG})		if(${LIBOMPTARGET_NVPTX_DEBUG})
set(CUDA_DEBUG -DOMPTARGET_NVPTX_DEBUG=-1 -g --ptxas-options=-v)		set(CUDA_DEBUG -DOMPTARGET_NVPTX_DEBUG=-1 -g --ptxas-options=-v)
endif()		endif()

# NVPTX runtime library has to be statically linked. Dynamic linking is not		# NVPTX runtime library has to be statically linked. Dynamic linking is not
# yet supported by the CUDA toolchain on the device.		# yet supported by the CUDA toolchain on the device.
set(BUILD_SHARED_LIBS OFF)		set(BUILD_SHARED_LIBS OFF)
set(CUDA_SEPARABLE_COMPILATION ON)		set(CUDA_SEPARABLE_COMPILATION ON)
		list(APPEND CXX_FLAGS -I${devicertl_base_directory}
		-I${devicertl_nvptx_directory}/src)
list(APPEND CUDA_NVCC_FLAGS -I${devicertl_base_directory}		list(APPEND CUDA_NVCC_FLAGS -I${devicertl_base_directory}
-I${devicertl_nvptx_directory}/src)		-I${devicertl_nvptx_directory}/src)
cuda_add_library(omptarget-nvptx STATIC ${cuda_src_files} ${omp_data_objects}		cuda_add_library(omptarget-nvptx STATIC ${cuda_src_files} ${omp_data_objects}
OPTIONS ${CUDA_ARCH} ${CUDA_DEBUG})		OPTIONS ${CUDA_ARCH} ${CUDA_DEBUG})

# Install device RTL under the lib destination folder.		# Install device RTL under the lib destination folder.
install(TARGETS omptarget-nvptx ARCHIVE DESTINATION "${OPENMP_INSTALL_LIBDIR}")		install(TARGETS omptarget-nvptx ARCHIVE DESTINATION "${OPENMP_INSTALL_LIBDIR}")

▲ Show 20 Lines • Show All 43 Lines • ▼ Show 20 Lines	if (${LIBOMPTARGET_NVPTX_ENABLE_BCLIB})

# Create target to build all Bitcode libraries.		# Create target to build all Bitcode libraries.
add_custom_target(omptarget-nvptx-bc)		add_custom_target(omptarget-nvptx-bc)

# Generate a Bitcode library for all the compute capabilities the user requested.		# Generate a Bitcode library for all the compute capabilities the user requested.
foreach(sm ${nvptx_sm_list})		foreach(sm ${nvptx_sm_list})
set(cuda_arch --cuda-gpu-arch=sm_${sm})		set(cuda_arch --cuda-gpu-arch=sm_${sm})

# Compile CUDA files to bitcode.		# Compile C++ files to bitcode.
set(bc_files "")		set(bc_files "")
		# Add one bitcode build rule per C++ source for the current ${sm}.
		foreach(src ${cxx_src_files})
		get_filename_component(infile ${src} ABSOLUTE)
		get_filename_component(outfile ${src} NAME)

		# The source is compiled with OpenMP offloading flags targeting
		# nvptx64-nvidia-cuda and with -fopenmp-is-device passed to cc1.
		# NOTE(review): the aux-triple is hard-coded to x86_64-unknown-linux;
		# presumably it should follow the actual host triple -- confirm.
		# NOTE(review): only ${bc_flags} is passed here; the CXX_FLAGS list appended
		# in the visible part of this file is not referenced -- confirm intent.
		add_custom_command(OUTPUT ${outfile}-sm_${sm}.bc
		COMMAND ${LIBOMPTARGET_NVPTX_SELECTED_CUDA_COMPILER} ${bc_flags} ${cuda_arch}
		-c ${infile} -o ${outfile}-sm_${sm}.bc -fopenmp -fopenmp-targets=nvptx64-nvidia-cuda
		-Xclang -fopenmp-is-device
		-Xclang -aux-triple -Xclang x86_64-unknown-linux
		DEPENDS ${infile}
		IMPLICIT_DEPENDS CXX ${infile}
		COMMENT "Building LLVM bitcode ${outfile}-sm_${sm}.bc"
		VERBATIM
		)
		# Register the generated bitcode file as an additional clean file.
		set_property(DIRECTORY APPEND PROPERTY ADDITIONAL_MAKE_CLEAN_FILES ${outfile}-sm_${sm}.bc)

		# Collect the output in bc_files alongside the other bitcode files.
		list(APPEND bc_files ${outfile}-sm_${sm}.bc)
		endforeach()

		# Compile CUDA files to bitcode.
foreach(src ${cuda_src_files})		foreach(src ${cuda_src_files})
get_filename_component(infile ${src} ABSOLUTE)		get_filename_component(infile ${src} ABSOLUTE)
get_filename_component(outfile ${src} NAME)		get_filename_component(outfile ${src} NAME)

add_custom_command(OUTPUT ${outfile}-sm_${sm}.bc		add_custom_command(OUTPUT ${outfile}-sm_${sm}.bc
COMMAND ${LIBOMPTARGET_NVPTX_SELECTED_CUDA_COMPILER} ${bc_flags} ${cuda_arch}		COMMAND ${LIBOMPTARGET_NVPTX_SELECTED_CUDA_COMPILER} ${bc_flags} ${cuda_arch}
-c ${infile} -o ${outfile}-sm_${sm}.bc		-c ${infile} -o ${outfile}-sm_${sm}.bc
DEPENDS ${infile}		DEPENDS ${infile}
Show All 35 Lines

openmp/libomptarget/deviceRTLs/nvptx/src/nvptx_interface.h

	//===--- nvptx_interface.h - OpenMP interface definitions -------- CUDA -*-===//			//===--- nvptx_interface.h - OpenMP interface definitions -------- CUDA -*-===//
	//			//
	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.			// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
	// See https://llvm.org/LICENSE.txt for license information.			// See https://llvm.org/LICENSE.txt for license information.
	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception			// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
	//			//
	//===----------------------------------------------------------------------===//			//===----------------------------------------------------------------------===//

	#ifndef _NVPTX_INTERFACE_H_			#ifndef _NVPTX_INTERFACE_H_
	#define _NVPTX_INTERFACE_H_			#define _NVPTX_INTERFACE_H_

	#include <stdint.h>			#include <stdint.h>

				#ifndef _OPENMP
	#define EXTERN extern "C" __device__			#define EXTERN extern "C" __device__
				#else
				#define EXTERN extern "C"
				#endif

	typedef uint32_t __kmpc_impl_lanemask_t;			typedef uint32_t __kmpc_impl_lanemask_t;
	typedef uint32_t omp_lock_t; /* arbitrary type of the right length */			typedef uint32_t omp_lock_t; /* arbitrary type of the right length */

	#endif			#endif