Index: CMakeLists.txt =================================================================== --- CMakeLists.txt +++ CMakeLists.txt @@ -1,2 +1,3 @@ cmake_minimum_required(VERSION 2.8 FATAL_ERROR) add_subdirectory(runtime) +add_subdirectory(libomptarget) \ No newline at end of file Index: libomptarget/Build_With_CMake.txt =================================================================== --- /dev/null +++ libomptarget/Build_With_CMake.txt @@ -0,0 +1,127 @@ +# +#//===----------------------------------------------------------------------===// +#// +#// The LLVM Compiler Infrastructure +#// +#// This file is dual licensed under the MIT and the University of Illinois Open +#// Source Licenses. See LICENSE.txt for details. +#// +#//===----------------------------------------------------------------------===// +# + +===================================================================== +How to Build the LLVM* OpenMP* Offloading Runtime Library using CMake +===================================================================== + +==== Version of CMake required: v2.8.0 or above ==== + +============================================ +How to call cmake initially, then repeatedly +============================================ +- When calling cmake for the first time, all needed compiler options + must be specified on the command line. After this initial call to + cmake, the compiler definitions must not be included for further calls + to cmake. Other options can be specified on the command line multiple + times including all definitions in the Build options section below. +- Example of configuring, building, reconfiguring, rebuilding: + $ mkdir build + $ cd build + $ cmake -DCMAKE_C_COMPILER=gcc -DCMAKE_CXX_COMPILER=g++ .. # Initial configuration + $ make + ... + $ make clean + $ cmake -DCMAKE_BUILD_TYPE=Debug .. # Second configuration + $ make + ... + $ rm -rf * + $ cmake -DCMAKE_C_COMPILER=gcc -DCMAKE_CXX_COMPILER=g++ .. # Third configuration + $ make +- Notice in the example how the compiler definitions are only specified + for an empty build directory, but other Build options are used at any time. +- The file CMakeCache.txt which is created after the first call to cmake + is a configuration file which holds all the values for the Build options. + These configuration values can be changed using a text editor to modify + CMakeCache.txt as opposed to using definitions on the command line. +- To have cmake create a particular type of build generator file simply + inlude the -G option: + $ cmake -G "Unix Makefiles" ... + You can see a list of generators cmake supports by executing cmake with + no arguments and a list will be printed. + +===================== +Instructions to Build +===================== + $ cd libomptarget_top_level/ [ directory with plugins/ , deviceRTLs/ , etc. ] + $ mkdir build + $ cd build + + [ Unix* Libraries ] + $ cmake -DCMAKE_C_COMPILER= -DCMAKE_CXX_COMPILER= .. + + $ make + $ make install + +=========== +Tests +=========== +After the library has been built, there are optional tests that can be +performed. Some will be skipped based upon the platform. +To run the tests, +$ make check-libomptarget + +============= +CMake options +============= +-DCMAKE_C_COMPILER= +Specify the C compiler + +-DCMAKE_CXX_COMPILER= +Specify the C++ compiler + +==== First values listed are the default value ==== +-DCMAKE_BUILD_TYPE=Release|Debug|RelWithDebInfo +Build type can be Release, Debug, or RelWithDebInfo. + +-DLIBOMPTARGET_ENABLE_WERROR=true|false +Should consider warnings as errors. + +-DLIBOMPTARGET_LLVM_LIT_EXECUTABLE="" +Full path to the llvm-lit tool. Required for testing in out-of-tree builds. + +-DLIBOMPTARGET_FILECHECK_EXECUTABLE="" +Full path to the FileCheck tool. Required for testing in out-of-tree builds. + +-DLIBOMPTARGET_OPENMP_HEADER_FOLDER="" +Path of the folder that contains omp.h. This is required for testing +out-of-tree builds. + +-DLIBOMPTARGET_OPENMP_HOST_RTL_FOLDER="" +Path of the folder that contains libomp.so. This is required for testing +out-of-tree builds. + +==== NVPTX device RTL specific ==== +-DLIBOMPTARGET_NVPTX_ALTERNATE_HOST_COMPILER="" +Host compiler to use with NVCC. This compiler is not going to be used to produce +any binary. Instead, this is used to overcome the input compiler checks done by +NVCC. E.g. if using a default host compiler that is not compatible with NVCC, +this option can be use to pass to NVCC a valid compiler to avoid the error. + +-DLIBOMPTARGET_NVPTX_COMPUTE_CAPABILITY="35" +Comma-separated list of CUDA compute capabilities that should be supported by +the NVPTX device RTL. E.g. for compute capabilities 3.0 and 3.5, the option +"30,35" should be used. + +======================= +Example usages of CMake +======================= +---- Typical usage ---- +cmake -DCMAKE_C_COMPILER=gcc -DCMAKE_CXX_COMPILER=g++ .. +cmake -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ .. + +---- Request an NVPTX runtime library that supports compute capability 5.0 ---- +cmake -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DLIBOMPTARGET_NVPTX_COMPUTE_CAPABILITY="50" + +========= +Footnotes +========= +[*] Other names and brands may be claimed as the property of others. Index: libomptarget/CMakeLists.txt =================================================================== --- /dev/null +++ libomptarget/CMakeLists.txt @@ -0,0 +1,115 @@ +##===----------------------------------------------------------------------===## +# +# The LLVM Compiler Infrastructure +# +# This file is dual licensed under the MIT and the University of Illinois Open +# Source Licenses. See LICENSE.txt for details. +# +##===----------------------------------------------------------------------===## +# +# Build offloading library libomptarget.so. +# +##===----------------------------------------------------------------------===## + +# CMAKE libomptarget +cmake_minimum_required(VERSION 2.8 FATAL_ERROR) + +# Add cmake directory to search for custom cmake functions. +set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake/Modules ${CMAKE_MODULE_PATH}) + +# Standalone build or part of LLVM? +set(LIBOMPTARGET_STANDALONE_BUILD FALSE) +if("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_SOURCE_DIR}" OR + "${CMAKE_SOURCE_DIR}/libomptarget" STREQUAL "${CMAKE_CURRENT_SOURCE_DIR}") + project(libomptarget C CXX) + set(LIBOMPTARGET_STANDALONE_BUILD TRUE) +endif() + + +if(${LIBOMPTARGET_STANDALONE_BUILD}) + set(LIBOMPTARGET_ENABLE_WERROR FALSE CACHE BOOL + "Enable -Werror flags to turn warnings into errors for supporting compilers.") + # CMAKE_BUILD_TYPE was not defined, set default to Release + if(NOT CMAKE_BUILD_TYPE) + set(CMAKE_BUILD_TYPE Release) + endif() +else() + set(LIBOMP_ENABLE_WERROR ${LLVM_ENABLE_WERROR}) +endif() + +# Compiler flag checks. +include(config-ix) + +# Message utilities. +include(LibomptargetUtils) + +# Get dependencies for the different components of the project. +include(LibomptargetGetDependencies) + +# This is a list of all the targets that are supported/tested right now. +set (LIBOMPTARGET_ALL_TARGETS "${LIBOMPTARGET_ALL_TARGETS} powerpc64le-ibm-linux-gnu") +set (LIBOMPTARGET_ALL_TARGETS "${LIBOMPTARGET_ALL_TARGETS} powerpc64-ibm-linux-gnu") +set (LIBOMPTARGET_ALL_TARGETS "${LIBOMPTARGET_ALL_TARGETS} x86_64-pc-linux-gnu") +set (LIBOMPTARGET_ALL_TARGETS "${LIBOMPTARGET_ALL_TARGETS} nvptx64-nvidia-cuda") + +# Once the plugins for the different targets are validated, they will be added to +# the list of supported targets in the current system. +set (LIBOMPTARGET_SYSTEM_TARGETS "") + +# Set base directories - required for lit to locate the tests. +set(LIBOMPTARGET_BASE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) +set(LIBOMPTARGET_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}) + +# We need C++11 support. +if(LIBOMPTARGET_HAVE_STD_CPP11_FLAG) + + libomptarget_say("Building offloading runtime library libomptarget.") + + # Add libelf include directories. + include_directories(${LIBOMPTARGET_DEP_LIBELF_INCLUDE_DIRS}) + + # Enable support for C++11. + add_definitions(-std=c++11) + + if(LIBOMPTARGET_ENABLE_WERROR AND LIBOMPTARGET_HAVE_WERROR_FLAG) + add_definitions(-Werror) + endif() + + # If building this library in debug mode, we define a macro to enable + # dumping progress messages at runtime. + string( TOLOWER "${CMAKE_BUILD_TYPE}" LIBOMPTARGET_CMAKE_BUILD_TYPE) + if(LIBOMPTARGET_CMAKE_BUILD_TYPE MATCHES debug) + add_definitions(-DOMPTARGET_DEBUG) + add_definitions(-g) + add_definitions(-O0) + endif() + + set(src_files + src/omptarget.cpp + ) + + include_directories(src/) + + # Build libomptarget library with libelf and libdl dependencies. + add_library(omptarget SHARED ${src_files}) + target_link_libraries(omptarget + ${LIBOMPTARGET_DEP_LIBELF_LIBRARIES} + dl + "-Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/exports") + + # Install libomptarget under the lib destination folder. + install(TARGETS omptarget LIBRARY DESTINATION "lib") + + # Retrieve the path to the resulting library so that it can be used for + # testing. + get_target_property(LIBOMPTARGET_LIBRARY_DIR omptarget LIBRARY_OUTPUT_DIRECTORY) + if(NOT LIBOMPTARGET_LIBRARY_DIR) + set(LIBOMPTARGET_LIBRARY_DIR ${CMAKE_CURRENT_BINARY_DIR}) + endif() + + # Add tests. + add_subdirectory(test) + +else(LIBOMPTARGET_HAVE_STD_CPP11_FLAG) + libomptarget_say("Not building offloading runtime library libomptarget: host compiler must have c++11 support.") +endif(LIBOMPTARGET_HAVE_STD_CPP11_FLAG) Index: libomptarget/README.txt =================================================================== --- /dev/null +++ libomptarget/README.txt @@ -0,0 +1,72 @@ + + README for the LLVM* OpenMP* Offloading Runtime Library (libomptarget) + ====================================================================== + +How to Build the LLVM* OpenMP* Offloading Runtime Library (libomptarget) +======================================================================== +In-tree build: + +$ cd where-you-want-to-live +Check out openmp (libomptarget lives under ./libomptarget) into llvm/projects +$ cd where-you-want-to-build +$ mkdir build && cd build +$ cmake path/to/llvm -DCMAKE_C_COMPILER= -DCMAKE_CXX_COMPILER= +$ make omptarget + +Out-of-tree build: + +$ cd where-you-want-to-live +Check out openmp (libomptarget lives under ./libomptarget) +$ cd where-you-want-to-live/openmp/libomptarget +$ mkdir build && cd build +$ cmake path/to/openmp -DCMAKE_C_COMPILER= -DCMAKE_CXX_COMPILER= +$ make + +For details about building, please look at Build_With_CMake.txt + +Architectures Supported +======================= +The current library has been only tested in Linux operating system and the +following host architectures: +* Intel(R) 64 architecture +* IBM(R) Power architecture (big endian) +* IBM(R) Power architecture (little endian) + +The currently supported offloading device architectures are: +* Intel(R) 64 architecture (generic 64-bit plugin - mostly for testing purposes) +* IBM(R) Power architecture (big endian) (generic 64-bit plugin - mostly for testing purposes) +* IBM(R) Power architecture (little endian) (generic 64-bit plugin - mostly for testing purposes) +* CUDA(R) enabled 64-bit NVIDIA(R) GPU architectures + +Supported RTL Build Configurations +================================== +Supported Architectures: Intel(R) 64, IBM(R) Power 7 and Power 8 + + --------------------------- + | gcc | clang | +--------------|------------|------------| +| Linux* OS | Yes(1) | Yes(2) | +----------------------------------------- + +(1) gcc version 4.8.2 or later is supported. +(2) clang version 3.7 or later is supported. + + +Front-end Compilers that work with this RTL +=========================================== + +The following compilers are known to do compatible code generation for +this RTL: + - clang (from the OpenMP development branch at http://clang-omp.github.io/ ) + - clang (development branch at http://clang.llvm.org - several features still + under development) + +----------------------------------------------------------------------- + +Notices +======= +This library and related compiler support is still under development, so the +employed interface is likely to change in the future. + +*Other names and brands may be claimed as the property of others. + Index: libomptarget/cmake/Modules/LibomptargetGetDependencies.cmake =================================================================== --- /dev/null +++ libomptarget/cmake/Modules/LibomptargetGetDependencies.cmake @@ -0,0 +1,124 @@ +# +#//===----------------------------------------------------------------------===// +#// +#// The LLVM Compiler Infrastructure +#// +#// This file is dual licensed under the MIT and the University of Illinois Open +#// Source Licenses. See LICENSE.txt for details. +#// +#//===----------------------------------------------------------------------===// +# + +# Try to detect in the system several dependencies required by the different +# components of libomptarget. These are the dependencies we have: +# +# libelf : required by some targets to handle the ELF files at runtime. +# libffi : required to launch target kernels given function and argument +# pointers. +# CUDA : required to control offloading to NVIDIA GPUs. + +include (FindPackageHandleStandardArgs) + +################################################################################ +# Looking for libelf... +################################################################################ + +find_path ( + LIBOMPTARGET_DEP_LIBELF_INCLUDE_DIR + NAMES + libelf.h + PATHS + /usr/include + /usr/local/include + /opt/local/include + /sw/include + ENV CPATH + PATH_SUFFIXES + libelf) + +find_library ( + LIBOMPTARGET_DEP_LIBELF_LIBRARIES + NAMES + elf + PATHS + /usr/lib + /usr/local/lib + /opt/local/lib + /sw/lib + ENV LIBRARY_PATH + ENV LD_LIBRARY_PATH) + +set(LIBOMPTARGET_DEP_LIBELF_INCLUDE_DIRS ${LIBOMPTARGET_DEP_LIBELF_INCLUDE_DIR}) +find_package_handle_standard_args( + LIBOMPTARGET_DEP_LIBELF + DEFAULT_MSG + LIBOMPTARGET_DEP_LIBELF_LIBRARIES + LIBOMPTARGET_DEP_LIBELF_INCLUDE_DIRS) + +mark_as_advanced( + LIBOMPTARGET_DEP_LIBELF_INCLUDE_DIRS + LIBOMPTARGET_DEP_LIBELF_LIBRARIES) + +################################################################################ +# Looking for libffi... +################################################################################ +find_package(PkgConfig) + +pkg_check_modules(LIBOMPTARGET_SEARCH_LIBFFI QUIET libffi) + +find_path ( + LIBOMPTARGET_DEP_LIBFFI_INCLUDE_DIR + NAMES + ffi.h + HINTS + ${LIBOMPTARGET_SEARCH_LIBFFI_INCLUDEDIR} + ${LIBOMPTARGET_SEARCH_LIBFFI_INCLUDE_DIRS} + PATHS + /usr/include + /usr/local/include + /opt/local/include + /sw/include + ENV CPATH) + +# Don't bother look for the library if the header files were not found. +if (LIBOMPTARGET_DEP_LIBFFI_INCLUDE_DIR) + find_library ( + LIBOMPTARGET_DEP_LIBFFI_LIBRARIES + NAMES + ffi + HINTS + ${LIBOMPTARGET_SEARCH_LIBFFI_LIBDIR} + ${LIBOMPTARGET_SEARCH_LIBFFI_LIBRARY_DIRS} + PATHS + /usr/lib + /usr/local/lib + /opt/local/lib + /sw/lib + ENV LIBRARY_PATH + ENV LD_LIBRARY_PATH) +endif() + +set(LIBOMPTARGET_DEP_LIBFFI_INCLUDE_DIRS ${LIBOMPTARGET_DEP_LIBFFI_INCLUDE_DIR}) +find_package_handle_standard_args( + LIBOMPTARGET_DEP_LIBFFI + DEFAULT_MSG + LIBOMPTARGET_DEP_LIBFFI_LIBRARIES + LIBOMPTARGET_DEP_LIBFFI_INCLUDE_DIRS) + +mark_as_advanced( + LIBOMPTARGET_DEP_LIBFFI_INCLUDE_DIRS + LIBOMPTARGET_DEP_LIBFFI_LIBRARIES) + +################################################################################ +# Looking for CUDA... +################################################################################ +find_package(CUDA QUIET) + +set(LIBOMPTARGET_DEP_CUDA_FOUND ${CUDA_FOUND}) +set(LIBOMPTARGET_DEP_CUDA_LIBRARIES ${CUDA_LIBRARIES}) +set(LIBOMPTARGET_DEP_CUDA_INCLUDE_DIRS ${CUDA_INCLUDE_DIRS}) + +mark_as_advanced( + LIBOMPTARGET_DEP_CUDA_FOUND + LIBOMPTARGET_DEP_CUDA_INCLUDE_DIRS + LIBOMPTARGET_DEP_CUDA_LIBRARIES) Index: libomptarget/cmake/Modules/LibomptargetUtils.cmake =================================================================== --- /dev/null +++ libomptarget/cmake/Modules/LibomptargetUtils.cmake @@ -0,0 +1,28 @@ +# +#//===----------------------------------------------------------------------===// +#// +#// The LLVM Compiler Infrastructure +#// +#// This file is dual licensed under the MIT and the University of Illinois Open +#// Source Licenses. See LICENSE.txt for details. +#// +#//===----------------------------------------------------------------------===// +# + +# void libomptarget_say(string message_to_user); +# - prints out message_to_user +macro(libomptarget_say message_to_user) + message(STATUS "LIBOMPTARGET: ${message_to_user}") +endmacro() + +# void libomptarget_warning_say(string message_to_user); +# - prints out message_to_user with a warning +macro(libomptarget_warning_say message_to_user) + message(WARNING "LIBOMPTARGET: ${message_to_user}") +endmacro() + +# void libomptarget_error_say(string message_to_user); +# - prints out message_to_user with an error and exits cmake +macro(libomptarget_error_say message_to_user) + message(FATAL_ERROR "LIBOMPTARGET: ${message_to_user}") +endmacro() Index: libomptarget/cmake/Modules/config-ix.cmake =================================================================== --- /dev/null +++ libomptarget/cmake/Modules/config-ix.cmake @@ -0,0 +1,17 @@ +# +#//===----------------------------------------------------------------------===// +#// +#// The LLVM Compiler Infrastructure +#// +#// This file is dual licensed under the MIT and the University of Illinois Open +#// Source Licenses. See LICENSE.txt for details. +#// +#//===----------------------------------------------------------------------===// +# + +include(CheckCCompilerFlag) +include(CheckCXXCompilerFlag) + +# Checking C, CXX +check_cxx_compiler_flag(-std=c++11 LIBOMPTARGET_HAVE_STD_CPP11_FLAG) +check_c_compiler_flag(-Werror LIBOMPTARGET_HAVE_WERROR_FLAG) Index: libomptarget/exports =================================================================== --- /dev/null +++ libomptarget/exports @@ -0,0 +1,22 @@ +VERS1.0 { + global: + __tgt_register_lib; + __tgt_unregister_lib; + __tgt_target_data_begin; + __tgt_target_data_end; + __tgt_target_data_update; + __tgt_target; + __tgt_target_teams; + __tgt_target_data_begin_nowait; + __tgt_target_data_end_nowait; + __tgt_target_data_update_nowait; + __tgt_target_nowait; + __tgt_target_teams_nowait; + omp_set_default_device; + omp_get_default_device; + omp_get_num_devices; + omp_is_initial_device; + local: + *; +}; + Index: libomptarget/src/omptarget.h =================================================================== --- /dev/null +++ libomptarget/src/omptarget.h @@ -0,0 +1,171 @@ +//===-------- omptarget.h - Target independent OpenMP target RTL -- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.txt for details. +// +//===----------------------------------------------------------------------===// +// +// Interface to be used by Clang during the codegen of a +// target region. +// +//===----------------------------------------------------------------------===// + +#include + +#ifndef _OMPTARGET_H_ +#define _OMPTARGET_H_ + +#define OFFLOAD_SUCCESS (0) +#define OFFLOAD_FAIL (~0) + +#define OFFLOAD_DEVICE_DEFAULT -1 +#define OFFLOAD_DEVICE_CONSTRUCTOR -2 +#define OFFLOAD_DEVICE_DESTRUCTOR -3 + +/// Data attributes for each data reference used in an OpenMP target region. +enum tgt_map_type { + tgt_map_alloc = 0x00, // allocate memory in the device for this reference + tgt_map_to = + 0x01, // copy the data to the device but do not update the host memory + tgt_map_from = + 0x02, // copy the data to the host but do not update the device memory + tgt_map_always = 0x04, + tgt_map_release = 0x08, + tgt_map_delete = 0x18, + tgt_map_pointer = 0x20, + tgt_map_extra = 0x40 +}; + +/// This struct is a record of an entry point or global. For a function +/// entry point the size is expected to be zero +struct __tgt_offload_entry { + void *addr; // Pointer to the offload entry info (function or global) + char *name; // Name of the function or global + int64_t size; // Size of the entry info (0 if it a function) +}; + +/// This struct is a record of the device image information +struct __tgt_device_image { + void *ImageStart; // Pointer to the target code start + void *ImageEnd; // Pointer to the target code end + __tgt_offload_entry *EntriesBegin; // Begin of the table with all the entries + __tgt_offload_entry + *EntriesEnd; // End of the table with all the entries (non inclusive) +}; + +/// This struct is a record of all the host code that may be offloaded to a +/// target. +struct __tgt_bin_desc { + int32_t NumDevices; // Number of devices supported + __tgt_device_image *DeviceImages; // Arrays of device images (one per device) + __tgt_offload_entry *EntriesBegin; // Begin of the table with all the entries + __tgt_offload_entry + *EntriesEnd; // End of the table with all the entries (non inclusive) +}; + +/// This struct contains the offload entries identified by the target runtime +struct __tgt_target_table { + __tgt_offload_entry *EntriesBegin; // Begin of the table with all the entries + __tgt_offload_entry + *EntriesEnd; // End of the table with all the entries (non inclusive) +}; + +#ifdef __cplusplus +extern "C" { +#endif + +void omp_set_default_device(int device_num); +int omp_get_default_device(void); +int omp_get_num_devices(void); +int omp_is_initial_device(void); + +/// adds a target shared library to the target execution image +void __tgt_register_lib(__tgt_bin_desc *desc); + +/// removes a target shared library to the target execution image +void __tgt_unregister_lib(__tgt_bin_desc *desc); + +// creates host to the target data mapping, store it in the +// libtarget.so internal structure (an entry in a stack of data maps) and +// passes the data to the device; +void __tgt_target_data_begin(int32_t device_id, int32_t arg_num, + void **args_base, void **args, int64_t *arg_sizes, + int32_t *arg_types); +void __tgt_target_data_begin_nowait(int32_t device_id, int32_t arg_num, + void **args_base, void **args, + int64_t *arg_sizes, int32_t *arg_types, + int32_t depNum, void *depList, + int32_t noAliasDepNum, + void *noAliasDepList); + +// passes data from the target, release target memory and destroys the +// host-target mapping (top entry from the stack of data maps) created by +// the last __tgt_target_data_begin +void __tgt_target_data_end(int32_t device_id, int32_t arg_num, void **args_base, + void **args, int64_t *arg_sizes, int32_t *arg_types); +void __tgt_target_data_end_nowait(int32_t device_id, int32_t arg_num, + void **args_base, void **args, + int64_t *arg_sizes, int32_t *arg_types, + int32_t depNum, void *depList, + int32_t noAliasDepNum, void *noAliasDepList); + +/// passes data to/from the target +void __tgt_target_data_update(int32_t device_id, int32_t arg_num, + void **args_base, void **args, int64_t *arg_sizes, + int32_t *arg_types); +void __tgt_target_data_update_nowait(int32_t device_id, int32_t arg_num, + void **args_base, void **args, + int64_t *arg_sizes, int32_t *arg_types, + int32_t depNum, void *depList, + int32_t noAliasDepNum, + void *noAliasDepList); + +// performs the same actions as data_begin in case arg_num is non-zero +// and initiates run of offloaded region on target platform; if arg_num +// is non-zero after the region execution is done it also performs the +// same action as data_update and data_end aboveThe following types are +// used; this function return 0 if it was able to transfer the execution +// to a target and an int different from zero otherwise +int __tgt_target(int32_t device_id, void *host_ptr, int32_t arg_num, + void **args_base, void **args, int64_t *arg_sizes, + int32_t *arg_types); +int __tgt_target_nowait(int32_t device_id, void *host_ptr, int32_t arg_num, + void **args_base, void **args, int64_t *arg_sizes, + int32_t *arg_types, int32_t depNum, void *depList, + int32_t noAliasDepNum, void *noAliasDepList); + +int __tgt_target_teams(int32_t device_id, void *host_ptr, int32_t arg_num, + void **args_base, void **args, int64_t *arg_sizes, + int32_t *arg_types, int32_t num_teams, + int32_t thread_limit); +int __tgt_target_teams_nowait(int32_t device_id, void *host_ptr, + int32_t arg_num, void **args_base, void **args, + int64_t *arg_sizes, int32_t *arg_types, + int32_t num_teams, int32_t thread_limit, + int32_t depNum, void *depList, + int32_t noAliasDepNum, void *noAliasDepList); + +#ifdef __cplusplus +} +#endif + +#ifdef OMPTARGET_DEBUG +#define DEBUGP(prefix, ...) \ + { \ + fprintf(stderr, "%s --> ", prefix); \ + fprintf(stderr, __VA_ARGS__); \ + } +#else +#define DEBUGP(prefix, ...) \ + {} +#endif + +#ifdef __cplusplus +#define EXTERN extern "C" +#else +#define EXTERN extern +#endif + +#endif // _OMPTARGET_H_ Index: libomptarget/src/omptarget.cpp =================================================================== --- /dev/null +++ libomptarget/src/omptarget.cpp @@ -0,0 +1,1018 @@ +//===------ omptarget.cpp - Target independent OpenMP target RTL -- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.txt for details. +// +//===----------------------------------------------------------------------===// +// +// Implementation of the interface to be used by Clang during the codegen of a +// target region. +// +//===----------------------------------------------------------------------===// + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +// Header file global to this project +#include "omptarget.h" + +#define DP(...) DEBUGP("Libomptarget", __VA_ARGS__) + +// List of all plugins that can support offloading. +static const char *RTLNames[] = { + /* PowerPC target */ "libomptarget.rtl.ppc64.so", + /* x86_64 target */ "libomptarget.rtl.x86_64.so", + /* CUDA target */ "libomptarget.rtl.cuda.so"}; + +struct RTLInfoTy; + +/// Map between host data and target data. +struct HostDataToTargetTy { + long HstPtrBase; // host info. + long HstPtrBegin; + long HstPtrEnd; // non-inclusive. + + long TgtPtrBegin; // target info. + long TgtPtrEnd; // non-inclusive (FIXME: maybe not needed?) + + long RefCount; + + HostDataToTargetTy() + : HstPtrBase(0), HstPtrBegin(0), HstPtrEnd(0), TgtPtrBegin(0), + TgtPtrEnd(0), RefCount(0) {} + HostDataToTargetTy(long BP, long B, long E, long TB, long TE) + : HstPtrBase(BP), HstPtrBegin(B), HstPtrEnd(E), TgtPtrBegin(TB), + TgtPtrEnd(TE), RefCount(1) {} +}; + +typedef std::list HostDataToTargetListTy; + +struct DeviceTy { + int32_t DeviceID; + RTLInfoTy *RTL; + int32_t RTLDeviceID; + + bool IsInit; + HostDataToTargetListTy HostDataToTargetMap; + std::list PendingConstrDestrHostPtrList; + + DeviceTy(RTLInfoTy *RTL) + : DeviceID(-1), RTL(RTL), RTLDeviceID(-1), IsInit(false), + HostDataToTargetMap(), PendingConstrDestrHostPtrList() {} + + void *getOrAllocTgtPtr(void *HstPtrBegin, void *HstPtrBase, long Size, + long &IsNew, long UpdateRefCount = true); + void *getTgtPtrBegin(void *HstPtrBegin, long Size); + void *getTgtPtrBegin(void *HstPtrBegin, long Size, long &IsLast, + long UpdateRefCount = true); + void deallocTgtPtr(void *TgtPtrBegin, long Size, long ForceDelete); + + // calls to RTL + int32_t init(); + __tgt_target_table *load_binary(void *Img); + + int32_t data_submit(void *TgtPtrBegin, void *HstPtrBegin, int64_t Size); + int32_t data_retrieve(void *HstPtrBegin, void *TgtPtrBegin, int64_t Size); + + int32_t run_region(void *TgtEntryPtr, void **TgtVarsPtr, int32_t TgtVarsSize); + int32_t run_team_region(void *TgtEntryPtr, void **TgtVarsPtr, + int32_t TgtVarsSize, int32_t NumTeams, + int32_t ThreadLimit); +}; + +struct RTLInfoTy { + typedef int32_t(is_valid_binary_ty)(void *); + typedef int32_t(number_of_devices_ty)(); + typedef int32_t(init_device_ty)(int32_t); + typedef __tgt_target_table *(load_binary_ty)(int32_t, void *); + typedef void *(data_alloc_ty)(int32_t, int64_t); + typedef int32_t(data_submit_ty)(int32_t, void *, void *, int64_t); + typedef int32_t(data_retrieve_ty)(int32_t, void *, void *, int64_t); + typedef int32_t(data_delete_ty)(int32_t, void *); + typedef int32_t(run_region_ty)(int32_t, void *, void **, int32_t); + typedef int32_t(run_team_region_ty)(int32_t, void *, void **, int32_t, + int32_t, int32_t); + + int32_t Idx; // RTL index, index is the number of devices + // of other RTLs that were registered before. + int32_t NumberOfDevices; // Number of devices this RTL deal with. + std::vector Devices; // one per device (NumberOfDevices in total). + + void *LibraryHandler; + // Functions implemented in the RTL. + is_valid_binary_ty *is_valid_binary; + number_of_devices_ty *number_of_devices; + init_device_ty *init_device; + load_binary_ty *load_binary; + data_alloc_ty *data_alloc; + data_submit_ty *data_submit; + data_retrieve_ty *data_retrieve; + data_delete_ty *data_delete; + run_region_ty *run_region; + run_team_region_ty *run_team_region; + + // Are there images associated with this RTL. + bool isUsed; +}; + +/// Map between Device ID (i.e. openmp device id) and its DeviceTy. +typedef std::vector DevicesTy; +static DevicesTy Devices; + +/// RTLs identified in the system. +class RTLsTy { + // Set to true if the library attempted to load the RTLs (plugins) before. + bool isInitialized; + +public: + // List of the detected runtime libraries. + std::list AllRTLs; + + // Array of pointers to the detected runtime libraries that have compatible + // binaries. + std::vector UsedRTLs; + + explicit RTLsTy() : isInitialized(false) {} + + // Load all the runtime libraries (plugins) if not done before. + void LoadRTLs(); +}; + +void RTLsTy::LoadRTLs() { + // Did we load the RTLs before? If so, just return. + if (isInitialized) + return; + + // Attempt to open all the plugins and, if they exist, check if the interface + // is correct and if they are supporting any devices. + for (auto *Name : RTLNames) { + void *dynlib_handle = dlopen(Name, RTLD_NOW); + + if (!dynlib_handle) { + // Library does not exist or cannot be found. + DP("Unable to load library '%s': %s!\n", Name, dlerror()); + continue; + } + + DP("Successfully loaded library '%s'!\n", Name); + + // Retrieve the RTL information from the runtime library. + RTLInfoTy R; + + R.LibraryHandler = dynlib_handle; + R.isUsed = false; + if (!(R.is_valid_binary = (RTLInfoTy::is_valid_binary_ty *)dlsym( + dynlib_handle, "__tgt_rtl_is_valid_binary"))) + continue; + if (!(R.number_of_devices = (RTLInfoTy::number_of_devices_ty *)dlsym( + dynlib_handle, "__tgt_rtl_number_of_devices"))) + continue; + if (!(R.init_device = (RTLInfoTy::init_device_ty *)dlsym( + dynlib_handle, "__tgt_rtl_init_device"))) + continue; + if (!(R.load_binary = (RTLInfoTy::load_binary_ty *)dlsym( + dynlib_handle, "__tgt_rtl_load_binary"))) + continue; + if (!(R.data_alloc = (RTLInfoTy::data_alloc_ty *)dlsym( + dynlib_handle, "__tgt_rtl_data_alloc"))) + continue; + if (!(R.data_submit = (RTLInfoTy::data_submit_ty *)dlsym( + dynlib_handle, "__tgt_rtl_data_submit"))) + continue; + if (!(R.data_retrieve = (RTLInfoTy::data_retrieve_ty *)dlsym( + dynlib_handle, "__tgt_rtl_data_retrieve"))) + continue; + if (!(R.data_delete = (RTLInfoTy::data_delete_ty *)dlsym( + dynlib_handle, "__tgt_rtl_data_delete"))) + continue; + if (!(R.run_region = (RTLInfoTy::run_region_ty *)dlsym( + dynlib_handle, "__tgt_rtl_run_target_region"))) + continue; + if (!(R.run_team_region = (RTLInfoTy::run_team_region_ty *)dlsym( + dynlib_handle, "__tgt_rtl_run_target_team_region"))) + continue; + + // No devices are supported by this RTL? + if (!(R.NumberOfDevices = R.number_of_devices())) { + DP("No devices supported in this RTL\n"); + continue; + } + + DP("Registering RTL %016lx supporting %d devices!\n", (long)dynlib_handle, + R.NumberOfDevices); + + // The RTL is valid! Will save the information in the RTLs list. + AllRTLs.push_back(R); + } + return; +} + +static RTLsTy RTLs; + +/// Map between the host entry begin and the translation table. Each +/// registered library gets one TranslationTable. Use the map from +/// __tgt_offload_entry so that we may quickly determine if we are +/// trying to (re)register an existing lib, or if we really have a new one. +struct TranslationTable { + __tgt_target_table HostTable; + + // Image assigned to a given device. + std::vector<__tgt_device_image *> TargetsImages; // One image per device ID. + + // Table of entry points or NULL if it was not already computed. + std::vector<__tgt_target_table *> TargetsTable; // One table per device ID. +}; +typedef std::map<__tgt_offload_entry *, TranslationTable> + HostEntriesBeginToTransTableTy; +static HostEntriesBeginToTransTableTy HostEntriesBeginToTransTable; + +/// Map between the host ptr and a table index +struct TableMap { + TranslationTable *Table; // table associated with the host ptr. + uint32_t Index; // index in which the host ptr translated entry is found. + TableMap() : Table(0), Index(0) {} + TableMap(TranslationTable *table, uint32_t index) + : Table(table), Index(index) {} +}; +typedef std::map HostPtrToTableMapTy; +static HostPtrToTableMapTy HostPtrToTableMap; + +//////////////////////////////////////////////////////////////////////////////// +// getter and setter +// +// FIXME: Non-compliant. This need to be integrated in KMPC; can keep it +// here for the moment + +static int DefaultDevice = 0; + +void omp_set_default_device(int device_num) { DefaultDevice = device_num; } + +int omp_get_default_device(void) { return DefaultDevice; } + +int omp_get_num_devices(void) { return Devices.size(); } + +int omp_is_initial_device(void) { return true; } + +//////////////////////////////////////////////////////////////////////////////// +// functionality for device + +// return the target pointer begin (where the data will be moved) +void *DeviceTy::getTgtPtrBegin(void *HstPtrBegin, long Size) { + long IsLast; + return getTgtPtrBegin(HstPtrBegin, Size, IsLast, false); +} +void *DeviceTy::getTgtPtrBegin(void *HstPtrBegin, long Size, long &IsLast, + long UpdateRefCount) { + long hp = (long)HstPtrBegin; + IsLast = false; + + for (auto &HT : HostDataToTargetMap) { + if (hp >= HT.HstPtrBegin && hp < HT.HstPtrEnd) { + if ((hp + Size) > HT.HstPtrEnd) { + DP("WARNING: Array contain pointer but does not contain the complete " + "section\n"); + } + + IsLast = !(HT.RefCount > 1); + + if (HT.RefCount > 1 && UpdateRefCount) + --HT.RefCount; + + long tp = HT.TgtPtrBegin + (hp - HT.HstPtrBegin); + return (void *)tp; + } + } + + return NULL; +} + +// return the target pointer begin (where the data will be moved). +void *DeviceTy::getOrAllocTgtPtr(void *HstPtrBegin, void *HstPtrBase, long Size, + long &IsNew, long UpdateRefCount) { + long hp = (long)HstPtrBegin; + IsNew = false; + + // Check if the pointer is contained. + for (auto &HT : HostDataToTargetMap) { + // Is it contained? + if (hp >= HT.HstPtrBegin && hp < HT.HstPtrEnd) { + if ((hp + Size) > HT.HstPtrEnd) { + DP("WARNING: Array contain pointer but does not contain the complete " + "section\n"); + } + if (UpdateRefCount) + ++HT.RefCount; + long tp = HT.TgtPtrBegin + (hp - HT.HstPtrBegin); + return (void *)tp; + } + } + + // It is not contained we should create a new entry for it. + IsNew = true; + long tp = (long)RTL->data_alloc(RTLDeviceID, Size); + HostDataToTargetMap.push_front( + HostDataToTargetTy((long)HstPtrBase, hp, hp + Size, tp, tp + Size)); + return (void *)tp; +} + +void DeviceTy::deallocTgtPtr(void *HstPtrBegin, long Size, long ForceDelete) { + long hp = (long)HstPtrBegin; + + // Check if the pointer is contained in any sub-nodes. + for (auto ii = HostDataToTargetMap.begin(), ie = HostDataToTargetMap.end(); + ii != ie; ++ii) { + auto &HT = *ii; + // Is it contained? + if (hp >= HT.HstPtrBegin && hp < HT.HstPtrEnd) { + if ((hp + Size) > HT.HstPtrEnd) { + DP("WARNING: Array contain pointer but does not contain the complete " + "section\n"); + } + if (ForceDelete) + HT.RefCount = 1; + if (--HT.RefCount <= 0) { + assert(HT.RefCount == 0 && "did not expect a negative ref count"); + DP("Deleting tgt data 0x%016llx of size %lld\n", + (long long)HT.TgtPtrBegin, (long long)Size); + RTL->data_delete(RTLDeviceID, (void *)HT.TgtPtrBegin); + HostDataToTargetMap.erase(ii); + } + return; + } + } + DP("Section to delete (hst addr 0x%llx) does not exist in the allocated " + "memory\n", + (unsigned long long)hp); +} + +// init device. +int32_t DeviceTy::init() { + int32_t rc = RTL->init_device(RTLDeviceID); + if (rc == OFFLOAD_SUCCESS) { + IsInit = true; + } + return rc; +} + +// load binary to device. +__tgt_target_table *DeviceTy::load_binary(void *Img) { + return RTL->load_binary(RTLDeviceID, Img); +} + +// submit data to device. +int32_t DeviceTy::data_submit(void *TgtPtrBegin, void *HstPtrBegin, + int64_t Size) { + return RTL->data_submit(RTLDeviceID, TgtPtrBegin, HstPtrBegin, Size); +} + +// retrieve data from device. +int32_t DeviceTy::data_retrieve(void *HstPtrBegin, void *TgtPtrBegin, + int64_t Size) { + return RTL->data_retrieve(RTLDeviceID, HstPtrBegin, TgtPtrBegin, Size); +} + +// run region on device +int32_t DeviceTy::run_region(void *TgtEntryPtr, void **TgtVarsPtr, + int32_t TgtVarsSize) { + return RTL->run_region(RTLDeviceID, TgtEntryPtr, TgtVarsPtr, TgtVarsSize); +} + +// run team region on device. +int32_t DeviceTy::run_team_region(void *TgtEntryPtr, void **TgtVarsPtr, + int32_t TgtVarsSize, int32_t NumTeams, + int32_t ThreadLimit) { + return RTL->run_team_region(RTLDeviceID, TgtEntryPtr, TgtVarsPtr, TgtVarsSize, + NumTeams, ThreadLimit); +} + +//////////////////////////////////////////////////////////////////////////////// +// functionality for registering libs + +static void RegisterImageIntoTranslationTable(TranslationTable &TT, + RTLInfoTy &RTL, + __tgt_device_image *image) { + + // same size, as when we increase one, we also increase the other. + assert(TT.TargetsTable.size() == TT.TargetsImages.size() && + "We should have as many images as we have tables!"); + + // Resize the Targets Table and Images to accommodate the new targets if + // required + unsigned TargetsTableMinimumSize = RTL.Idx + RTL.NumberOfDevices; + + if (TT.TargetsTable.size() < TargetsTableMinimumSize) { + TT.TargetsImages.resize(TargetsTableMinimumSize, 0); + TT.TargetsTable.resize(TargetsTableMinimumSize, 0); + } + + // Register the image in all devices for this target type. + for (int32_t i = 0; i < RTL.NumberOfDevices; ++i) { + // If we are changing the image we are also invalidating the target table. + if (TT.TargetsImages[RTL.Idx + i] != image) { + TT.TargetsImages[RTL.Idx + i] = image; + TT.TargetsTable[RTL.Idx + i] = 0; // lazy initialization of target table. + } + } +} + +//////////////////////////////////////////////////////////////////////////////// +/// adds a target shared library to the target execution image +EXTERN void __tgt_register_lib(__tgt_bin_desc *desc) { + + // Attempt to load all the plugins available in the system. + RTLs.LoadRTLs(); + + // Register the images with the RTLs that understand them, if any. + for (int32_t i = 0; i < desc->NumDevices; ++i) { + // Obtain the image. + __tgt_device_image *img = &desc->DeviceImages[i]; + + bool FoundRTL = false; + + // Scan the RTLs that have associated images until we find one that supports + // the current image. We attempt to use the RTLs that are already being + // used first. + for (auto *R : RTLs.UsedRTLs) { + + assert(R->isUsed && "Expecting used RTLs."); + + if (!R->is_valid_binary(img)) { + DP("Image %016lx is NOT compatible with RTL %016lx!\n", + (long)img->ImageStart, (long)R->LibraryHandler); + continue; + } + + DP("Image %016lx is compatible with RTL %016lx!\n", (long)img->ImageStart, + (long)R->LibraryHandler); + + // Initialize translation table for this. + TranslationTable &TransTable = + HostEntriesBeginToTransTable[desc->EntriesBegin]; + TransTable.HostTable.EntriesBegin = desc->EntriesBegin; + TransTable.HostTable.EntriesEnd = desc->EntriesEnd; + + DP("Registering image %016lx with RTL %016lx!\n", (long)img->ImageStart, + (long)R->LibraryHandler); + RegisterImageIntoTranslationTable(TransTable, *R, img); + FoundRTL = true; + break; + } + + if (FoundRTL) + break; + + DP("No RTLs in use support the image %016lx!\n", (long)img->ImageStart); + + // Find a compatible RTL that is not being used. + for (auto &R : RTLs.AllRTLs) { + if (R.isUsed) + continue; + + if (!R.is_valid_binary(img)) { + DP("Image %016lx is NOT compatible with RTL %016lx!\n", + (long)img->ImageStart, (long)R.LibraryHandler); + continue; + } + + DP("Image %016lx is compatible with RTL %016lx!\n", (long)img->ImageStart, + (long)R.LibraryHandler); + + // Initialize the device information for the RTL we are about to use. + DeviceTy device(&R); + + size_t start = Devices.size(); + Devices.resize(start + R.NumberOfDevices, device); + for (int32_t device_id = 0; device_id < R.NumberOfDevices; device_id++) { + // global device ID + Devices[start + device_id].DeviceID = start + device_id; + // RTL local device ID + Devices[start + device_id].RTLDeviceID = device_id; + + // Save pointer to device in RTL in case we want to unregister the RTL + R.Devices.push_back(&Devices[start + device_id]); + } + + // Initialize the index of this RTL and save it in the used RTLs. + R.Idx = (RTLs.UsedRTLs.empty()) + ? 0 + : RTLs.UsedRTLs.back()->Idx + + RTLs.UsedRTLs.back()->NumberOfDevices; + R.isUsed = true; + RTLs.UsedRTLs.push_back(&R); + + DP("RTL %016lx has index %d!\n", (long)R.LibraryHandler, R.Idx); + + // Initialize translation table for this. + TranslationTable &TransTable = + HostEntriesBeginToTransTable[desc->EntriesBegin]; + TransTable.HostTable.EntriesBegin = desc->EntriesBegin; + TransTable.HostTable.EntriesEnd = desc->EntriesEnd; + + DP("Registering image %016lx with RTL %016lx!\n", (long)img->ImageStart, + (long)R.LibraryHandler); + RegisterImageIntoTranslationTable(TransTable, R, img); + FoundRTL = true; + break; + } + + if (FoundRTL) + break; + + DP("No RTL found for image %016lx!\n", (long)img->ImageStart); + } + + DP("Done register entries!\n"); +} + +//////////////////////////////////////////////////////////////////////////////// +/// unloads a target shared library +EXTERN void __tgt_unregister_lib(__tgt_bin_desc *desc) { + DP("Unloading target library!\n"); + return; +} + +/// Internal function to do the mapping and transfer the data to the device +static void target_data_begin(DeviceTy &Device, int32_t arg_num, + void **args_base, void **args, int64_t *arg_sizes, + int32_t *arg_types) { + // process each input. + for (int32_t i = 0; i < arg_num; ++i) { + void *HstPtrBegin = args[i]; + void *HstPtrBase = args_base[i]; + void *Pointer_TgtPtrBegin; + long IsNew, Pointer_IsNew; + if (arg_types[i] & tgt_map_pointer) { + DP("has a pointer entry: \n"); + // base is address of pointer. + Pointer_TgtPtrBegin = Device.getOrAllocTgtPtr( + HstPtrBase, HstPtrBase, sizeof(void *), Pointer_IsNew); + DP("There are %ld bytes allocated at target address %016lx\n", + (long)sizeof(void *), (long)Pointer_TgtPtrBegin); + assert(Pointer_TgtPtrBegin && + "Data allocation by RTL returned invalid ptr"); + // modify current entry. + HstPtrBase = *(void **)HstPtrBase; + } + + void *TgtPtrBegin = + Device.getOrAllocTgtPtr(HstPtrBegin, HstPtrBase, arg_sizes[i], IsNew); + DP("There are %ld bytes allocated at target address %016lx - is new %ld\n", + (long)arg_sizes[i], (long)TgtPtrBegin, IsNew); + assert(TgtPtrBegin && "Data allocation by RTL returned invalid ptr"); + + if ((arg_types[i] & tgt_map_to) && + (IsNew || (arg_types[i] & tgt_map_always))) { + DP("Moving %ld bytes (hst:%016lx) -> (tgt:%016lx)\n", (long)arg_sizes[i], + (long)HstPtrBegin, (long)TgtPtrBegin); + Device.data_submit(TgtPtrBegin, HstPtrBegin, arg_sizes[i]); + } + + if (arg_types[i] & tgt_map_pointer) { + DP("Update pointer (%016lx) -> [%016lx]\n", (long)Pointer_TgtPtrBegin, + (long)TgtPtrBegin); + uint64_t Delta = (uint64_t)HstPtrBegin - (uint64_t)HstPtrBase; + void *TgrPtrBase_Value = (void *)((uint64_t)TgtPtrBegin - Delta); + Device.data_submit(Pointer_TgtPtrBegin, &TgrPtrBase_Value, + sizeof(void *)); + } + } +} + +EXTERN void __tgt_target_data_begin_nowait(int32_t device_id, int32_t arg_num, + void **args_base, void **args, + int64_t *arg_sizes, + int32_t *arg_types, int32_t depNum, + void *depList, int32_t noAliasDepNum, + void *noAliasDepList) { + __tgt_target_data_begin(device_id, arg_num, args_base, args, arg_sizes, + arg_types); +} + +/// creates host-to-target data mapping, store it in the +/// libomptarget.so internal structure (an entry in a stack of data maps) +/// and passes the data to the device. +EXTERN void __tgt_target_data_begin(int32_t device_id, int32_t arg_num, + void **args_base, void **args, + int64_t *arg_sizes, int32_t *arg_types) { + DP("Entering data begin region for device %d with %d mappings\n", device_id, + arg_num); + + // No devices available? + if (device_id == OFFLOAD_DEVICE_DEFAULT) { + device_id = omp_get_default_device(); + DP("Use default device id %d\n", device_id); + } + if (Devices.size() <= (size_t)device_id) { + DP("Device ID %d does not have a matching RTL.\n", device_id); + return; + } + + // Get device info + DeviceTy &Device = Devices[device_id]; + // Init the device if not done before + if (!Device.IsInit) { + if (Device.init() != OFFLOAD_SUCCESS) { + DP("failed to init device %d\n", device_id); + return; + } + } + + target_data_begin(Device, arg_num, args_base, args, arg_sizes, arg_types); +} + +/// Internal function to undo the mapping and retrieve the data from the device. +static void target_data_end(DeviceTy &Device, int32_t arg_num, void **args_base, + void **args, int64_t *arg_sizes, + int32_t *arg_types) { + // process each input. + for (int32_t i = 0; i < arg_num; ++i) { + void *HstPtrBegin = args[i]; + void *HstPtrBase = args_base[i]; + long IsLast; + long ForceDelete = arg_types[i] & tgt_map_delete; + if (arg_types[i] & tgt_map_pointer) { + // base is pointer begin. + Device.getTgtPtrBegin(HstPtrBase, sizeof(void *), IsLast); + if (IsLast || ForceDelete) { + Device.deallocTgtPtr(HstPtrBase, sizeof(void *), ForceDelete); + } + } + void *TgtPtrBegin = + Device.getTgtPtrBegin(HstPtrBegin, arg_sizes[i], IsLast); + + DP("There are %ld bytes allocated at target address %016lx - is last %ld\n", + (long)arg_sizes[i], (long)TgtPtrBegin, IsLast); + + long Always = arg_types[i] & tgt_map_always; + if ((arg_types[i] & tgt_map_from) && (IsLast || ForceDelete || Always)) { + DP("Moving %ld bytes (tgt:%016lx) -> (hst:%016lx)\n", (long)arg_sizes[i], + (long)TgtPtrBegin, (long)HstPtrBegin); + Device.data_retrieve(HstPtrBegin, TgtPtrBegin, arg_sizes[i]); + } + if (IsLast || ForceDelete) { + Device.deallocTgtPtr(HstPtrBegin, arg_sizes[i], ForceDelete); + } + } +} + +/// passes data from the target, release target memory and destroys +/// the host-target mapping (top entry from the stack of data maps) +/// created by the last __tgt_target_data_begin. +EXTERN void __tgt_target_data_end(int32_t device_id, int32_t arg_num, + void **args_base, void **args, + int64_t *arg_sizes, int32_t *arg_types) { + DP("Entering data end region with %d mappings\n", arg_num); + + // No devices available? + if (device_id == OFFLOAD_DEVICE_DEFAULT) { + device_id = omp_get_default_device(); + } + if (Devices.size() <= (size_t)device_id) { + DP("Device ID %d does not have a matching RTL.\n", device_id); + return; + } + + DeviceTy &Device = Devices[device_id]; + if (!Device.IsInit) { + DP("uninit device: ignore"); + return; + } + + target_data_end(Device, arg_num, args_base, args, arg_sizes, arg_types); +} + +EXTERN void __tgt_target_data_end_nowait(int32_t device_id, int32_t arg_num, + void **args_base, void **args, + int64_t *arg_sizes, int32_t *arg_types, + int32_t depNum, void *depList, + int32_t noAliasDepNum, + void *noAliasDepList) { + __tgt_target_data_end(device_id, arg_num, args_base, args, arg_sizes, + arg_types); +} + +/// passes data to/from the target. +EXTERN void __tgt_target_data_update(int32_t device_id, int32_t arg_num, + void **args_base, void **args, + int64_t *arg_sizes, int32_t *arg_types) { + DP("Entering data update with %d mappings\n", arg_num); + + // No devices available? + if (device_id == OFFLOAD_DEVICE_DEFAULT) { + device_id = omp_get_default_device(); + } + if (Devices.size() <= (size_t)device_id) { + DP("Device ID %d does not have a matching RTL.\n", device_id); + return; + } + + // Get device info. + DeviceTy &Device = Devices[device_id]; + if (!Device.IsInit) { + DP("uninit device: ignore"); + return; + } + + // process each input. + for (int32_t i = 0; i < arg_num; ++i) { + void *HstPtrBegin = args[i]; + long IsLast; + void *TgtPtrBegin = + Device.getTgtPtrBegin(HstPtrBegin, arg_sizes[i], IsLast, false); + if (arg_types[i] & tgt_map_from) { + DP("Moving %ld bytes (tgt:%016lx) -> (hst:%016lx)\n", (long)arg_sizes[i], + (long)TgtPtrBegin, (long)HstPtrBegin); + Device.data_retrieve(HstPtrBegin, TgtPtrBegin, arg_sizes[i]); + } + if (arg_types[i] & tgt_map_to) { + DP("Moving %ld bytes (hst:%016lx) -> (tgt:%016lx)\n", (long)arg_sizes[i], + (long)HstPtrBegin, (long)TgtPtrBegin); + Device.data_submit(TgtPtrBegin, HstPtrBegin, arg_sizes[i]); + } + } +} + +EXTERN void __tgt_target_data_update_nowait( + int32_t device_id, int32_t arg_num, void **args_base, void **args, + int64_t *arg_sizes, int32_t *arg_types, int32_t depNum, void *depList, + int32_t noAliasDepNum, void *noAliasDepList) { + __tgt_target_data_update(device_id, arg_num, args_base, args, arg_sizes, + arg_types); +} + +/// performs the same actions as data_begin in case arg_num is +/// non-zero and initiates run of the offloaded region on the target platform; +/// if arg_num is non-zero after the region execution is done it also +/// performs the same action as data_update and data_end above. This function +/// return 0 if it was able to transfer the execution to a target and an integer +/// different from zero otherwise. +static int target(int32_t device_id, void *host_ptr, int32_t arg_num, + void **args_base, void **args, int64_t *arg_sizes, + int32_t *arg_types, int32_t team_num, int32_t thread_limit, + int IsTeamConstruct, int IsConstrDestrRecursiveCall) { + DP("Entering target region with entry point %016lx and device Id %d\n", + (long)host_ptr, device_id); + + if (device_id == OFFLOAD_DEVICE_DEFAULT) { + device_id = omp_get_default_device(); + } + // got a new constructor/destructor? + if (device_id == OFFLOAD_DEVICE_CONSTRUCTOR || + device_id == OFFLOAD_DEVICE_DESTRUCTOR) { + DP("Got a constructor/destructor\n"); + for (unsigned D = 0; D < Devices.size(); D++) { + DeviceTy &Device = Devices[D]; + DP("device %d: enqueue constr/destr\n", D); + Device.PendingConstrDestrHostPtrList.push_back(host_ptr); + } + DP("Done with constructor/destructor\n"); + return OFFLOAD_SUCCESS; + } + + // No devices available? + if (!(device_id >= 0 && (size_t)device_id < Devices.size())) { + DP("Device ID %d does not have a matching RTL.\n", device_id); + return OFFLOAD_FAIL; + } + + // Get device info. + DeviceTy &Device = Devices[device_id]; + DP("Is the device %d (local is %d) initialized? %d\n", device_id, + Device.RTLDeviceID, (int)Device.IsInit); + + // Init the device if not done before. + if (!Device.IsInit) { + assert(!IsConstrDestrRecursiveCall && "constr & destr should not init RT"); + if (Device.init() != OFFLOAD_SUCCESS) { + DP("failed to init device %d\n", device_id); + return OFFLOAD_FAIL; + } + } + + if (!IsConstrDestrRecursiveCall && + !Device.PendingConstrDestrHostPtrList.empty()) { + DP("has pending constr/destr... call now\n"); + for (std::list::iterator + ii = Device.PendingConstrDestrHostPtrList.begin(), + ie = Device.PendingConstrDestrHostPtrList.end(); + ii != ie; ++ii) { + void *ConstrDestrHostPtr = *ii; + int rc = target(device_id, ConstrDestrHostPtr, 0, NULL, NULL, NULL, NULL, + 1, 1, true /*team*/, true /*recursive*/); + if (rc != OFFLOAD_SUCCESS) { + DP("failed to run constr/destr... enqueue it\n"); + return OFFLOAD_FAIL; + } + } + DP("done with pending constr/destr\n"); + Device.PendingConstrDestrHostPtrList.clear(); + } + + // Find the table information in the map or look it up in the translation + // tables. + TableMap *TM = 0; + HostPtrToTableMapTy::iterator TableMapIt = HostPtrToTableMap.find(host_ptr); + if (TableMapIt == HostPtrToTableMap.end()) { + // We don't have a map. So search all the registered libraries. + for (HostEntriesBeginToTransTableTy::iterator + ii = HostEntriesBeginToTransTable.begin(), + ie = HostEntriesBeginToTransTable.end(); + !TM && ii != ie; ++ii) { + // get the translation table (which contains all the good info). + TranslationTable *TransTable = &ii->second; + // iterate over all the host table entries to see if we can locate the + // host_ptr. + __tgt_offload_entry *begin = TransTable->HostTable.EntriesBegin; + __tgt_offload_entry *end = TransTable->HostTable.EntriesEnd; + __tgt_offload_entry *cur = begin; + for (uint32_t i = 0; cur < end; ++cur, ++i) { + if (cur->addr != host_ptr) + continue; + // we got a match, now fill the HostPtrToTableMap so that we + // may avoid this search next time. + TM = &HostPtrToTableMap[host_ptr]; + TM->Table = TransTable; + TM->Index = i; + break; + } + } + } else { + TM = &TableMapIt->second; + } + // No map for this host pointer found! + if (!TM) { + DP("Host ptr %016lx does not have a matching target pointer.\n", + (long)host_ptr); + return OFFLOAD_FAIL; + } + + // get target table. + assert(TM->Table->TargetsTable.size() > (size_t)device_id && + "Not expecting a device ID outside the tables bounds!"); + __tgt_target_table *TargetTable = TM->Table->TargetsTable[device_id]; + // if first call, need to move the data. + if (!TargetTable) { + // 1) get image. + assert(TM->Table->TargetsImages.size() > (size_t)device_id && + "Not expecting a device ID outside the tables bounds!"); + __tgt_device_image *img = TM->Table->TargetsImages[device_id]; + if (!img) { + DP("No image loaded for device id %d.\n", device_id); + return OFFLOAD_FAIL; + } + // 2) load image into the target table. + TargetTable = TM->Table->TargetsTable[device_id] = Device.load_binary(img); + // Unable to get table for this image: invalidate image and fail. + if (!TargetTable) { + DP("Unable to generate entries table for device id %d.\n", device_id); + TM->Table->TargetsImages[device_id] = 0; + return OFFLOAD_FAIL; + } + + // Verify if the two tables sizes match. + size_t hsize = + TM->Table->HostTable.EntriesEnd - TM->Table->HostTable.EntriesBegin; + size_t tsize = TargetTable->EntriesEnd - TargetTable->EntriesBegin; + + // Invalid image for this host entries! + if (hsize != tsize) { + DP("Host and Target tables mismatch for device id %d [%lx != %lx].\n", + device_id, hsize, tsize); + TM->Table->TargetsImages[device_id] = 0; + TM->Table->TargetsTable[device_id] = 0; + return OFFLOAD_FAIL; + } + assert(TM->Index < hsize && + "Not expecting index greater than the table size"); + + // process global data that needs to be mapped. + __tgt_target_table *HostTable = &TM->Table->HostTable; + for (__tgt_offload_entry *CurrDeviceEntry = TargetTable->EntriesBegin, + *CurrHostEntry = HostTable->EntriesBegin, + *EntryDeviceEnd = TargetTable->EntriesEnd; + CurrDeviceEntry != EntryDeviceEnd; + CurrDeviceEntry++, CurrHostEntry++) { + if (CurrDeviceEntry->size != 0) { + // has data. + assert(CurrDeviceEntry->size == CurrHostEntry->size && + "data size mismatch"); + assert(Device.getTgtPtrBegin(CurrHostEntry->addr, + CurrHostEntry->size) == NULL && + "data in declared target should not be already mapped"); + // add entry to map. + DP("add mapping from host 0x%llx to 0x%llx with size %lld\n\n", + (unsigned long long)CurrHostEntry->addr, + (unsigned long long)CurrDeviceEntry->addr, + (unsigned long long)CurrDeviceEntry->size); + Device.HostDataToTargetMap.push_front(HostDataToTargetTy( + (long)CurrHostEntry->addr, (long)CurrHostEntry->addr, + (long)CurrHostEntry->addr + CurrHostEntry->size, + (long)CurrDeviceEntry->addr, + (long)CurrDeviceEntry->addr + CurrDeviceEntry->size)); + } + } + } + + // Move data to device. + target_data_begin(Device, arg_num, args_base, args, arg_sizes, arg_types); + + std::vector tgt_args; + + for (int32_t i = 0; i < arg_num; ++i) { + + if (arg_types[i] & tgt_map_extra) + continue; + + void *HstPtrBegin = args[i]; + void *HstPtrBase = args_base[i]; + void *TgtPtrBase; + long IsLast; // unused. + if (arg_types[i] & tgt_map_pointer) { + DP("Obtaining target argument from host pointer %016lx to object %016lx " + "\n", + (long)HstPtrBase, (long)HstPtrBegin); + void *TgtPtrBegin = + Device.getTgtPtrBegin(HstPtrBase, sizeof(void *), IsLast, false); + TgtPtrBase = TgtPtrBegin; // no offset for ptrs. + } else { + DP("Obtaining target argument from host pointer %016lx\n", + (long)HstPtrBegin); + void *TgtPtrBegin = + Device.getTgtPtrBegin(HstPtrBegin, arg_sizes[i], IsLast, false); + assert(TgtPtrBegin && "NULL argument for hst ptr"); + uint64_t PtrDelta = (uint64_t)HstPtrBegin - (uint64_t)HstPtrBase; + TgtPtrBase = (void *)((uint64_t)TgtPtrBegin - PtrDelta); + } + tgt_args.push_back(TgtPtrBase); + } + // Push omp handle. + tgt_args.push_back((void *)0); + + // Launch device execution. + int rc; + DP("Launching target execution with pointer %016lx (index=%d).\n", + (long)TargetTable->EntriesBegin[TM->Index].addr, TM->Index); + if (IsTeamConstruct) { + rc = Device.run_team_region(TargetTable->EntriesBegin[TM->Index].addr, + &tgt_args[0], tgt_args.size(), team_num, + thread_limit); + } else { + rc = Device.run_region(TargetTable->EntriesBegin[TM->Index].addr, + &tgt_args[0], tgt_args.size()); + } + + if (rc) + return OFFLOAD_FAIL; + + // Move data from device. + target_data_end(Device, arg_num, args_base, args, arg_sizes, arg_types); + return OFFLOAD_SUCCESS; +} + +EXTERN int __tgt_target(int32_t device_id, void *host_ptr, int32_t arg_num, + void **args_base, void **args, int64_t *arg_sizes, + int32_t *arg_types) { + return target(device_id, host_ptr, arg_num, args_base, args, arg_sizes, + arg_types, 0, 0, false /*team*/, false /*recursive*/); +} + +EXTERN int __tgt_target_nowait(int32_t device_id, void *host_ptr, + int32_t arg_num, void **args_base, void **args, + int64_t *arg_sizes, int32_t *arg_types, + int32_t depNum, void *depList, + int32_t noAliasDepNum, void *noAliasDepList) { + return __tgt_target(device_id, host_ptr, arg_num, args_base, args, arg_sizes, + arg_types); +} + +EXTERN int __tgt_target_teams(int32_t device_id, void *host_ptr, + int32_t arg_num, void **args_base, void **args, + int64_t *arg_sizes, int32_t *arg_types, + int32_t team_num, int32_t thread_limit) { + return target(device_id, host_ptr, arg_num, args_base, args, arg_sizes, + arg_types, team_num, thread_limit, true /*team*/, + false /*recursive*/); +} + +EXTERN int __tgt_target_teams_nowait(int32_t device_id, void *host_ptr, + int32_t arg_num, void **args_base, + void **args, int64_t *arg_sizes, + int32_t *arg_types, int32_t team_num, + int32_t thread_limit, int32_t depNum, + void *depList, int32_t noAliasDepNum, + void *noAliasDepList) { + return __tgt_target_teams(device_id, host_ptr, arg_num, args_base, args, + arg_sizes, arg_types, team_num, thread_limit); +} Index: libomptarget/test/CMakeLists.txt =================================================================== --- /dev/null +++ libomptarget/test/CMakeLists.txt @@ -0,0 +1,93 @@ +# CMakeLists.txt file for unit testing OpenMP Library +include(FindPythonInterp) +include(CheckTypeSize) +if(NOT PYTHONINTERP_FOUND) + libomptarget_warning_say("Could not find Python.") + libomptarget_warning_say("The check-libomptarget target will not be available!") + return() +endif() + +set(LIBOMPTARGET_TEST_CFLAGS "" CACHE STRING + "Extra compiler flags to send to the test compiler") + +if(${LIBOMPTARGET_STANDALONE_BUILD}) + # Make sure we can use the console pool for recent cmake and ninja > 1.5 + if(CMAKE_VERSION VERSION_LESS 3.1.20141117) + set(cmake_3_2_USES_TERMINAL) + else() + set(cmake_3_2_USES_TERMINAL USES_TERMINAL) + endif() + set(LIBOMPTARGET_TEST_C_COMPILER ${CMAKE_C_COMPILER} CACHE STRING + "C compiler to use for testing OpenMP offloading library") + set(LIBOMPTARGET_TEST_CXX_COMPILER ${CMAKE_CXX_COMPILER} CACHE STRING + "C++ compiler to use for testing OpenMP offloading library") + set(LIBOMPTARGET_TEST_OPENMP_FLAG -fopenmp CACHE STRING + "OpenMP compiler flag to use for testing OpenMP offloading library") + set(LIBOMPTARGET_LLVM_LIT_EXECUTABLE "" CACHE STRING + "Path to llvm-lit") + find_program(LIT_EXECUTABLE NAMES llvm-lit ${LIBOMPTARGET_LLVM_LIT_EXECUTABLE}) + if(NOT LIT_EXECUTABLE) + libomptarget_say("Cannot find llvm-lit.") + libomptarget_say("Please put llvm-lit in your PATH or set LIBOMPTARGET_LLVM_LIT_EXECUTABLE to its full path") + libomptarget_warning_say("The check-libomptarget target will not be available!") + return() + endif() + + set(LIBOMPTARGET_FILECHECK_EXECUTABLE "" CACHE STRING + "Path to FileCheck") + find_program(LIBOMPTARGET_FILECHECK NAMES FileCheck ${LIBOMPTARGET_FILECHECK_EXECUTABLE}) + if(NOT LIBOMPTARGET_FILECHECK) + libomptarget_say("Cannot find FileCheck.") + libomptarget_say("Please put FileCheck in your PATH or set LIBOMPTARGET_FILECHECK_EXECUTABLE to its full path") + libomptarget_warning_say("The check-libomptarget target will not be available!") + return() + endif() + + # Set lit arguments + # The -j 1 lets the actual tests run with the entire machine. + # We have one test thread that spawns the tests serially. This allows + # Each test to use the entire machine. + set(LIBOMPTARGET_LIT_ARGS_DEFAULT "-sv --show-unsupported --show-xfail -j 1") + if(MSVC OR XCODE) + set(LIBOMPTARGET_LIT_ARGS_DEFAULT "${LIBOMPTARGET_LIT_ARGS_DEFAULT} --no-progress-bar") + endif() + set(LIBOMPTARGET_LIT_ARGS "${LIBOMPTARGET_LIT_ARGS_DEFAULT}" CACHE STRING + "Default options for lit") + separate_arguments(LIBOMPTARGET_LIT_ARGS) + add_custom_target(check-libomptarget + COMMAND ${PYTHON_EXECUTABLE} ${LIT_EXECUTABLE} ${LIBOMPTARGET_LIT_ARGS} ${CMAKE_CURRENT_BINARY_DIR} + DEPENDS omptarget + COMMENT "Running libomptarget tests" + ${cmake_3_2_USES_TERMINAL} + ) + + set(LIBOMPTARGET_OPENMP_HEADER_FOLDER "" CACHE STRING + "Path to folder containing omp.h") + set(LIBOMPTARGET_OPENMP_HOST_RTL_FOLDER "" CACHE STRING + "Path to folder containing libomp.h") +else() + # LLVM source tree build, test just-built clang + if(NOT MSVC) + set(LIBOMPTARGET_TEST_C_COMPILER ${LLVM_RUNTIME_OUTPUT_INTDIR}/clang) + set(LIBOMPTARGET_TEST_CXX_COMPILER ${LLVM_RUNTIME_OUTPUT_INTDIR}/clang++) + set(LIBOMPTARGET_FILECHECK ${LLVM_RUNTIME_OUTPUT_INTDIR}/FileCheck) + else() + libomptarget_error_say("Not prepared to run tests on Windows systems.") + endif() + set(LIBOMPTARGET_TEST_OPENMP_FLAG -fopenmp=libomp) + # Use add_lit_testsuite() from LLVM CMake. This also depends on OpenMP + # implementation because it uses omp.h. + add_lit_testsuite(check-libomptarget + "Running libomptarget tests" + ${CMAKE_CURRENT_BINARY_DIR} + ARGS "-j 1" + DEPENDS omptarget omp + ) + + set(LIBOMPTARGET_OPENMP_HEADER_FOLDER "${LIBOMPTARGET_BINARY_DIR}/../runtime/src") +endif() + +# Configure the lit.site.cfg.in file +set(AUTO_GEN_COMMENT "## Autogenerated by libomptarget configuration.\n# Do not edit!") +configure_file(lit.site.cfg.in lit.site.cfg @ONLY) + Index: libomptarget/test/lit.cfg =================================================================== --- /dev/null +++ libomptarget/test/lit.cfg @@ -0,0 +1,116 @@ +# -*- Python -*- vim: set ft=python ts=4 sw=4 expandtab tw=79: +# Configuration file for the 'lit' test runner. + +import os +import lit.formats + +# Tell pylint that we know config and lit_config exist somewhere. +if 'PYLINT_IMPORT' in os.environ: + config = object() + lit_config = object() + +def append_dynamic_library_path(name, value, sep): + if name in config.environment: + config.environment[name] = value + sep + config.environment[name] + else: + config.environment[name] = value + +# name: The name of this test suite. +config.name = 'libomptarget' + +# suffixes: A list of file extensions to treat as test files. +config.suffixes = ['.c', '.cpp', '.cc'] + +# test_source_root: The root path where tests are located. +config.test_source_root = os.path.dirname(__file__) + +# test_exec_root: The root object directory where output is placed +config.test_exec_root = config.libomptarget_obj_root + +# test format +config.test_format = lit.formats.ShTest() + +# compiler flags +config.test_cflags = config.test_openmp_flag + \ + " -I " + config.test_source_root + \ + " -I " + config.omp_header_directory + \ + " -L " + config.library_dir; + +if config.omp_host_rtl_directory: + config.test_cflags = config.test_cflags + " -L " + \ + config.omp_host_rtl_directory + +config.test_cflags = config.test_cflags + " " + config.test_extra_cflags + +# Setup environment to find dynamic library at runtime +if config.operating_system == 'Windows': + append_dynamic_library_path('PATH', config.library_dir, ";") + append_dynamic_library_path('PATH', config.omp_host_rtl_directory, ";") +elif config.operating_system == 'Darwin': + append_dynamic_library_path('DYLD_LIBRARY_PATH', config.library_dir, ":") + append_dynamic_library_path('DYLD_LIBRARY_PATH', \ + config.omp_host_rtl_directory, ";") + config.test_cflags += " -Wl,-rpath," + config.library_dir + config.test_cflags += " -Wl,-rpath," + config.omp_host_rtl_directory +else: # Unices + append_dynamic_library_path('LD_LIBRARY_PATH', config.library_dir, ":") + append_dynamic_library_path('LD_LIBRARY_PATH', \ + config.omp_host_rtl_directory, ":") + +# substitutions +# - for targets that exist in the system create the actual command. +# - for valid targets that do not exist in the system, return false, so that the +# same test can be used for different targets. + +# Scan all the valid targets. +for libomptarget_target in config.libomptarget_all_targets: + # Is this target in the current system? If so create a compile, run and test + # command. Otherwise create command that return false. + if libomptarget_target in config.libomptarget_system_targets: + config.substitutions.append(("%libomptarget-compilexx-run-and-check-" + \ + libomptarget_target, \ + "%libomptarget-compilexx-and-run-" + libomptarget_target + \ + " | " + config.libomptarget_filecheck + " %s")) + config.substitutions.append(("%libomptarget-compile-run-and-check-" + \ + libomptarget_target, \ + "%libomptarget-compile-and-run-" + libomptarget_target + \ + " | " + config.libomptarget_filecheck + " %s")) + config.substitutions.append(("%libomptarget-compilexx-and-run-" + \ + libomptarget_target, \ + "%clangxx-" + libomptarget_target + " %s -o %t-" + \ + libomptarget_target + " && %t-" + libomptarget_target)) + config.substitutions.append(("%libomptarget-compile-and-run-" + \ + libomptarget_target, \ + "%clang-" + libomptarget_target + " %s -o %t-" + \ + libomptarget_target + " && %t-" + libomptarget_target)) + config.substitutions.append(("%clangxx-" + libomptarget_target, \ + "%clangxx %cflags -omptargets=" + libomptarget_target)) + config.substitutions.append(("%clang-" + libomptarget_target, \ + "%clang %cflags -omptargets=" + libomptarget_target)) + config.substitutions.append(("%fcheck-" + libomptarget_target, \ + config.libomptarget_filecheck + " %s")) + else: + config.substitutions.append(("%libomptarget-compile-run-and-check-" + \ + libomptarget_target, \ + "echo ignored-command")) + config.substitutions.append(("%libomptarget-compilexx-run-and-check-" + \ + libomptarget_target, \ + "echo ignored-command")) + config.substitutions.append(("%libomptarget-compile-and-run-" + \ + libomptarget_target, \ + "echo ignored-command")) + config.substitutions.append(("%libomptarget-compilexx-and-run-" + \ + libomptarget_target, \ + "echo ignored-command")) + config.substitutions.append(("%clang-" + libomptarget_target, \ + "echo ignored-command")) + config.substitutions.append(("%clangxx-" + libomptarget_target, \ + "echo ignored-command")) + config.substitutions.append(("%fcheck-" + libomptarget_target, \ + "echo ignored-command")) + +config.substitutions.append(("%clangxx", config.test_cxx_compiler)) +config.substitutions.append(("%clang", config.test_c_compiler)) +config.substitutions.append(("%openmp_flag", config.test_openmp_flag)) +config.substitutions.append(("%cflags", config.test_cflags)) + Index: libomptarget/test/lit.site.cfg.in =================================================================== --- /dev/null +++ libomptarget/test/lit.site.cfg.in @@ -0,0 +1,20 @@ +@AUTO_GEN_COMMENT@ + +config.test_c_compiler = "@LIBOMPTARGET_TEST_C_COMPILER@" +config.test_cxx_compiler = "@LIBOMPTARGET_TEST_CXX_COMPILER@" +config.test_openmp_flag = "@LIBOMPTARGET_TEST_OPENMP_FLAG@" +# For the moment we still need to pass libomptarget explicitly. Once the driver +# patch, lands, this is not required anymore. +config.test_extra_cflags = "-lomptarget @LIBOMPTARGET_TEST_CFLAGS@" +config.libomptarget_obj_root = "@CMAKE_CURRENT_BINARY_DIR@" +config.library_dir = "@LIBOMPTARGET_LIBRARY_DIR@" +config.omp_header_directory = "@LIBOMPTARGET_OPENMP_HEADER_FOLDER@" +config.omp_host_rtl_directory = "@LIBOMPTARGET_OPENMP_HOST_RTL_FOLDER@" +config.operating_system = "@CMAKE_SYSTEM_NAME@" +config.libomptarget_all_targets = "@LIBOMPTARGET_ALL_TARGETS@".split() +config.libomptarget_system_targets = "@LIBOMPTARGET_SYSTEM_TARGETS@".split() +config.libomptarget_filecheck = "@LIBOMPTARGET_FILECHECK@" + +# Let the main config do the real work. +lit_config.load_config(config, "@LIBOMPTARGET_BASE_DIR@/test/lit.cfg") + Index: libomptarget/test/offloading/offloading_success.c =================================================================== --- /dev/null +++ libomptarget/test/offloading/offloading_success.c @@ -0,0 +1,20 @@ +// RUN: %libomptarget-compile-run-and-check-powerpc64-ibm-linux-gnu +// RUN: %libomptarget-compile-run-and-check-powerpc64le-ibm-linux-gnu +// RUN: %libomptarget-compile-run-and-check-x86_64-pc-linux-gnu + +#include +#include + +int main(void) { + int isHost = 0; + +#pragma omp target + { isHost = omp_is_initial_device(); } + + // The compiler doesn't have support to launch the target region on the + // device. + // CHECK: Target region executed on the host + printf("Target region executed on the %s\n", isHost ? "host" : "device"); + + return !isHost; +} Index: libomptarget/test/offloading/offloading_success.cpp =================================================================== --- /dev/null +++ libomptarget/test/offloading/offloading_success.cpp @@ -0,0 +1,20 @@ +// RUN: %libomptarget-compilexx-run-and-check-powerpc64-ibm-linux-gnu +// RUN: %libomptarget-compilexx-run-and-check-powerpc64le-ibm-linux-gnu +// RUN: %libomptarget-compilexx-run-and-check-x86_64-pc-linux-gnu + +#include +#include + +int main(void) { + int isHost = 0; + +#pragma omp target + { isHost = omp_is_initial_device(); } + + // The compiler doesn't have support to launch the target region on the + // device. + // CHECK: Target region executed on the host + printf("Target region executed on the %s\n", isHost ? "host" : "device"); + + return !isHost; +}