Skip to content

Commit d5ae4e6

Browse files
committedFeb 12, 2018
[OpenMP][libomptarget] Enable the compilation of multiple bc libraries for runtime inlining
Summary: Different NVIDIA GPUs support different compute capabilities. To enable the inlining of runtime functions and the best performance on different generations of NVIDIA GPUs, a bc library for each compute capability needs to be compiled. The same compiler build will then be usable in conjunction with multiple generations of NVIDIA GPUs. To differentiate between versions of the same bc lib, the output file name will contain the compute capability ID. Depends on D14254 Reviewers: Hahnfeld, hfinkel, carlo.bertolli, caomhin, ABataev, grokos Reviewed By: Hahnfeld, grokos Subscribers: guansong, mgorny, openmp-commits Differential Revision: https://reviews.llvm.org/D41724 llvm-svn: 324904
1 parent 7dc0f1e commit d5ae4e6

File tree

2 files changed

+53
-43
lines changed

2 files changed

+53
-43
lines changed
 

‎openmp/README.rst

+4-4
Original file line numberDiff line numberDiff line change
@@ -280,10 +280,10 @@ Options for ``NVPTX device RTL``
280280
compatible with NVCC, this option can be used to pass to NVCC a valid compiler
281281
to avoid the error.
282282

283-
**LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITY** = ``35``
284-
CUDA compute capability that should be supported by the NVPTX device RTL. E.g.
285-
for compute capability 6.0, the option "60" should be used. Compute capability
286-
3.5 is the minimum required.
283+
**LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITIES** = ``35``
284+
List of CUDA compute capabilities that should be supported by the NVPTX
285+
device RTL. E.g. for compute capabilities 6.0 and 7.0, the option "60,70"
286+
should be used. Compute capability 3.5 is the minimum required.
287287

288288
**LIBOMPTARGET_NVPTX_DEBUG** = ``OFF|ON``
289289
Enable printing of debug messages from the NVPTX device RTL.

‎openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt

+49-39
Original file line numberDiff line numberDiff line change
@@ -60,9 +60,18 @@ if(LIBOMPTARGET_DEP_CUDA_FOUND)
6060

6161
# Get the compute capabilities the user requested or use SM_35 by default.
6262
# SM_35 is what clang uses by default.
63-
set(LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITY 35 CACHE STRING
64-
"CUDA Compute Capability to be used to compile the NVPTX device RTL.")
65-
set(CUDA_ARCH -arch sm_${LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITY})
63+
set(default_capabilities 35)
64+
if (DEFINED LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITY)
65+
set(default_capabilities ${LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITY})
66+
libomptarget_warning_say("LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITY is deprecated, please use LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITIES")
67+
endif()
68+
set(LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITIES ${default_capabilities} CACHE STRING
69+
"List of CUDA Compute Capabilities to be used to compile the NVPTX device RTL.")
70+
string(REPLACE "," ";" nvptx_sm_list ${LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITIES})
71+
72+
foreach(sm ${nvptx_sm_list})
73+
set(CUDA_ARCH ${CUDA_ARCH} -gencode arch=compute_${sm},code=sm_${sm})
74+
endforeach()
6675

6776
# Activate RTL message dumps if requested by the user.
6877
set(LIBOMPTARGET_NVPTX_DEBUG FALSE CACHE BOOL
@@ -152,46 +161,47 @@ if(LIBOMPTARGET_DEP_CUDA_FOUND)
152161

153162
# Build one bitcode library per requested compute capability (SM_35 by default).
154163
set(CUDA_ARCH "")
155-
set(CUDA_ARCH --cuda-gpu-arch=sm_${LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITY})
156-
157-
# Compile cuda files to bitcode.
158-
set(bc_files "")
159-
foreach(src ${cuda_src_files})
160-
get_filename_component(infile ${src} ABSOLUTE)
161-
get_filename_component(outfile ${src} NAME)
162-
163-
add_custom_command(OUTPUT ${outfile}.bc
164-
COMMAND ${LIBOMPTARGET_NVPTX_SELECTED_CUDA_COMPILER} ${CUDA_FLAGS} ${CUDA_ARCH} ${CUDA_INCLUDES}
165-
-c ${infile} -o ${outfile}.bc
166-
DEPENDS ${infile}
167-
IMPLICIT_DEPENDS CXX ${infile}
168-
COMMENT "Building LLVM bitcode ${outfile}.bc"
169-
VERBATIM
164+
foreach(sm ${nvptx_sm_list})
165+
set(CUDA_ARCH --cuda-gpu-arch=sm_${sm})
166+
167+
# Compile cuda files to bitcode.
168+
set(bc_files "")
169+
foreach(src ${cuda_src_files})
170+
get_filename_component(infile ${src} ABSOLUTE)
171+
get_filename_component(outfile ${src} NAME)
172+
173+
add_custom_command(OUTPUT ${outfile}-sm_${sm}.bc
174+
COMMAND ${LIBOMPTARGET_NVPTX_SELECTED_CUDA_COMPILER} ${CUDA_FLAGS} ${CUDA_ARCH} ${CUDA_INCLUDES}
175+
-c ${infile} -o ${outfile}-sm_${sm}.bc
176+
DEPENDS ${infile}
177+
IMPLICIT_DEPENDS CXX ${infile}
178+
COMMENT "Building LLVM bitcode ${outfile}-sm_${sm}.bc"
179+
VERBATIM
180+
)
181+
set_property(DIRECTORY APPEND PROPERTY ADDITIONAL_MAKE_CLEAN_FILES ${outfile}-sm_${sm}.bc)
182+
183+
list(APPEND bc_files ${outfile}-sm_${sm}.bc)
184+
endforeach()
185+
186+
# Link to a bitcode library.
187+
add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/libomptarget-nvptx-sm_${sm}.bc
188+
COMMAND ${LIBOMPTARGET_NVPTX_SELECTED_BC_LINKER}
189+
-o ${CMAKE_CURRENT_BINARY_DIR}/libomptarget-nvptx-sm_${sm}.bc ${bc_files}
190+
DEPENDS ${bc_files}
191+
COMMENT "Linking LLVM bitcode libomptarget-nvptx-sm_${sm}.bc"
170192
)
171-
set_property(DIRECTORY APPEND PROPERTY ADDITIONAL_MAKE_CLEAN_FILES ${outfile}.bc)
172-
173-
list(APPEND bc_files ${outfile}.bc)
174-
endforeach()
193+
set_property(DIRECTORY APPEND PROPERTY ADDITIONAL_MAKE_CLEAN_FILES libomptarget-nvptx-sm_${sm}.bc)
175194

176-
# Link to a bitcode library.
177-
add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/libomptarget-nvptx.bc
178-
COMMAND ${LIBOMPTARGET_NVPTX_SELECTED_BC_LINKER}
179-
-o ${CMAKE_CURRENT_BINARY_DIR}/libomptarget-nvptx.bc ${bc_files}
180-
DEPENDS ${bc_files}
181-
COMMENT "Linking LLVM bitcode libomptarget-nvptx.bc"
182-
)
183-
set_property(DIRECTORY APPEND PROPERTY ADDITIONAL_MAKE_CLEAN_FILES libomptarget-nvptx.bc)
184-
185-
add_custom_target(omptarget-nvptx-bc ALL DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/libomptarget-nvptx.bc)
195+
add_custom_target(omptarget-nvptx-${sm}-bc ALL DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/libomptarget-nvptx-sm_${sm}.bc)
186196

187-
# Copy library to destination.
188-
add_custom_command(TARGET omptarget-nvptx-bc POST_BUILD
189-
COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_BINARY_DIR}/libomptarget-nvptx.bc
190-
$<TARGET_FILE_DIR:omptarget-nvptx>)
191-
192-
# Install device RTL under the lib destination folder.
193-
install(FILES ${CMAKE_CURRENT_BINARY_DIR}/libomptarget-nvptx.bc DESTINATION "lib")
197+
# Copy library to destination.
198+
add_custom_command(TARGET omptarget-nvptx-${sm}-bc POST_BUILD
199+
COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_BINARY_DIR}/libomptarget-nvptx-sm_${sm}.bc
200+
$<TARGET_FILE_DIR:omptarget-nvptx>)
194201

202+
# Install device RTL under the lib destination folder.
203+
install(FILES ${CMAKE_CURRENT_BINARY_DIR}/libomptarget-nvptx-sm_${sm}.bc DESTINATION "lib")
204+
endforeach()
195205
endif()
196206
endif()
197207

0 commit comments

Comments
 (0)
Please sign in to comment.