Index: External/CMakeLists.txt =================================================================== --- External/CMakeLists.txt +++ External/CMakeLists.txt @@ -23,6 +23,7 @@ endmacro() llvm_add_subdirectories( + CUDA HMMER Nurbs Povray Index: External/CUDA/CMakeLists.txt =================================================================== --- /dev/null +++ External/CUDA/CMakeLists.txt @@ -0,0 +1,267 @@ +llvm_externals_find(TEST_SUITE_CUDA_ROOT "cuda" "CUDA prerequisites") + +# Helper function to extract version number at the end of the string +# Input: get_version(Var String) +# Where String = /some/string/with/version-x.y.z +# Output: +# Sets Var=x.y.z in parent scope +function(get_version Var Path) + string(REGEX REPLACE ".*-" "" Version ${Path}) + set(${Var} ${Version} PARENT_SCOPE) +endfunction(get_version) + +# Helper function to glob CUDA source files and set LANGUAGE property +# to CXX on each of them. Sets Var in parent scope to the list of +# files found. +function(cuda_glob Var) + file(GLOB FileList ${ARGN}) + foreach(File IN LISTS FileList) + if (${File} MATCHES ".*\.cu$") + set_source_files_properties(${File} PROPERTIES LANGUAGE CXX) + endif() + endforeach() + set(${Var} ${FileList} PARENT_SCOPE) +endfunction(cuda_glob) + +function(create_one_local_test Name FileGlob) + set(PROG ${Name}-${VariantSuffix}) + cuda_glob(Source ${FileGlob}) + llvm_test_run() + llvm_multisource() + target_link_libraries(${PROG} ${VariantLibs}) + list(APPEND VARIANT_CUDA_TESTS ${PROG}) +endfunction() + +# Create targets for CUDA tests that are part of the test suite. +function(create_local_cuda_tests VariantSuffix) + create_one_local_test(axpy axpy.cu) + create_one_local_test(empty empty.cu) + set(VARIANT_CUDA_TESTS ${VARIANT_CUDA_TESTS} PARENT_SCOPE) +endfunction() + +macro(thrust_make_test_name TestName TestSourcePath) + string(REPLACE "${THRUST_PATH}/testing/" "" _tmp ${TestSourcePath}) + string(REPLACE "/" "-" _tmp ${_tmp}) + string(REPLACE "<" "_" _tmp ${_tmp}) + string(REPLACE ">" "_" _tmp ${_tmp}) + string(REGEX REPLACE "\.(cpp|cu)$" "" _tmp ${_tmp}) + set(${TestName} ${_tmp}) +endmacro() + +function(create_one_thrust_test_executable TestSourcePath) + thrust_make_test_name(TestName ${TestSourcePath}) + set(PROG thrust-${TestName}-${VariantSuffix}) + set(Source ${TestSourcePath}) + llvm_multisource() + target_link_libraries(${PROG} ${VariantLibs}) + set(THRUST_TEST_EXECUTABLE ${PROG} PARENT_SCOPE) +endfunction() + +function(create_one_thrust_test TestSourcePath) + thrust_make_test_name(TestName ${TestSourcePath}) + set(PROG thrust-${TestName}-${VariantSuffix}) + set(Source ${TestSourcePath}) + llvm_multisource() + target_link_libraries(${PROG} ${VariantLibs}) + list(APPEND VARIANT_CUDA_TESTS ${PROG}) + set(VARIANT_CUDA_TESTS ${VARIANT_CUDA_TESTS} PARENT_SCOPE) +endfunction() + +function(create_thrust_test_file Executable TestName) + thrust_make_test_name(TestName ${TestName}) + message(STATUS "Created test ${Executable}-${TestName}") + set(executable_path ${CMAKE_CURRENT_BINARY_DIR}/${Executable}) + if(DEFINED LARGE_PROBLEM_SIZE) + llvm_test_run(--sizes=large ${TestName}) + else() + llvm_test_run(${TestName}) + endif() + llvm_add_test("${executable_path}-${TestName}.test" "${executable_path}") +endfunction() + +function(create_thrust_tests VariantSuffix) + list(APPEND CPPFLAGS + -DTHRUST_HOST_SYSTEM=THRUST_HOST_SYSTEM_CPP + -DTHRUST_DEVICE_SYSTEM=THRUST_DEVICE_SYSTEM_CUDA) + list(APPEND CPPFLAGS + -I${CudaPath}/include + -I${THRUST_PATH} -I${THRUST_PATH}/testing) + + # test framework is common for all tests, so we build it once as a + # library. Test suite's cmake files don't handle libraries well, so + # we need to set compile flags manually. + add_library(ThrustTestFrameworkLib-${VariantSuffix} + STATIC ${ThrustTestFramework}) + append_compile_flags(ThrustTestFrameworkLib-${VariantSuffix} ${CPPFLAGS}) + add_dependencies(ThrustTestFrameworkLib-${VariantSuffix} + timeit-host fpcmp-host) + list(APPEND VariantLibs ThrustTestFrameworkLib-${VariantSuffix}) + + if(DEFINED THRUST_SPLIT_TESTS AND "${THRUST_SPLIT_TESTS}") + # Create individual test executable per test source file. This + # stresses cmake -- it consumes tons of memory and takes forever + # to finish. + foreach(TestSourcePath IN LISTS ThrustAllTestSources) + create_one_thrust_test_executable(${TestSourcePath}) + list(APPEND VARIANT_CUDA_TESTS ${THRUST_TEST_EXECUTABLE}) + endforeach() + else() + # Single monolitic test executable. Alas this stresses linker + # during debug build. Final executable may end up being too large + # to link. + set(PROG thrust-${VariantSuffix}) + set(Source ${ThrustTestFramework} ${ThrustAllTestSources}) + if(DEFINED LARGE_PROBLEM_SIZE) + llvm_test_run(--verbose --sizes=large) + else() + llvm_test_run(--verbose) + endif() + llvm_multisource() + target_link_libraries(${PROG} ${VariantLibs}) + list(APPEND VARIANT_CUDA_TESTS ${PROG}) + # We can create one test script per thrust test, but running + # thrust tests in parallel is bottlenecked by GPU and startup + # overhead, so it's actually way slower than running all tests + # sequentially. + endif() + set(VARIANT_CUDA_TESTS ${VARIANT_CUDA_TESTS} PARENT_SCOPE) +endfunction() + +# Create set of tests for a given {CUDA,C++ standard,C++ library} tuple. +# Sets VARIANT_CUDA_TESTS targets in parent's scope. +function(create_cuda_test_variant CudaPath Std GccPath) + get_version(CudaVersion ${CudaPath}) + set(VariantSuffix "cuda-${CudaVersion}") + list(APPEND CPPFLAGS --cuda-path=${CudaPath}) + list(APPEND VariantLibs cudart-${CudaVersion}) + + if(Std) + set(VariantSuffix "${VariantSuffix}-${Std}") + list(APPEND CPPFLAGS -std=${Std}) + list(APPEND LDFLAGS -std=${Std}) + endif() + + if(GccPath) + get_version(GccVersion ${GccPath}) + set(VariantSuffix "${VariantSuffix}-libstdc++-${GccVersion}") + + # Tell clang to use libstdc++ and where to find it. + list(APPEND CPPFLAGS -gcc-toolchain ${GccPath}/usr/local -stdlib=libstdc++) + # Add libstdc++ as link dependency. + list(APPEND VariantLibs libstdcxx-${GccVersion}) + else() + # Tell clang to use libc++ + # We also need to add compiler's include path for cxxabi.h + get_filename_component(_compiler_path ${CMAKE_CXX_COMPILER} DIRECTORY) + list(APPEND CPPFLAGS -stdlib=libc++ -I${_compiler_path}/../include) + list(APPEND LDFLAGS -stdlib=libc++) + list(APPEND VariantLibs libcxx) + endif() + + message(STATUS "Creating CUDA test variant ${VariantSuffix}") + create_local_cuda_tests(${VariantSuffix}) + if(EXISTS ${THRUST_PATH}) + create_thrust_tests(${VariantSuffix}) + endif() + + # Create target to build all tests for this variant + add_custom_target(cuda-tests-${VariantSuffix} DEPENDS ${VARIANT_CUDA_TESTS} + COMMENT "Build CUDA test variant ${VariantSuffix}") + # And pass it up to the caller + set(VARIANT_CUDA_TESTS cuda-tests-${VariantSuffix} PARENT_SCOPE) +endfunction(create_cuda_test_variant) + +macro(create_cuda_tests) + message(STATUS "Checking CUDA prerequisites in ${TEST_SUITE_CUDA_ROOT}") + file(GLOB CudaVersions ${TEST_SUITE_CUDA_ROOT}/cuda-*) + foreach(CudaDir IN LISTS CudaVersions) + get_version(CudaVersion ${CudaDir}) + message(STATUS "Found CUDA ${CudaVersion} ${CudaDir}") + list(APPEND CUDA_PATHS ${CudaDir}) + add_library(cudart-${CudaVersion} SHARED IMPORTED) + set_property(TARGET cudart-${CudaVersion} PROPERTY IMPORTED_LOCATION + ${CudaDir}/lib64/libcudart.so) + endforeach(CudaDir) + + if(NOT CUDA_PATHS) + message(SEND_ERROR + "There are no CUDA installations in ${TEST_SUITE_CUDA_ROOT}") + return() + endif() + + # set default GPU arch + if(NOT CUDA_GPU_ARCH) + list(APPEND CUDA_GPU_ARCH sm_35) + endif() + + foreach(GpuArch IN LISTS CUDA_GPU_ARCH) + list(APPEND CPPFLAGS --cuda-gpu-arch=${GpuArch}) + endforeach() + + file(GLOB GccVersions ${TEST_SUITE_CUDA_ROOT}/gcc-*) + foreach(GccDir IN LISTS GccVersions) + get_version(GccVersion ${GccDir}) + message(STATUS "Found GCC ${GccVersion}") + list(APPEND GCC_PATHS ${GccDir}) + add_library(libstdcxx-${GccVersion} SHARED IMPORTED) + set_property(TARGET libstdcxx-${GccVersion} PROPERTY IMPORTED_LOCATION + ${GccDir}/usr/local/lib64/libstdc++.so) + endforeach(GccDir) + + # Find location of libc++ + execute_process( + COMMAND ${CMAKE_CXX_COMPILER} -print-file-name=libc++.so + OUTPUT_VARIABLE _path_to_libcxx + OUTPUT_STRIP_TRAILING_WHITESPACE) + + if (EXISTS ${_path_to_libcxx}) + add_library(libcxx SHARED IMPORTED) + set_property(TARGET libcxx PROPERTY IMPORTED_LOCATION ${_path_to_libcxx}) + else() + message(ERROR "Can't find libcxx location.") + return() + endif() + + if(EXISTS "${TEST_SUITE_CUDA_ROOT}/thrust") + set(THRUST_PATH "${TEST_SUITE_CUDA_ROOT}/thrust" CACHE + PATH "Thrust library path") + message(STATUS "Found Thrust ${THRUST_PATH}") + cuda_glob(ThrustTestCppSources ${THRUST_PATH}/testing/*.cpp) + cuda_glob(ThrustTestCudaSources ${THRUST_PATH}/testing/*.cu) + cuda_glob(ThrustTestCudaBackendSources + ${THRUST_PATH}/testing/backend/decompose.cu + ${THRUST_PATH}/testing/backend/cuda/*.cu) + + list(APPEND ThrustAllTestSources ${ThrustTestCppSources} + ${ThrustTestCudaSources} ${ThrustTestCudaBackendSources}) + list(APPEND ThrustTestFramework + ${THRUST_PATH}/testing/testframework.cpp + ${THRUST_PATH}/testing/reduce_large.cu + ${THRUST_PATH}/testing/unittest_tester.cu + ${THRUST_PATH}/testing/backend/cuda/testframework.cu) + + # Remove test framework files from the list of test files. + foreach(File IN LISTS ThrustTestFramework) + list(REMOVE_ITEM ThrustAllTestSources ${File}) + endforeach() + endif() + + foreach(CudaPath ${CUDA_PATHS}) + foreach(Std IN ITEMS "" "c++11") + foreach(GccPath IN LISTS GCC_PATHS ITEMS "") + set(VARIANT_CUDA_TESTS) + create_cuda_test_variant("${CudaPath}" "${Std}" "${GccPath}") + list(APPEND ALL_CUDA_TESTS ${VARIANT_CUDA_TESTS}) + endforeach() + endforeach() + endforeach() + + # convenience target to build all CUDA tests. + add_custom_target(cuda-tests-all DEPENDS ${ALL_CUDA_TESTS} + COMMENT "Build all CUDA tests.") +endmacro(create_cuda_tests) + +if(TEST_SUITE_CUDA_ROOT) + create_cuda_tests() +endif() + Index: External/CUDA/README =================================================================== --- /dev/null +++ External/CUDA/README @@ -0,0 +1,39 @@ +CUDA Tests +========== + +Cuda tests are enabled if cmake is invoked with +-DTEST_SUITE_EXTERNALS_DIR= and specified externals +directory contains at least one CUDA installation. + +Expected externals directory structure: +Externals/ + cuda/ + cuda-X.Y/ -- One or more CUDA installation. + gcc-X.Y.Z/ -- One or more GCC installation for libstdc++. + +export EXTERNALS=/your/externals/path +export CUDA_EXTERNALS=$EXTERNALS/cuda +export CLANG_DIR=/your/clang/build/dir +export TEST_SUITE_DIR=/path/to/test-suite-sources + +* Externals installation + For each cuda version: + # bash cuda-linux64-rel-XXXXXX.run -prefix=$CUDA_EXTERNALS/cuda-7.5 -noprompt -nosymlink + + For each GCC version: + * extract GCC sources and cd into gcc-X.Y.Z + # ../configure --enable-languages=c,c++ --disable-libsanitizer + # make bootstrap + # make DESTDIR=$CUDA_EXTERNALS/gcc-X.Y.Z install + + Fetch thrust: + # cd $CUDA_EXTERNALS + # git clone https://github.com/thrust/thrust.git + +* Configure, build and run tests + # mkdir test-suite-build-dir + # cd test-suite-build-dir + # PATH=$CLANG_DIR/bin:$PATH CXX=clang++ CC=clang \ + cmake -G Ninja -DTEST_SUITE_EXTERNALS_DIR=$EXTERNALS $TEST_SUITE_DIR + # ninja cuda-all + # lit -j1 Externals/CUDA Index: External/CUDA/axpy.cu =================================================================== --- /dev/null +++ External/CUDA/axpy.cu @@ -0,0 +1,37 @@ +#include + +__global__ void axpy(float a, float* x, float* y) { + y[threadIdx.x] = a * x[threadIdx.x]; +} + +int main(int argc, char* argv[]) { + const int kDataLen = 4; + + float a = 2.0f; + float host_x[kDataLen] = {1.0f, 2.0f, 3.0f, 4.0f}; + float host_y[kDataLen]; + + // Copy input data to device. + float* device_x; + float* device_y; + cudaMalloc(&device_x, kDataLen * sizeof(float)); + cudaMalloc(&device_y, kDataLen * sizeof(float)); + cudaMemcpy(device_x, host_x, kDataLen * sizeof(float), + cudaMemcpyHostToDevice); + + // Launch the kernel. + axpy<<<1, kDataLen>>>(a, device_x, device_y); + + // Copy output data to host. + cudaDeviceSynchronize(); + cudaMemcpy(host_y, device_y, kDataLen * sizeof(float), + cudaMemcpyDeviceToHost); + + // Print the results. + for (int i = 0; i < kDataLen; ++i) { + std::cout << "y[" << i << "] = " << host_y[i] << "\n"; + } + + cudaDeviceReset(); + return 0; +} Index: External/CUDA/empty.cu =================================================================== --- /dev/null +++ External/CUDA/empty.cu @@ -0,0 +1 @@ +int main(int argc, char **argv) { return 0; }