Index: test-suite/trunk/External/CMakeLists.txt =================================================================== --- test-suite/trunk/External/CMakeLists.txt +++ test-suite/trunk/External/CMakeLists.txt @@ -23,6 +23,7 @@ endmacro() llvm_add_subdirectories( + CUDA HMMER Nurbs Povray Index: test-suite/trunk/External/CUDA/CMakeLists.txt =================================================================== --- test-suite/trunk/External/CUDA/CMakeLists.txt +++ test-suite/trunk/External/CUDA/CMakeLists.txt @@ -0,0 +1,275 @@ +llvm_externals_find(TEST_SUITE_CUDA_ROOT "cuda" "CUDA prerequisites") + +# Helper macro to extract version number at the end of the string +# Input: get_version(Var String) +# Where String = /some/string/with/version-x.y.z +# Output: +# Sets Var=x.y.z +macro(get_version Var Path) + string(REGEX REPLACE ".*-" "" Version ${Path}) + set(${Var} ${Version}) +endmacro (get_version) + +# Helper function to glob CUDA source files and set LANGUAGE property +# to CXX on each of them. Sets Var in parent scope to the list of +# files found. +macro(cuda_glob Var) + file(GLOB FileList ${ARGN}) + foreach(File IN LISTS FileList) + if (${File} MATCHES ".*\.cu$") + set_source_files_properties(${File} PROPERTIES LANGUAGE CXX) + endif() + endforeach() + set(${Var} ${FileList}) +endmacro(cuda_glob) + +macro(create_one_local_test Name FileGlob) + cuda_glob(_sources ${FileGlob}) + set(_executable ${Name}-${VariantSuffix}) + set(_executable_path ${CMAKE_CURRENT_BINARY_DIR}/${_executable}) + # Verify reference output if it exists. + if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${Name}.reference_output) + set(NO_REFERENCE_OUTPUT 1) + set(REFERENCE_OUTPUT ${CMAKE_CURRENT_SOURCE_DIR}/${Name}.reference_output) + else() + # otherwise just run the executable. + llvm_test_run() + endif() + llvm_test_executable(${_executable} ${_sources} TARGET_VAR _target) + if(VariantLibs) + target_link_libraries(${_target} ${VariantLibs}) + endif() + list(APPEND VARIANT_CUDA_TESTS ${_target}) +endmacro() + +# Create targets for CUDA tests that are part of the test suite. +macro(create_local_cuda_tests VariantSuffix) + create_one_local_test(axpy axpy.cu) + create_one_local_test(empty empty.cu) +endmacro() + +macro(thrust_make_test_name TestName TestSourcePath) + string(REPLACE "${THRUST_PATH}/testing/" "" _tmp ${TestSourcePath}) + string(REPLACE "/" "-" _tmp ${_tmp}) + string(REPLACE "<" "_" _tmp ${_tmp}) + string(REPLACE ">" "_" _tmp ${_tmp}) + string(REGEX REPLACE "\.(cpp|cu)$" "" _tmp ${_tmp}) + set(${TestName} ${_tmp}) +endmacro() + +macro(create_one_thrust_test TestSource) + thrust_make_test_name(_TestName ${TestSourcePath}) + set(_executable thrust-${_TestName}-${VariantSuffix}) + llvm_test_run(--verbose ${_ExtraThrustTestArgs}) + llvm_test_executable(${_executable} ${TestSource} TARGET_VAR _target) + target_link_libraries(${_target} ${VariantLibs}) + target_compile_options(${_target} PUBLIC ${THRUST_CPPFLAGS}) + list(APPEND THRUST_VARIANT_TESTS ${_target}) +endmacro() + +function(create_thrust_tests VariantSuffix) + set(_ThrustMainTarget thrust-${VariantSuffix}) + if(LARGE_PROBLEM_SIZE) + set(_ExtraThrustTestArgs "--sizes=large") + endif() + if(THRUST_SPLIT_TESTS) + # test framework is common for all tests, so we build it once as a + # library. + add_library(ThrustTestFrameworkLib-${VariantSuffix} STATIC ${ThrustTestFramework}) + append_compile_flags(ThrustTestFrameworkLib-${VariantSuffix} ${CPPFLAGS} ${THRUST_CPPFLAGS}) + add_dependencies(ThrustTestFrameworkLib-${VariantSuffix} timeit-host fpcmp-host) + list(APPEND VariantLibs ThrustTestFrameworkLib-${VariantSuffix}) + + # Create individual test executable per test source file. This + # stresses cmake -- it consumes tons of memory and takes forever + # to finish. + foreach(TestSourcePath IN LISTS ThrustAllTestSources) + create_one_thrust_test(${TestSourcePath}) + endforeach() + # Create target to build all thrust tests for this variant + add_custom_target(${_ThrustMainTarget} DEPENDS ${THRUST_VARIANT_TESTS} + COMMENT "Build CUDA test variant ${VariantSuffix}") + else() + # Single monolitic test executable. Alas this stresses linker + # during debug build. Final executable may end up being too large + # to link. + # We can create one test script per thrust test, but running + # thrust tests in parallel is bottlenecked by GPU and startup + # overhead, so it's actually way slower than running all tests + # sequentially. + llvm_test_run(--verbose ${_ExtraThrustTestArgs}) + llvm_test_executable(${_ThrustMainTarget} ${ThrustTestFramework} ${ThrustAllTestSources} + TARGET_VAR _target) + target_compile_options(${_target} PUBLIC ${THRUST_CPPFLAGS}) + target_link_libraries(${_target} ${VariantLibs}) + endif() + list(APPEND VARIANT_CUDA_TESTS ${_ThrustMainTarget}) + set(VARIANT_CUDA_TESTS ${VARIANT_CUDA_TESTS} PARENT_SCOPE) +endfunction() + +# Create set of tests for a given {CUDA,C++ standard,C++ library} tuple. +# Sets VARIANT_CUDA_TESTS targets in parent's scope. +function(create_cuda_test_variant VariantSuffix) + message(STATUS "Creating CUDA test variant ${VariantSuffix}") + + set(VariantLibs ${_Cuda_Libs} ${_Stdlib_Libs}) + list(APPEND CPPFLAGS ${_Cuda_CPPFLAGS} ${_Std_CPPFLAGS} ${_Stdlib_CPPFLAGS}) + list(APPEND LDFLAGS ${_Cuda_LDFLAGS} ${_Std_LDFLAGS} ${_Stdlib_LDFLAGS}) + + create_local_cuda_tests(${VariantSuffix}) + # Create a separate test target for simple tests that can be built/tested quickly. + add_custom_target(cuda-tests-simple-${VariantSuffix} DEPENDS ${VARIANT_CUDA_TESTS} + COMMENT "Build Simple CUDA tests for ${VariantSuffix}") + add_dependencies(cuda-tests-simple cuda-tests-simple-${VariantSuffix}) + set(VARIANT_CUDA_TESTS cuda-tests-simple-${VariantSuffix}) + + if(EXISTS ${THRUST_PATH}) + create_thrust_tests(${VariantSuffix}) + endif() + + # Create target to build all tests for this variant + add_custom_target(cuda-tests-${VariantSuffix} DEPENDS ${VARIANT_CUDA_TESTS} + COMMENT "Build CUDA test variant ${VariantSuffix}") + # And pass it up to the caller + set(VARIANT_CUDA_TESTS cuda-tests-${VariantSuffix} PARENT_SCOPE) +endfunction(create_cuda_test_variant) + +macro(create_cuda_tests) + message(STATUS "Checking CUDA prerequisites in ${TEST_SUITE_CUDA_ROOT}") + file(GLOB CudaVersions ${TEST_SUITE_CUDA_ROOT}/cuda-*) + list(SORT CudaVersions) + foreach(CudaDir IN LISTS CudaVersions) + get_version(CudaVersion ${CudaDir}) + message(STATUS "Found CUDA ${CudaVersion}") + list(APPEND CUDA_PATHS ${CudaDir}) + add_library(cudart-${CudaVersion} SHARED IMPORTED) + set_property(TARGET cudart-${CudaVersion} PROPERTY IMPORTED_LOCATION + ${CudaDir}/lib64/libcudart.so) + endforeach(CudaDir) + + if(NOT CUDA_PATHS) + message(SEND_ERROR + "There are no CUDA installations in ${TEST_SUITE_CUDA_ROOT}") + return() + endif() + + # Special target to build all simple tests. Useful for quick smoke test + # before we embark on heavy-duty compilation which may not be worth it. + add_custom_target(cuda-tests-simple + COMMENT "Build all simple CUDA tests") + + # set default GPU arch + if(NOT CUDA_GPU_ARCH) + list(APPEND CUDA_GPU_ARCH sm_35) + endif() + + foreach(GpuArch IN LISTS CUDA_GPU_ARCH) + list(APPEND CPPFLAGS --cuda-gpu-arch=${GpuArch}) + endforeach() + + file(GLOB GccVersions ${TEST_SUITE_CUDA_ROOT}/gcc-*) + list(SORT GccVersions) + foreach(GccDir IN LISTS GccVersions) + get_version(GccVersion ${GccDir}) + message(STATUS "Found GCC ${GccVersion}") + list(APPEND GCC_PATHS ${GccDir}) + add_library(libstdcxx-${GccVersion} SHARED IMPORTED) + set_property(TARGET libstdcxx-${GccVersion} PROPERTY IMPORTED_LOCATION + ${GccDir}/usr/local/lib64/libstdc++.so) + endforeach(GccDir) + + # Find location of libc++ + execute_process( + COMMAND ${CMAKE_CXX_COMPILER} -print-file-name=libc++.so + OUTPUT_VARIABLE _path_to_libcxx + OUTPUT_STRIP_TRAILING_WHITESPACE) + + if (EXISTS ${_path_to_libcxx}) + add_library(libcxx SHARED IMPORTED) + set_property(TARGET libcxx PROPERTY IMPORTED_LOCATION ${_path_to_libcxx}) + else() + message(ERROR "Can't find libcxx location.") + return() + endif() + + if(EXISTS "${TEST_SUITE_CUDA_ROOT}/thrust") + message(STATUS "Found Thrust ${THRUST_PATH}") + if(THRUST_SPLIT_TESTS) + message(WARNING + "############################################################\n" + "Split tests for thrust will take a while to generate... \n" + "############################################################\n") + endif() + set(THRUST_PATH "${TEST_SUITE_CUDA_ROOT}/thrust" CACHE + PATH "Thrust library path") + set(THRUST_CPPFLAGS + -O2 + -DTHRUST_HOST_SYSTEM=THRUST_HOST_SYSTEM_CPP + -DTHRUST_DEVICE_SYSTEM=THRUST_DEVICE_SYSTEM_CUDA + -I${THRUST_PATH} -I${THRUST_PATH}/testing) + + cuda_glob(ThrustTestCppSources ${THRUST_PATH}/testing/*.cpp) + cuda_glob(ThrustTestCudaSources ${THRUST_PATH}/testing/*.cu) + cuda_glob(ThrustTestCudaBackendSources + ${THRUST_PATH}/testing/backend/decompose.cu + ${THRUST_PATH}/testing/backend/cuda/*.cu) + + list(APPEND ThrustAllTestSources ${ThrustTestCppSources} + ${ThrustTestCudaSources} ${ThrustTestCudaBackendSources}) + list(APPEND ThrustTestFramework + ${THRUST_PATH}/testing/testframework.cpp + ${THRUST_PATH}/testing/reduce_large.cu + ${THRUST_PATH}/testing/unittest_tester.cu + ${THRUST_PATH}/testing/backend/cuda/testframework.cu) + + # Remove test framework files from the list of test files. + foreach(File IN LISTS ThrustTestFramework) + list(REMOVE_ITEM ThrustAllTestSources ${File}) + endforeach() + endif() + + foreach(_CudaPath ${CUDA_PATHS}) + get_version(_CudaVersion ${_CudaPath}) + set(_Cuda_Suffix "cuda-${_CudaVersion}") + set(_Cuda_CPPFLAGS --cuda-path=${_CudaPath} -I${_CudaPath}/include) + set(_Cuda_Libs cudart-${_CudaVersion}) + foreach(_Std IN ITEMS "c++98" "c++11") + set(_Std_Suffix "${_Std}") + set(_Std_CPPFLAGS -std=${_Std}) + set(_Std_LDFLAGS -std=${_Std}) + foreach(_GccPath IN LISTS GCC_PATHS) + get_version(_GccVersion ${_GccPath}) + set(_Gcc_Suffix "libstdc++-${_GccVersion}") + # Tell clang to use libstdc++ and where to find it. + set(_Stdlib_CPPFLAGS -stdlib=libstdc++ -gcc-toolchain ${_GccPath}/usr/local) + set(_Stdlib_LDFLAGS -stdlib=libstdc++) + # Add libstdc++ as link dependency. + set(_Stdlib_Libs libstdcxx-${_GccVersion}) + + set(VARIANT_CUDA_TESTS) + create_cuda_test_variant("${_Cuda_Suffix}-${_Std_Suffix}-${_Gcc_Suffix}") + list(APPEND ALL_CUDA_TESTS ${VARIANT_CUDA_TESTS}) + endforeach() + + # Same as above, but for libc++ + # Tell clang to use libc++ + # We also need to add compiler's include path for cxxabi.h + get_filename_component(_compiler_path ${CMAKE_CXX_COMPILER} DIRECTORY) + set(_Stdlib_CPPFLAGS -stdlib=libc++ -I${_compiler_path}/../include) + set(_Stdlib_LDFLAGS -stdlib=libc++) + set(_Stdlib_Libs libcxx) + set(VARIANT_CUDA_TESTS) + create_cuda_test_variant("${_Cuda_Suffix}-${_Std_Suffix}-libc++") + list(APPEND ALL_CUDA_TESTS ${VARIANT_CUDA_TESTS}) + endforeach() + endforeach() + + # convenience target to build all CUDA tests. + add_custom_target(cuda-tests-all DEPENDS ${ALL_CUDA_TESTS} + COMMENT "Build all CUDA tests.") +endmacro(create_cuda_tests) + +if(TEST_SUITE_CUDA_ROOT) + create_cuda_tests() +endif() + Index: test-suite/trunk/External/CUDA/README =================================================================== --- test-suite/trunk/External/CUDA/README +++ test-suite/trunk/External/CUDA/README @@ -0,0 +1,55 @@ +CUDA Tests +========== + +Cuda tests are enabled if cmake is invoked with +-DTEST_SUITE_EXTERNALS_DIR= and specified externals +directory contains at least one CUDA installation. + +Expected externals directory structure: +Externals/ + cuda/ + cuda-X.Y/ -- One or more CUDA installation. + gcc-X.Y.Z/ -- One or more GCC installation for libstdc++. + +export EXTERNALS=/your/externals/path +export CUDA_EXTERNALS=$EXTERNALS/cuda +export CLANG_DIR=/your/clang/build/dir +export TEST_SUITE_DIR=/path/to/test-suite-sources + +* Externals installation + For each cuda version: + # bash cuda-linux64-rel-XXXXXX.run -prefix=$CUDA_EXTERNALS/cuda-7.5 -noprompt -nosymlink + + For each GCC version: + * extract GCC sources and cd into gcc-X.Y.Z + # ../configure --enable-languages=c,c++ --disable-libsanitizer + # make bootstrap + # make DESTDIR=$CUDA_EXTERNALS/gcc-X.Y.Z install + + Fetch thrust: + # cd $CUDA_EXTERNALS + # git clone https://github.com/thrust/thrust.git + +* Configure, build and run tests + Note that if externals are checked out into /test-suite-externals + there's no need to specify location explicitly with -DTEST_SUITE_EXTERNALS_DIR= + + # mkdir test-suite-build-dir + # cd test-suite-build-dir + # PATH=$CLANG_DIR/bin:$PATH CXX=clang++ CC=clang \ + cmake -G Ninja -DTEST_SUITE_EXTERNALS_DIR=$EXTERNALS $TEST_SUITE_DIR + # ninja cuda-tests-all + # lit -j1 Externals/CUDA + + For convenience there are top-level targets that would build only + one particular variant of the tests: + + cuda-tests- + + where is -- + + Similarly individual test variants can be built using the same convention: + + - + + E.g: thrust-cuda-7.5-c++98-libstdc++-4.9.3 OR axpy-cuda-7.0-c++11-libc++ Index: test-suite/trunk/External/CUDA/axpy.cu =================================================================== --- test-suite/trunk/External/CUDA/axpy.cu +++ test-suite/trunk/External/CUDA/axpy.cu @@ -0,0 +1,37 @@ +#include + +__global__ void axpy(float a, float* x, float* y) { + y[threadIdx.x] = a * x[threadIdx.x]; +} + +int main(int argc, char* argv[]) { + const int kDataLen = 4; + + float a = 2.0f; + float host_x[kDataLen] = {1.0f, 2.0f, 3.0f, 4.0f}; + float host_y[kDataLen]; + + // Copy input data to device. + float* device_x; + float* device_y; + cudaMalloc(&device_x, kDataLen * sizeof(float)); + cudaMalloc(&device_y, kDataLen * sizeof(float)); + cudaMemcpy(device_x, host_x, kDataLen * sizeof(float), + cudaMemcpyHostToDevice); + + // Launch the kernel. + axpy<<<1, kDataLen>>>(a, device_x, device_y); + + // Copy output data to host. + cudaDeviceSynchronize(); + cudaMemcpy(host_y, device_y, kDataLen * sizeof(float), + cudaMemcpyDeviceToHost); + + // Print the results. + for (int i = 0; i < kDataLen; ++i) { + std::cout << "y[" << i << "] = " << host_y[i] << "\n"; + } + + cudaDeviceReset(); + return 0; +} Index: test-suite/trunk/External/CUDA/axpy.reference_output =================================================================== --- test-suite/trunk/External/CUDA/axpy.reference_output +++ test-suite/trunk/External/CUDA/axpy.reference_output @@ -0,0 +1,5 @@ +y[0] = 2 +y[1] = 4 +y[2] = 6 +y[3] = 8 +exit 0 Index: test-suite/trunk/External/CUDA/empty.cu =================================================================== --- test-suite/trunk/External/CUDA/empty.cu +++ test-suite/trunk/External/CUDA/empty.cu @@ -0,0 +1 @@ +int main(int argc, char **argv) { return 0; } Index: test-suite/trunk/External/CUDA/empty.reference_output =================================================================== --- test-suite/trunk/External/CUDA/empty.reference_output +++ test-suite/trunk/External/CUDA/empty.reference_output @@ -0,0 +1 @@ +exit 0 Index: test-suite/trunk/cmake/modules/TestFile.cmake =================================================================== --- test-suite/trunk/cmake/modules/TestFile.cmake +++ test-suite/trunk/cmake/modules/TestFile.cmake @@ -71,4 +71,6 @@ # Produce .test file file(GENERATE OUTPUT ${testfile} CONTENT "${TESTSCRIPT}") + # flush the test script + set(TESTSCRIPT "" PARENT_SCOPE) endfunction()