Index: External/CUDA/CMakeLists.txt
===================================================================
--- External/CUDA/CMakeLists.txt
+++ External/CUDA/CMakeLists.txt
@@ -110,6 +110,7 @@
   # buildbot a lot.
   create_one_local_test_f(simd simd.cu
                           "cuda-(8[.]0|9[.]2)-c[+][+]11-libc[+][+]")
+  create_one_local_test(round round.cu)
 endmacro()
 
 macro(thrust_make_test_name TestName TestSourcePath)
Index: External/CUDA/round.cu
===================================================================
--- /dev/null
+++ External/CUDA/round.cu
@@ -0,0 +1,96 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include <cstdlib>
+#include <iostream>
+
+// Test the implementation of llvm intrinsic round. In particular, when the
+// source is equidistant between two integers, it rounds away from zero.
+
+// Reports a failed CUDA runtime call on stderr and terminates the test with a
+// failing exit status; does nothing when status is cudaSuccess.
+static void check_cuda(cudaError_t status, const char* what) {
+  if (status != cudaSuccess) {
+    std::cerr << what << " failed: " << cudaGetErrorString(status) << "\n";
+    std::exit(EXIT_FAILURE);
+  }
+}
+
+// Dispatch to the rounding builtin that matches the operand type, so the
+// double instantiation is not silently narrowed to float.
+__device__ inline float round_builtin(float v) { return __builtin_roundf(v); }
+__device__ inline double round_builtin(double v) { return __builtin_round(v); }
+
+// Writes the rounded value of x[i] to y[i], one element per thread. Indexes by
+// threadIdx.x only, so it must be launched as a single block with exactly one
+// thread per element.
+template <typename T>
+__global__ void round(T* x, T* y) {
+  y[threadIdx.x] = round_builtin(x[threadIdx.x]);
+}
+
+const int kDataLen = 4;
+
+// Rounds the kDataLen values in host_x on the device, prints each result next
+// to its expected value, and exits with a failing status if any differ.
+template <typename T>
+void test_round(T* host_x, T* expected_y) {
+  T host_y[kDataLen];
+  const size_t bytes = kDataLen * sizeof(T);
+
+  // Copy input data to device.
+  T* device_x;
+  T* device_y;
+  check_cuda(cudaMalloc(&device_x, bytes), "cudaMalloc(device_x)");
+  check_cuda(cudaMalloc(&device_y, bytes), "cudaMalloc(device_y)");
+  check_cuda(cudaMemcpy(device_x, host_x, bytes, cudaMemcpyHostToDevice),
+             "cudaMemcpy(host to device)");
+
+  // Launch the kernel.
+  round<<<1, kDataLen>>>(device_x, device_y);
+  // Launch-configuration errors are only visible through cudaGetLastError().
+  check_cuda(cudaGetLastError(), "round kernel launch");
+
+  // Copy output data to host.
+  check_cuda(cudaDeviceSynchronize(), "cudaDeviceSynchronize");
+  check_cuda(cudaMemcpy(host_y, device_y, bytes, cudaMemcpyDeviceToHost),
+             "cudaMemcpy(device to host)");
+
+  check_cuda(cudaFree(device_x), "cudaFree(device_x)");
+  check_cuda(cudaFree(device_y), "cudaFree(device_y)");
+
+  // Print and compare the results. Every expected value is an exactly
+  // representable integer, so a correct round() must match exactly; a
+  // relative tolerance would hide off-by-one errors on the large inputs.
+  bool all_match = true;
+  for (int i = 0; i < kDataLen; ++i) {
+    std::cout << "y[" << i << "] = " << host_y[i] << " " << expected_y[i]
+              << " " << host_y[i] - expected_y[i] << "\n";
+    if (host_y[i] != expected_y[i])
+      all_match = false;
+  }
+  if (!all_match) {
+    std::cerr << "round: result mismatch\n";
+    std::exit(EXIT_FAILURE);
+  }
+}
+
+int main(int argc, char* argv[]) {
+  // Halfway cases must round away from zero; the large magnitudes are already
+  // integral and must come back unchanged.
+  float float_x[kDataLen] = {-0.5f, 8.5f, -8.38861e+06f, 8.38861e+06f};
+  float float_y[kDataLen] = {-1.0f, 9.0f, -8.38861e+06f, 8.38861e+06f};
+  test_round(float_x, float_y);
+
+  double double_x[kDataLen] = {0.5, -8.5, 4.5035996e+15, -4.5035996e+15};
+  double double_y[kDataLen] = {1.0, -9.0, 4.5035996e+15, -4.5035996e+15};
+  test_round(double_x, double_y);
+
+  check_cuda(cudaDeviceReset(), "cudaDeviceReset");
+  return 0;
+}