Index: streamexecutor/CMakeLists.txt =================================================================== --- streamexecutor/CMakeLists.txt +++ streamexecutor/CMakeLists.txt @@ -3,9 +3,14 @@ option(STREAM_EXECUTOR_UNIT_TESTS "enable unit tests" ON) option(STREAM_EXECUTOR_ENABLE_DOXYGEN "enable StreamExecutor doxygen" ON) option(STREAM_EXECUTOR_ENABLE_CONFIG_TOOL "enable building streamexecutor-config tool" ON) +option(STREAM_EXECUTOR_ENABLE_CUDA_PLATFORM "enable building the CUDA StreamExecutor platform" OFF) + +configure_file("include/streamexecutor/PlatformOptions.h.in" "include/streamexecutor/PlatformOptions.h") # First find includes relative to the streamexecutor top-level source path. include_directories(BEFORE ${CMAKE_CURRENT_SOURCE_DIR}/include) +# Also look for configured headers in the top-level binary directory. +include_directories(BEFORE ${CMAKE_CURRENT_BINARY_DIR}/include) # If we are not building as part of LLVM, build StreamExecutor as a standalone # project using LLVM as an external library: Index: streamexecutor/include/streamexecutor/PlatformOptions.h.in =================================================================== --- /dev/null +++ streamexecutor/include/streamexecutor/PlatformOptions.h.in @@ -0,0 +1,6 @@ +#ifndef STREAMEXECUTOR_PLATFORMOPTIONS_H +#define STREAMEXECUTOR_PLATFORMOPTIONS_H + +#cmakedefine STREAM_EXECUTOR_ENABLE_CUDA_PLATFORM + +#endif // STREAMEXECUTOR_PLATFORMOPTIONS_H Index: streamexecutor/include/streamexecutor/platforms/cuda/CUDAPlatform.h =================================================================== --- /dev/null +++ streamexecutor/include/streamexecutor/platforms/cuda/CUDAPlatform.h @@ -0,0 +1,42 @@ +//===-- CUDAPlatform.h - CUDA platform subclass -----------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Declaration of the CUDAPlatform class. +/// +//===----------------------------------------------------------------------===// + +#ifndef STREAMEXECUTOR_PLATFORMS_CUDA_CUDAPLATFORM_H +#define STREAMEXECUTOR_PLATFORMS_CUDA_CUDAPLATFORM_H + +#include "streamexecutor/Platform.h" +#include "streamexecutor/platforms/cuda/CUDAPlatformDevice.h" + +#include "llvm/Support/Mutex.h" + +#include + +namespace streamexecutor { +namespace cuda { + +class CUDAPlatform : public Platform { +public: + size_t getDeviceCount() const override; + + Expected getDevice(size_t DeviceIndex) override; + +private: + llvm::sys::Mutex Mutex; + std::map PlatformDevices; +}; + +} // namespace cuda +} // namespace streamexecutor + +#endif // STREAMEXECUTOR_PLATFORMS_CUDA_CUDAPLATFORM_H Index: streamexecutor/include/streamexecutor/platforms/cuda/CUDAPlatformDevice.h =================================================================== --- /dev/null +++ streamexecutor/include/streamexecutor/platforms/cuda/CUDAPlatformDevice.h @@ -0,0 +1,91 @@ +//===-- CUDAPlatformDevice.h - CUDAPlatformDevice class ---------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Declaration of the CUDAPlatformDevice class. 
+/// +//===----------------------------------------------------------------------===// + +#ifndef STREAMEXECUTOR_PLATFORMS_CUDA_CUDAPLATFORMDEVICE_H +#define STREAMEXECUTOR_PLATFORMS_CUDA_CUDAPLATFORMDEVICE_H + +#include "streamexecutor/PlatformDevice.h" + +namespace streamexecutor { +namespace cuda { + +Error CUresultToError(int CUResult); + +class CUDAPlatformDevice : public PlatformDevice { +public: + static Expected create(size_t DeviceIndex); + + CUDAPlatformDevice(const CUDAPlatformDevice &) = delete; + CUDAPlatformDevice &operator=(const CUDAPlatformDevice &) = delete; + + CUDAPlatformDevice(CUDAPlatformDevice &&) noexcept; + CUDAPlatformDevice &operator=(CUDAPlatformDevice &&) noexcept; + + ~CUDAPlatformDevice() override; + + std::string getName() const override { return "CUDA"; } + + Expected + createKernel(const MultiKernelLoaderSpec &Spec) override; + Error destroyKernel(const void *Handle) override; + + Expected createStream() override; + Error destroyStream(const void *Handle) override; + + Error launch(const void *PlatformStreamHandle, BlockDimensions BlockSize, + GridDimensions GridSize, const void *PKernelHandle, + const PackedKernelArgumentArrayBase &ArgumentArray) override; + + Error copyD2H(const void *PlatformStreamHandle, const void *DeviceSrcHandle, + size_t SrcByteOffset, void *HostDst, size_t DstByteOffset, + size_t ByteCount) override; + + Error copyH2D(const void *PlatformStreamHandle, const void *HostSrc, + size_t SrcByteOffset, const void *DeviceDstHandle, + size_t DstByteOffset, size_t ByteCount) override; + + Error copyD2D(const void *PlatformStreamHandle, const void *DeviceSrcHandle, + size_t SrcByteOffset, const void *DeviceDstHandle, + size_t DstByteOffset, size_t ByteCount) override; + + Error blockHostUntilDone(const void *PlatformStreamHandle) override; + + Expected allocateDeviceMemory(size_t ByteCount) override; + Error freeDeviceMemory(const void *Handle) override; + + Error registerHostMemory(void *Memory, size_t ByteCount) 
override; + Error unregisterHostMemory(const void *Memory) override; + + Error synchronousCopyD2H(const void *DeviceSrcHandle, size_t SrcByteOffset, + void *HostDst, size_t DstByteOffset, + size_t ByteCount) override; + + Error synchronousCopyH2D(const void *HostSrc, size_t SrcByteOffset, + const void *DeviceDstHandle, size_t DstByteOffset, + size_t ByteCount) override; + + Error synchronousCopyD2D(const void *DeviceDstHandle, size_t DstByteOffset, + const void *DeviceSrcHandle, size_t SrcByteOffset, + size_t ByteCount) override; + +private: + CUDAPlatformDevice(size_t DeviceIndex) : DeviceIndex(DeviceIndex) {} + + int DeviceIndex; +}; + +} // namespace cuda +} // namespace streamexecutor + +#endif // STREAMEXECUTOR_PLATFORMS_CUDA_CUDAPLATFORMDEVICE_H Index: streamexecutor/lib/CMakeLists.txt =================================================================== --- streamexecutor/lib/CMakeLists.txt +++ streamexecutor/lib/CMakeLists.txt @@ -3,6 +3,26 @@ set_target_properties(${name} PROPERTIES FOLDER "streamexecutor libraries") endmacro(add_se_library) +if(STREAM_EXECUTOR_ENABLE_CUDA_PLATFORM) + set( + CMAKE_MODULE_PATH + ${CMAKE_MODULE_PATH} + "${CMAKE_CURRENT_SOURCE_DIR}/platforms/cuda/cmake/modules/") + + find_package(Libcuda REQUIRED) + include_directories(${LIBCUDA_INCLUDE_DIRS}) + + set( + STREAM_EXECUTOR_CUDA_PLATFORM_TARGET_OBJECT + $) + + set( + STREAM_EXECUTOR_LIBCUDA_LIBRARIES + ${LIBCUDA_LIBRARIES}) +endif(STREAM_EXECUTOR_ENABLE_CUDA_PLATFORM) + +add_subdirectory(platforms) + add_se_library( streamexecutor Device.cpp @@ -16,6 +36,8 @@ PlatformDevice.cpp PlatformManager.cpp Stream.cpp - ) + ${STREAM_EXECUTOR_CUDA_PLATFORM_TARGET_OBJECT} + LINK_LIBS + ${STREAM_EXECUTOR_LIBCUDA_LIBRARIES}) install(TARGETS streamexecutor DESTINATION lib) Index: streamexecutor/lib/PlatformManager.cpp =================================================================== --- streamexecutor/lib/PlatformManager.cpp +++ streamexecutor/lib/PlatformManager.cpp @@ -13,8 +13,14 @@ 
//===----------------------------------------------------------------------===// #include "streamexecutor/PlatformManager.h" + +#include "streamexecutor/PlatformOptions.h" #include "streamexecutor/platforms/host/HostPlatform.h" +#ifdef STREAM_EXECUTOR_ENABLE_CUDA_PLATFORM +#include "streamexecutor/platforms/cuda/CUDAPlatform.h" +#endif + namespace streamexecutor { PlatformManager::PlatformManager() { @@ -26,6 +32,10 @@ // themselves when they are loaded. PlatformsByName.emplace("host", llvm::make_unique()); + +#ifdef STREAM_EXECUTOR_ENABLE_CUDA_PLATFORM + PlatformsByName.emplace("cuda", llvm::make_unique()); +#endif } Expected PlatformManager::getPlatformByName(llvm::StringRef Name) { Index: streamexecutor/lib/platforms/CMakeLists.txt =================================================================== --- /dev/null +++ streamexecutor/lib/platforms/CMakeLists.txt @@ -0,0 +1,3 @@ +if(STREAM_EXECUTOR_ENABLE_CUDA_PLATFORM) + add_subdirectory(cuda) +endif() Index: streamexecutor/lib/platforms/cuda/CMakeLists.txt =================================================================== --- /dev/null +++ streamexecutor/lib/platforms/cuda/CMakeLists.txt @@ -0,0 +1,5 @@ +add_library( + streamexecutor_cuda_platform + OBJECT + CUDAPlatform.cpp + CUDAPlatformDevice.cpp) Index: streamexecutor/lib/platforms/cuda/CUDAPlatform.cpp =================================================================== --- /dev/null +++ streamexecutor/lib/platforms/cuda/CUDAPlatform.cpp @@ -0,0 +1,63 @@ +//===-- CUDAPlatform.cpp - CUDA platform implementation -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Implementation of CUDA platform internals. 
+/// +//===----------------------------------------------------------------------===// + +#include "streamexecutor/platforms/cuda/CUDAPlatform.h" +#include "streamexecutor/Device.h" +#include "streamexecutor/Platform.h" +#include "streamexecutor/platforms/cuda/CUDAPlatformDevice.h" + +#include "llvm/Support/Mutex.h" + +#include "cuda.h" + +#include + +namespace streamexecutor { +namespace cuda { + +size_t CUDAPlatform::getDeviceCount() const { + static CUresult InitResult = []() { return cuInit(0); }(); + + if (InitResult) + // TODO(jhen): Log an error. + return 0; + + int DeviceCount = 0; + CUresult Result = cuDeviceGetCount(&DeviceCount); + // TODO(jhen): Log an error. + + return DeviceCount; +} + +Expected CUDAPlatform::getDevice(size_t DeviceIndex) { + static CUresult InitResult = []() { return cuInit(0); }(); + + if (InitResult) + return CUresultToError(InitResult); + + llvm::sys::ScopedLock Lock(Mutex); + auto Iterator = PlatformDevices.find(DeviceIndex); + if (Iterator == PlatformDevices.end()) { + if (auto MaybePDevice = CUDAPlatformDevice::create(DeviceIndex)) { + Iterator = + PlatformDevices.emplace(DeviceIndex, std::move(*MaybePDevice)).first; + } else { + return MaybePDevice.takeError(); + } + } + return Device(&Iterator->second); +} + +} // namespace cuda +} // namespace streamexecutor Index: streamexecutor/lib/platforms/cuda/CUDAPlatformDevice.cpp =================================================================== --- /dev/null +++ streamexecutor/lib/platforms/cuda/CUDAPlatformDevice.cpp @@ -0,0 +1,227 @@ +//===-- CUDAPlatformDevice.cpp - CUDAPlatformDevice implementation --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Implementation of CUDAPlatformDevice. 
+/// +//===----------------------------------------------------------------------===// + +#include "streamexecutor/platforms/cuda/CUDAPlatformDevice.h" +#include "streamexecutor/PlatformDevice.h" + +#include "cuda.h" + +namespace streamexecutor { +namespace cuda { + +static void *offset(const void *Base, size_t Offset) { + return const_cast(static_cast(Base) + Offset); +} + +Expected CUDAPlatformDevice::create(size_t DeviceIndex) { + CUdevice DeviceHandle; + if (CUresult Result = cuDeviceGet(&DeviceHandle, DeviceIndex)) + return CUresultToError(Result); + + CUcontext ContextHandle; + if (CUresult Result = cuDevicePrimaryCtxRetain(&ContextHandle, DeviceHandle)) + return CUresultToError(Result); + + if (CUresult Result = cuCtxSetCurrent(ContextHandle)) + return CUresultToError(Result); + + return CUDAPlatformDevice(DeviceIndex); +} + +CUDAPlatformDevice::CUDAPlatformDevice(CUDAPlatformDevice &&Other) noexcept + : DeviceIndex(Other.DeviceIndex) { + Other.DeviceIndex = -1; +} + +CUDAPlatformDevice &CUDAPlatformDevice:: +operator=(CUDAPlatformDevice &&Other) noexcept { + DeviceIndex = Other.DeviceIndex; + Other.DeviceIndex = -1; + return *this; +} + +CUDAPlatformDevice::~CUDAPlatformDevice() { + CUresult Result = cuDevicePrimaryCtxRelease(DeviceIndex); + // TODO(jhen): Log error. +} + +Expected +CUDAPlatformDevice::createKernel(const MultiKernelLoaderSpec &Spec) { + // TODO(jhen): Maybe first check loaded modules? 
+ if (!Spec.hasCUDAPTXInMemory()) + return make_error("no CUDA source available to create kernel"); + + CUdevice Device = static_cast(DeviceIndex); + int ComputeCapabilityMajor = 0; + int ComputeCapabilityMinor = 0; + if (CUresult Result = cuDeviceGetAttribute( + &ComputeCapabilityMajor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, + Device)) + return CUresultToError(Result); + if (CUresult Result = cuDeviceGetAttribute( + &ComputeCapabilityMinor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, + Device)) + return CUresultToError(Result); + const char *Code = Spec.getCUDAPTXInMemory().getCode(ComputeCapabilityMajor, + ComputeCapabilityMinor); + + if (!Code) + return make_error("no suitable CUDA source found for compute capability " + + llvm::Twine(ComputeCapabilityMajor) + "." + + llvm::Twine(ComputeCapabilityMinor)); + + CUmodule Module; + if (CUresult Result = cuModuleLoadData(&Module, Code)) + return CUresultToError(Result); + + CUfunction Function; + if (CUresult Result = + cuModuleGetFunction(&Function, Module, Spec.getKernelName().c_str())) + return CUresultToError(Result); + + // TODO(jhen): Should I save this function pointer in case someone asks for + // it again? + + // TODO(jhen): Should I save the module pointer so I can unload it when I + // destroy this device? + + return static_cast(Function); +} + +Error CUDAPlatformDevice::destroyKernel(const void *Handle) { + // TODO(jhen): Maybe keep track of kernels for each module and unload the + // module after they are all destroyed. 
+ return Error::success(); +} + +Expected CUDAPlatformDevice::createStream() { + CUstream Stream; + if (CUresult Result = cuStreamCreate(&Stream, CU_STREAM_DEFAULT)) + return CUresultToError(Result); + return Stream; +} + +Error CUDAPlatformDevice::destroyStream(const void *Handle) { + return CUresultToError( + cuStreamDestroy(static_cast(const_cast(Handle)))); +} + +Error CUDAPlatformDevice::launch( + const void *PlatformStreamHandle, BlockDimensions BlockSize, + GridDimensions GridSize, const void *PKernelHandle, + const PackedKernelArgumentArrayBase &ArgumentArray) { + CUfunction Function = + reinterpret_cast(const_cast(PKernelHandle)); + CUstream Stream = + reinterpret_cast(const_cast(PlatformStreamHandle)); + // TODO(jhen): Deal with shared memory arguments. + unsigned SharedMemoryBytes = 0; + void **ArgumentAddresses = const_cast(ArgumentArray.getAddresses()); + return CUresultToError(cuLaunchKernel( + Function, GridSize.X, GridSize.Y, GridSize.Z, BlockSize.X, BlockSize.Y, + BlockSize.Z, SharedMemoryBytes, Stream, ArgumentAddresses, nullptr)); +} + +Error CUDAPlatformDevice::copyD2H(const void *PlatformStreamHandle, + const void *DeviceSrcHandle, + size_t SrcByteOffset, void *HostDst, + size_t DstByteOffset, size_t ByteCount) { + return CUresultToError(cuMemcpyDtoHAsync( + offset(HostDst, DstByteOffset), + reinterpret_cast(offset(DeviceSrcHandle, SrcByteOffset)), + ByteCount, + static_cast(const_cast(PlatformStreamHandle)))); +} + +Error CUDAPlatformDevice::copyH2D(const void *PlatformStreamHandle, + const void *HostSrc, size_t SrcByteOffset, + const void *DeviceDstHandle, + size_t DstByteOffset, size_t ByteCount) { + return CUresultToError(cuMemcpyHtoDAsync( + reinterpret_cast(offset(DeviceDstHandle, DstByteOffset)), + offset(HostSrc, SrcByteOffset), ByteCount, + static_cast(const_cast(PlatformStreamHandle)))); +} + +Error CUDAPlatformDevice::copyD2D(const void *PlatformStreamHandle, + const void *DeviceSrcHandle, + size_t SrcByteOffset, + const void 
*DeviceDstHandle, + size_t DstByteOffset, size_t ByteCount) { + return CUresultToError(cuMemcpyDtoDAsync( + reinterpret_cast(offset(DeviceDstHandle, DstByteOffset)), + reinterpret_cast(offset(DeviceSrcHandle, SrcByteOffset)), + ByteCount, + static_cast(const_cast(PlatformStreamHandle)))); +} + +Error CUDAPlatformDevice::blockHostUntilDone(const void *PlatformStreamHandle) { + return CUresultToError(cuStreamSynchronize( + static_cast(const_cast(PlatformStreamHandle)))); +} + +Expected CUDAPlatformDevice::allocateDeviceMemory(size_t ByteCount) { + CUdeviceptr Pointer; + if (CUresult Result = cuMemAlloc(&Pointer, ByteCount)) + return CUresultToError(Result); + return reinterpret_cast(Pointer); +} + +Error CUDAPlatformDevice::freeDeviceMemory(const void *Handle) { + return CUresultToError(cuMemFree(reinterpret_cast(Handle))); +} + +Error CUDAPlatformDevice::registerHostMemory(void *Memory, size_t ByteCount) { + return CUresultToError(cuMemHostRegister(Memory, ByteCount, 0u)); +} + +Error CUDAPlatformDevice::unregisterHostMemory(const void *Memory) { + return CUresultToError(cuMemHostUnregister(const_cast(Memory))); +} + +Error CUDAPlatformDevice::synchronousCopyD2H(const void *DeviceSrcHandle, + size_t SrcByteOffset, + void *HostDst, + size_t DstByteOffset, + size_t ByteCount) { + return CUresultToError(cuMemcpyDtoH( + offset(HostDst, DstByteOffset), + reinterpret_cast(offset(DeviceSrcHandle, SrcByteOffset)), + ByteCount)); +} + +Error CUDAPlatformDevice::synchronousCopyH2D(const void *HostSrc, + size_t SrcByteOffset, + const void *DeviceDstHandle, + size_t DstByteOffset, + size_t ByteCount) { + return CUresultToError(cuMemcpyHtoD( + reinterpret_cast(offset(DeviceDstHandle, DstByteOffset)), + offset(HostSrc, SrcByteOffset), + ByteCount)); +} + +Error CUDAPlatformDevice::synchronousCopyD2D(const void *DeviceDstHandle, + size_t DstByteOffset, + const void *DeviceSrcHandle, + size_t SrcByteOffset, + size_t ByteCount) { + return CUresultToError(cuMemcpyDtoD( + 
reinterpret_cast(offset(DeviceDstHandle, DstByteOffset)), + reinterpret_cast(offset(DeviceSrcHandle, SrcByteOffset)), + ByteCount)); +} + +} // namespace cuda +} // namespace streamexecutor Index: streamexecutor/lib/platforms/cuda/cmake/modules/FindLibcuda.cmake =================================================================== --- /dev/null +++ streamexecutor/lib/platforms/cuda/cmake/modules/FindLibcuda.cmake @@ -0,0 +1,19 @@ +# - Try to find the libcuda library +# Once done this will define +# LIBCUDA_FOUND - System has libcuda +# LIBCUDA_INCLUDE_DIRS - The libcuda include directories +# LIBCUDA_LIBRARIES - The libraries needed to use libcuda + +find_path(LIBCUDA_INCLUDE_DIR cuda.h /usr/local/cuda/include) +find_library(LIBCUDA_LIBRARY cuda) + +include(FindPackageHandleStandardArgs) +# handle the QUIETLY and REQUIRED arguments and set LIBCUDA_FOUND to TRUE if +# all listed variables are TRUE +find_package_handle_standard_args( + LIBCUDA DEFAULT_MSG LIBCUDA_INCLUDE_DIR LIBCUDA_LIBRARY) + +mark_as_advanced(LIBCUDA_INCLUDE_DIR LIBCUDA_LIBRARY) + +set(LIBCUDA_LIBRARIES ${LIBCUDA_LIBRARY}) +set(LIBCUDA_INCLUDE_DIRS ${LIBCUDA_INCLUDE_DIR})