Index: parallel-libs/trunk/streamexecutor/include/streamexecutor/Device.h =================================================================== --- parallel-libs/trunk/streamexecutor/include/streamexecutor/Device.h +++ parallel-libs/trunk/streamexecutor/include/streamexecutor/Device.h @@ -0,0 +1,300 @@ +//===-- Device.h - The Device class -----------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// The Device class which represents a single device of a specific platform. +/// +//===----------------------------------------------------------------------===// + +#ifndef STREAMEXECUTOR_DEVICE_H +#define STREAMEXECUTOR_DEVICE_H + +#include "streamexecutor/KernelSpec.h" +#include "streamexecutor/PlatformInterfaces.h" +#include "streamexecutor/Utils/Error.h" + +namespace streamexecutor { + +class KernelInterface; +class Stream; + +class Device { +public: + explicit Device(PlatformDevice *PDevice); + virtual ~Device(); + + /// Gets the kernel implementation for the underlying platform. + virtual Expected> + getKernelImplementation(const MultiKernelLoaderSpec &Spec) { + // TODO(jhen): Implement this. + return nullptr; + } + + Expected> createStream(); + + /// Allocates an array of ElementCount entries of type T in device memory. + template + Expected> allocateDeviceMemory(size_t ElementCount) { + Expected MaybeBase = + PDevice->allocateDeviceMemory(ElementCount * sizeof(T)); + if (!MaybeBase) + return MaybeBase.takeError(); + return GlobalDeviceMemory(*MaybeBase); + } + + /// Frees memory previously allocated with allocateDeviceMemory. + template Error freeDeviceMemory(GlobalDeviceMemory Memory) { + return PDevice->freeDeviceMemory(Memory); + } + + /// Allocates an array of ElementCount entries of type T in host memory. + /// + /// Host memory allocated by this function can be used for asynchronous memory + /// copies on streams. See Stream::thenCopyD2H and Stream::thenCopyH2D. + template Expected allocateHostMemory(size_t ElementCount) { + Expected MaybeMemory = + PDevice->allocateHostMemory(ElementCount * sizeof(T)); + if (!MaybeMemory) + return MaybeMemory.takeError(); + return static_cast(*MaybeMemory); + } + + /// Frees memory previously allocated with allocateHostMemory. + template Error freeHostMemory(T *Memory) { + return PDevice->freeHostMemory(Memory); + } + + /// Registers a previously allocated host array of type T for asynchronous + /// memory operations. + /// + /// Host memory registered by this function can be used for asynchronous + /// memory copies on streams. See Stream::thenCopyD2H and Stream::thenCopyH2D. + template + Error registerHostMemory(T *Memory, size_t ElementCount) { + return PDevice->registerHostMemory(Memory, ElementCount * sizeof(T)); + } + + /// Unregisters host memory previously registered by registerHostMemory. + template Error unregisterHostMemory(T *Memory) { + return PDevice->unregisterHostMemory(Memory); + } + + /// \anchor DeviceHostSyncCopyGroup + /// \name Host-synchronous device memory copying functions + /// + /// These methods block the calling host thread while copying data to or from + /// device memory. On the device side, these methods do not block any ongoing + /// device calls. + /// + /// There are no restrictions on the host memory that is used as a source or + /// destination in these copy methods, so there is no need to allocate that + /// host memory using allocateHostMemory or register it with + /// registerHostMemory. + /// + /// Each of these methods has a single template parameter, T, that specifies + /// the type of data being copied. The ElementCount arguments specify the + /// number of objects of type T to be copied. + /// + /// For ease of use, each of the methods is overloaded to take either a + /// GlobalDeviceMemorySlice or a GlobalDeviceMemory argument in the device + /// memory argument slots, and the GlobalDeviceMemory arguments are just + /// converted to GlobalDeviceMemorySlice arguments internally by using + /// GlobalDeviceMemory::asSlice. + /// + /// These methods perform bounds checking to make sure that the ElementCount + /// is not too large for the source or destination. For methods that do not + /// take an ElementCount argument, an error is returned if the source size + /// does not exactly match the destination size. + ///@{ + + template + Error synchronousCopyD2H(GlobalDeviceMemorySlice Src, + llvm::MutableArrayRef Dst, size_t ElementCount) { + if (ElementCount > Src.getElementCount()) + return make_error("copying too many elements, " + + llvm::Twine(ElementCount) + + ", from a device array of element count " + + llvm::Twine(Src.getElementCount())); + if (ElementCount > Dst.size()) + return make_error( + "copying too many elements, " + llvm::Twine(ElementCount) + + ", to a host array of element count " + llvm::Twine(Dst.size())); + return PDevice->synchronousCopyD2H(Src.getBaseMemory(), + Src.getElementOffset() * sizeof(T), + Dst.data(), 0, ElementCount * sizeof(T)); + } + + template + Error synchronousCopyD2H(GlobalDeviceMemorySlice Src, + llvm::MutableArrayRef Dst) { + if (Src.getElementCount() != Dst.size()) + return make_error( + "array size mismatch for D2H, device source has element count " + + llvm::Twine(Src.getElementCount()) + + " but host destination has element count " + llvm::Twine(Dst.size())); + return synchronousCopyD2H(Src, Dst, Src.getElementCount()); + } + + template + Error synchronousCopyD2H(GlobalDeviceMemorySlice Src, T *Dst, + size_t ElementCount) { + return synchronousCopyD2H(Src, llvm::MutableArrayRef(Dst, ElementCount), + ElementCount); + } + + template + Error synchronousCopyD2H(GlobalDeviceMemory Src, + llvm::MutableArrayRef Dst, size_t ElementCount) { + return synchronousCopyD2H(Src.asSlice(), Dst, ElementCount); + } + + template + Error synchronousCopyD2H(GlobalDeviceMemory Src, + llvm::MutableArrayRef Dst) { + return synchronousCopyD2H(Src.asSlice(), Dst); + } + + template + Error synchronousCopyD2H(GlobalDeviceMemory Src, T *Dst, + size_t ElementCount) { + return synchronousCopyD2H(Src.asSlice(), Dst, ElementCount); + } + + template + Error synchronousCopyH2D(llvm::ArrayRef Src, + GlobalDeviceMemorySlice Dst, + size_t ElementCount) { + if (ElementCount > Src.size()) + return make_error( + "copying too many elements, " + llvm::Twine(ElementCount) + + ", from a host array of element count " + llvm::Twine(Src.size())); + if (ElementCount > Dst.getElementCount()) + return make_error("copying too many elements, " + + llvm::Twine(ElementCount) + + ", to a device array of element count " + + llvm::Twine(Dst.getElementCount())); + return PDevice->synchronousCopyH2D(Src.data(), 0, Dst.getBaseMemory(), + Dst.getElementOffset() * sizeof(T), + ElementCount * sizeof(T)); + } + + template + Error synchronousCopyH2D(llvm::ArrayRef Src, + GlobalDeviceMemorySlice Dst) { + if (Src.size() != Dst.getElementCount()) + return make_error( + "array size mismatch for H2D, host source has element count " + + llvm::Twine(Src.size()) + + " but device destination has element count " + + llvm::Twine(Dst.getElementCount())); + return synchronousCopyH2D(Src, Dst, Dst.getElementCount()); + } + + template + Error synchronousCopyH2D(T *Src, GlobalDeviceMemorySlice Dst, + size_t ElementCount) { + return synchronousCopyH2D(llvm::ArrayRef(Src, ElementCount), Dst, + ElementCount); + } + + template + Error synchronousCopyH2D(llvm::ArrayRef Src, GlobalDeviceMemory Dst, + size_t ElementCount) { + return synchronousCopyH2D(Src, Dst.asSlice(), ElementCount); + } + + template + Error synchronousCopyH2D(llvm::ArrayRef Src, GlobalDeviceMemory Dst) { + return synchronousCopyH2D(Src, Dst.asSlice()); + } + + template + Error synchronousCopyH2D(T *Src, GlobalDeviceMemory Dst, + size_t ElementCount) { + return synchronousCopyH2D(Src, Dst.asSlice(), ElementCount); + } + + template + Error synchronousCopyD2D(GlobalDeviceMemorySlice Src, + GlobalDeviceMemorySlice Dst, + size_t ElementCount) { + if (ElementCount > Src.getElementCount()) + return make_error("copying too many elements, " + + llvm::Twine(ElementCount) + + ", from a device array of element count " + + llvm::Twine(Src.getElementCount())); + if (ElementCount > Dst.getElementCount()) + return make_error("copying too many elements, " + + llvm::Twine(ElementCount) + + ", to a device array of element count " + + llvm::Twine(Dst.getElementCount())); + return PDevice->synchronousCopyD2D( + Src.getBaseMemory(), Src.getElementOffset() * sizeof(T), + Dst.getBaseMemory(), Dst.getElementOffset() * sizeof(T), + ElementCount * sizeof(T)); + } + + template + Error synchronousCopyD2D(GlobalDeviceMemorySlice Src, + GlobalDeviceMemorySlice Dst) { + if (Src.getElementCount() != Dst.getElementCount()) + return make_error( + "array size mismatch for D2D, device source has element count " + + llvm::Twine(Src.getElementCount()) + + " but device destination has element count " + + llvm::Twine(Dst.getElementCount())); + return synchronousCopyD2D(Src, Dst, Src.getElementCount()); + } + + template + Error synchronousCopyD2D(GlobalDeviceMemory Src, + GlobalDeviceMemorySlice Dst, + size_t ElementCount) { + return synchronousCopyD2D(Src.asSlice(), Dst, ElementCount); + } + + template + Error synchronousCopyD2D(GlobalDeviceMemory Src, + GlobalDeviceMemorySlice Dst) { + return synchronousCopyD2D(Src.asSlice(), Dst); + } + + template + Error synchronousCopyD2D(GlobalDeviceMemorySlice Src, + GlobalDeviceMemory Dst, size_t ElementCount) { + return synchronousCopyD2D(Src, Dst.asSlice(), ElementCount); + } + + template + Error synchronousCopyD2D(GlobalDeviceMemorySlice Src, + GlobalDeviceMemory Dst) { + return synchronousCopyD2D(Src, Dst.asSlice()); + } + + template + Error synchronousCopyD2D(GlobalDeviceMemory Src, GlobalDeviceMemory Dst, + size_t ElementCount) { + return synchronousCopyD2D(Src.asSlice(), Dst.asSlice(), ElementCount); + } + + template + Error synchronousCopyD2D(GlobalDeviceMemory Src, + GlobalDeviceMemory Dst) { + return synchronousCopyD2D(Src.asSlice(), Dst.asSlice()); + } + + ///@} End host-synchronous device memory copying functions + +private: + PlatformDevice *PDevice; +}; + +} // namespace streamexecutor + +#endif // STREAMEXECUTOR_DEVICE_H Index: parallel-libs/trunk/streamexecutor/include/streamexecutor/Executor.h =================================================================== --- parallel-libs/trunk/streamexecutor/include/streamexecutor/Executor.h +++ parallel-libs/trunk/streamexecutor/include/streamexecutor/Executor.h @@ -1,300 +0,0 @@ -//===-- Executor.h - The Executor class -------------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// The Executor class which represents a single device of a specific platform. -/// -//===----------------------------------------------------------------------===// - -#ifndef STREAMEXECUTOR_EXECUTOR_H -#define STREAMEXECUTOR_EXECUTOR_H - -#include "streamexecutor/KernelSpec.h" -#include "streamexecutor/PlatformInterfaces.h" -#include "streamexecutor/Utils/Error.h" - -namespace streamexecutor { - -class KernelInterface; -class Stream; - -class Executor { -public: - explicit Executor(PlatformExecutor *PExecutor); - virtual ~Executor(); - - /// Gets the kernel implementation for the underlying platform. - virtual Expected> - getKernelImplementation(const MultiKernelLoaderSpec &Spec) { - // TODO(jhen): Implement this. - return nullptr; - } - - Expected> createStream(); - - /// Allocates an array of ElementCount entries of type T in device memory. - template - Expected> allocateDeviceMemory(size_t ElementCount) { - Expected MaybeBase = - PExecutor->allocateDeviceMemory(ElementCount * sizeof(T)); - if (!MaybeBase) - return MaybeBase.takeError(); - return GlobalDeviceMemory(*MaybeBase); - } - - /// Frees memory previously allocated with allocateDeviceMemory. - template Error freeDeviceMemory(GlobalDeviceMemory Memory) { - return PExecutor->freeDeviceMemory(Memory); - } - - /// Allocates an array of ElementCount entries of type T in host memory. - /// - /// Host memory allocated by this function can be used for asynchronous memory - /// copies on streams. See Stream::thenCopyD2H and Stream::thenCopyH2D. - template Expected allocateHostMemory(size_t ElementCount) { - Expected MaybeMemory = - PExecutor->allocateHostMemory(ElementCount * sizeof(T)); - if (!MaybeMemory) - return MaybeMemory.takeError(); - return static_cast(*MaybeMemory); - } - - /// Frees memory previously allocated with allocateHostMemory. - template Error freeHostMemory(T *Memory) { - return PExecutor->freeHostMemory(Memory); - } - - /// Registers a previously allocated host array of type T for asynchronous - /// memory operations. - /// - /// Host memory registered by this function can be used for asynchronous - /// memory copies on streams. See Stream::thenCopyD2H and Stream::thenCopyH2D. - template - Error registerHostMemory(T *Memory, size_t ElementCount) { - return PExecutor->registerHostMemory(Memory, ElementCount * sizeof(T)); - } - - /// Unregisters host memory previously registered by registerHostMemory. - template Error unregisterHostMemory(T *Memory) { - return PExecutor->unregisterHostMemory(Memory); - } - - /// \anchor ExecutorHostSyncCopyGroup - /// \name Host-synchronous device memory copying functions - /// - /// These methods block the calling host thread while copying data to or from - /// device memory. On the device side, these methods do not block any ongoing - /// device calls. - /// - /// There are no restrictions on the host memory that is used as a source or - /// destination in these copy methods, so there is no need to allocate that - /// host memory using allocateHostMemory or register it with - /// registerHostMemory. - /// - /// Each of these methods has a single template parameter, T, that specifies - /// the type of data being copied. The ElementCount arguments specify the - /// number of objects of type T to be copied. - /// - /// For ease of use, each of the methods is overloaded to take either a - /// GlobalDeviceMemorySlice or a GlobalDeviceMemory argument in the device - /// memory argument slots, and the GlobalDeviceMemory arguments are just - /// converted to GlobalDeviceMemorySlice arguments internally by using - /// GlobalDeviceMemory::asSlice. - /// - /// These methods perform bounds checking to make sure that the ElementCount - /// is not too large for the source or destination. For methods that do not - /// take an ElementCount argument, an error is returned if the source size - /// does not exactly match the destination size. - ///@{ - - template - Error synchronousCopyD2H(GlobalDeviceMemorySlice Src, - llvm::MutableArrayRef Dst, size_t ElementCount) { - if (ElementCount > Src.getElementCount()) - return make_error("copying too many elements, " + - llvm::Twine(ElementCount) + - ", from a device array of element count " + - llvm::Twine(Src.getElementCount())); - if (ElementCount > Dst.size()) - return make_error( - "copying too many elements, " + llvm::Twine(ElementCount) + - ", to a host array of element count " + llvm::Twine(Dst.size())); - return PExecutor->synchronousCopyD2H( - Src.getBaseMemory(), Src.getElementOffset() * sizeof(T), Dst.data(), 0, - ElementCount * sizeof(T)); - } - - template - Error synchronousCopyD2H(GlobalDeviceMemorySlice Src, - llvm::MutableArrayRef Dst) { - if (Src.getElementCount() != Dst.size()) - return make_error( - "array size mismatch for D2H, device source has element count " + - llvm::Twine(Src.getElementCount()) + - " but host destination has element count " + llvm::Twine(Dst.size())); - return synchronousCopyD2H(Src, Dst, Src.getElementCount()); - } - - template - Error synchronousCopyD2H(GlobalDeviceMemorySlice Src, T *Dst, - size_t ElementCount) { - return synchronousCopyD2H(Src, llvm::MutableArrayRef(Dst, ElementCount), - ElementCount); - } - - template - Error synchronousCopyD2H(GlobalDeviceMemory Src, - llvm::MutableArrayRef Dst, size_t ElementCount) { - return synchronousCopyD2H(Src.asSlice(), Dst, ElementCount); - } - - template - Error synchronousCopyD2H(GlobalDeviceMemory Src, - llvm::MutableArrayRef Dst) { - return synchronousCopyD2H(Src.asSlice(), Dst); - } - - template - Error synchronousCopyD2H(GlobalDeviceMemory Src, T *Dst, - size_t ElementCount) { - return synchronousCopyD2H(Src.asSlice(), Dst, ElementCount); - } - - template - Error synchronousCopyH2D(llvm::ArrayRef Src, - GlobalDeviceMemorySlice Dst, - size_t ElementCount) { - if (ElementCount > Src.size()) - return make_error( - "copying too many elements, " + llvm::Twine(ElementCount) + - ", from a host array of element count " + llvm::Twine(Src.size())); - if (ElementCount > Dst.getElementCount()) - return make_error("copying too many elements, " + - llvm::Twine(ElementCount) + - ", to a device array of element count " + - llvm::Twine(Dst.getElementCount())); - return PExecutor->synchronousCopyH2D(Src.data(), 0, Dst.getBaseMemory(), - Dst.getElementOffset() * sizeof(T), - ElementCount * sizeof(T)); - } - - template - Error synchronousCopyH2D(llvm::ArrayRef Src, - GlobalDeviceMemorySlice Dst) { - if (Src.size() != Dst.getElementCount()) - return make_error( - "array size mismatch for H2D, host source has element count " + - llvm::Twine(Src.size()) + - " but device destination has element count " + - llvm::Twine(Dst.getElementCount())); - return synchronousCopyH2D(Src, Dst, Dst.getElementCount()); - } - - template - Error synchronousCopyH2D(T *Src, GlobalDeviceMemorySlice Dst, - size_t ElementCount) { - return synchronousCopyH2D(llvm::ArrayRef(Src, ElementCount), Dst, - ElementCount); - } - - template - Error synchronousCopyH2D(llvm::ArrayRef Src, GlobalDeviceMemory Dst, - size_t ElementCount) { - return synchronousCopyH2D(Src, Dst.asSlice(), ElementCount); - } - - template - Error synchronousCopyH2D(llvm::ArrayRef Src, GlobalDeviceMemory Dst) { - return synchronousCopyH2D(Src, Dst.asSlice()); - } - - template - Error synchronousCopyH2D(T *Src, GlobalDeviceMemory Dst, - size_t ElementCount) { - return synchronousCopyH2D(Src, Dst.asSlice(), ElementCount); - } - - template - Error synchronousCopyD2D(GlobalDeviceMemorySlice Src, - GlobalDeviceMemorySlice Dst, - size_t ElementCount) { - if (ElementCount > Src.getElementCount()) - return make_error("copying too many elements, " + - llvm::Twine(ElementCount) + - ", from a device array of element count " + - llvm::Twine(Src.getElementCount())); - if (ElementCount > Dst.getElementCount()) - return make_error("copying too many elements, " + - llvm::Twine(ElementCount) + - ", to a device array of element count " + - llvm::Twine(Dst.getElementCount())); - return PExecutor->synchronousCopyD2D( - Src.getBaseMemory(), Src.getElementOffset() * sizeof(T), - Dst.getBaseMemory(), Dst.getElementOffset() * sizeof(T), - ElementCount * sizeof(T)); - } - - template - Error synchronousCopyD2D(GlobalDeviceMemorySlice Src, - GlobalDeviceMemorySlice Dst) { - if (Src.getElementCount() != Dst.getElementCount()) - return make_error( - "array size mismatch for D2D, device source has element count " + - llvm::Twine(Src.getElementCount()) + - " but device destination has element count " + - llvm::Twine(Dst.getElementCount())); - return synchronousCopyD2D(Src, Dst, Src.getElementCount()); - } - - template - Error synchronousCopyD2D(GlobalDeviceMemory Src, - GlobalDeviceMemorySlice Dst, - size_t ElementCount) { - return synchronousCopyD2D(Src.asSlice(), Dst, ElementCount); - } - - template - Error synchronousCopyD2D(GlobalDeviceMemory Src, - GlobalDeviceMemorySlice Dst) { - return synchronousCopyD2D(Src.asSlice(), Dst); - } - - template - Error synchronousCopyD2D(GlobalDeviceMemorySlice Src, - GlobalDeviceMemory Dst, size_t ElementCount) { - return synchronousCopyD2D(Src, Dst.asSlice(), ElementCount); - } - - template - Error synchronousCopyD2D(GlobalDeviceMemorySlice Src, - GlobalDeviceMemory Dst) { - return synchronousCopyD2D(Src, Dst.asSlice()); - } - - template - Error synchronousCopyD2D(GlobalDeviceMemory Src, GlobalDeviceMemory Dst, - size_t ElementCount) { - return synchronousCopyD2D(Src.asSlice(), Dst.asSlice(), ElementCount); - } - - template - Error synchronousCopyD2D(GlobalDeviceMemory Src, - GlobalDeviceMemory Dst) { - return synchronousCopyD2D(Src.asSlice(), Dst.asSlice()); - } - - ///@} End host-synchronous device memory copying functions - -private: - PlatformExecutor *PExecutor; -}; - -} // namespace streamexecutor - -#endif // STREAMEXECUTOR_EXECUTOR_H Index: parallel-libs/trunk/streamexecutor/include/streamexecutor/Kernel.h =================================================================== --- parallel-libs/trunk/streamexecutor/include/streamexecutor/Kernel.h +++ parallel-libs/trunk/streamexecutor/include/streamexecutor/Kernel.h @@ -54,13 +54,13 @@ /// function as follows: /// \code /// namespace ccn = compiler_cuda_namespace; -/// // Assumes Executor is a pointer to the StreamExecutor on which to -/// // launch the kernel. +/// // Assumes Device is a pointer to the Device on which to launch the +/// // kernel. /// // /// // See KernelSpec.h for details on how the compiler can create a /// // MultiKernelLoaderSpec instance like SaxpyKernelLoaderSpec below. /// Expected MaybeKernel = -/// ccn::SaxpyKernel::create(Executor, ccn::SaxpyKernelLoaderSpec); +/// ccn::SaxpyKernel::create(Device, ccn::SaxpyKernelLoaderSpec); /// if (!MaybeKernel) { /* Handle error */ } /// ccn::SaxpyKernel SaxpyKernel = *MaybeKernel; /// Launch(SaxpyKernel, A, X, Y); @@ -84,7 +84,7 @@ namespace streamexecutor { -class Executor; +class Device; class KernelInterface; /// The base class for device kernel functions. @@ -100,13 +100,13 @@ KernelBase &operator=(KernelBase &&) = default; ~KernelBase(); - /// Creates a kernel object from an Executor and a MultiKernelLoaderSpec. + /// Creates a kernel object from a Device and a MultiKernelLoaderSpec. /// - /// The Executor knows which platform it belongs to and the + /// The Device knows which platform it belongs to and the /// MultiKernelLoaderSpec knows how to find the kernel code for different /// platforms, so the combined information is enough to get the kernel code /// for the appropriate platform. - static Expected create(Executor *ParentExecutor, + static Expected create(Device *Dev, const MultiKernelLoaderSpec &Spec); const std::string &getName() const { return Name; } @@ -116,11 +116,11 @@ KernelInterface *getImplementation() { return Implementation.get(); } private: - KernelBase(Executor *ParentExecutor, const std::string &Name, + KernelBase(Device *Dev, const std::string &Name, const std::string &DemangledName, std::unique_ptr Implementation); - Executor *ParentExecutor; + Device *TheDevice; std::string Name; std::string DemangledName; std::unique_ptr Implementation; @@ -136,9 +136,9 @@ TypedKernel &operator=(TypedKernel &&) = default; /// Parameters here have the same meaning as in KernelBase::create. - static Expected create(Executor *ParentExecutor, + static Expected create(Device *Dev, const MultiKernelLoaderSpec &Spec) { - auto MaybeBase = KernelBase::create(ParentExecutor, Spec); + auto MaybeBase = KernelBase::create(Dev, Spec); if (!MaybeBase) { return MaybeBase.takeError(); } Index: parallel-libs/trunk/streamexecutor/include/streamexecutor/PlatformInterfaces.h =================================================================== --- parallel-libs/trunk/streamexecutor/include/streamexecutor/PlatformInterfaces.h +++ parallel-libs/trunk/streamexecutor/include/streamexecutor/PlatformInterfaces.h @@ -31,7 +31,7 @@ namespace streamexecutor { -class PlatformExecutor; +class PlatformDevice; /// Methods supported by device kernel function objects on all platforms. class KernelInterface { @@ -41,15 +41,14 @@ /// Platform-specific stream handle. class PlatformStreamHandle { public: - explicit PlatformStreamHandle(PlatformExecutor *PExecutor) - : PExecutor(PExecutor) {} + explicit PlatformStreamHandle(PlatformDevice *PDevice) : PDevice(PDevice) {} virtual ~PlatformStreamHandle(); - PlatformExecutor *getExecutor() { return PExecutor; } + PlatformDevice *getDevice() { return PDevice; } private: - PlatformExecutor *PExecutor; + PlatformDevice *PDevice; }; /// Raw executor methods that must be implemented by each platform. @@ -57,11 +56,11 @@ /// This class defines the platform interface that supports executing work on a /// device. /// -/// The public Executor and Stream classes have the type-safe versions of the +/// The public Device and Stream classes have the type-safe versions of the /// functions in this interface. -class PlatformExecutor { +class PlatformDevice { public: - virtual ~PlatformExecutor(); + virtual ~PlatformDevice(); virtual std::string getName() const = 0; Index: parallel-libs/trunk/streamexecutor/include/streamexecutor/Stream.h =================================================================== --- parallel-libs/trunk/streamexecutor/include/streamexecutor/Stream.h +++ parallel-libs/trunk/streamexecutor/include/streamexecutor/Stream.h @@ -12,19 +12,18 @@ /// A Stream instance represents a queue of sequential, host-asynchronous work /// to be performed on a device. /// -/// To enqueue work on a device, first create a Executor instance for a -/// given device and then use that Executor to create a Stream instance. -/// The Stream instance will perform its work on the device managed by the -/// Executor that created it. +/// To enqueue work on a device, first create a Device instance then use that +/// Device to create a Stream instance. The Stream instance will perform its +/// work on the device managed by the Device object that created it. /// /// The various "then" methods of the Stream object, such as thenCopyH2D and /// thenLaunch, may be used to enqueue work on the Stream, and the /// blockHostUntilDone() method may be used to block the host code until the /// Stream has completed all its work. /// -/// Multiple Stream instances can be created for the same Executor. This -/// allows several independent streams of computation to be performed -/// simultaneously on a single device. +/// Multiple Stream instances can be created for the same Device. This allows +/// several independent streams of computation to be performed simultaneously on +/// a single device. /// //===----------------------------------------------------------------------===// @@ -94,8 +93,8 @@ const ParameterTs &... Arguments) { auto ArgumentArray = make_kernel_argument_pack(Arguments...); - setError(PExecutor->launch(ThePlatformStream.get(), BlockSize, GridSize, - Kernel, ArgumentArray)); + setError(PDevice->launch(ThePlatformStream.get(), BlockSize, GridSize, + Kernel, ArgumentArray)); return *this; } @@ -105,13 +104,13 @@ /// return without waiting for the operation to complete. /// /// Any host memory used as a source or destination for one of these - /// operations must be allocated with Executor::allocateHostMemory or - /// registered with Executor::registerHostMemory. Otherwise, the enqueuing - /// operation may block until the copy operation is fully complete. + /// operations must be allocated with Device::allocateHostMemory or registered + /// with Device::registerHostMemory. Otherwise, the enqueuing operation may + /// block until the copy operation is fully complete. /// /// The arguments and bounds checking for these methods match the API of the - /// \ref ExecutorHostSyncCopyGroup - /// "host-synchronous device memory copying functions" of Executor. + /// \ref DeviceHostSyncCopyGroup + /// "host-synchronous device memory copying functions" of Device. ///@{ template @@ -125,9 +124,9 @@ setError("copying too many elements, " + llvm::Twine(ElementCount) + ", to a host array of element count " + llvm::Twine(Dst.size())); else - setError(PExecutor->copyD2H(ThePlatformStream.get(), Src.getBaseMemory(), - Src.getElementOffset() * sizeof(T), - Dst.data(), 0, ElementCount * sizeof(T))); + setError(PDevice->copyD2H(ThePlatformStream.get(), Src.getBaseMemory(), + Src.getElementOffset() * sizeof(T), Dst.data(), + 0, ElementCount * sizeof(T))); return *this; } @@ -182,7 +181,7 @@ ", to a device array of element count " + llvm::Twine(Dst.getElementCount())); else - setError(PExecutor->copyH2D( + setError(PDevice->copyH2D( ThePlatformStream.get(), Src.data(), 0, Dst.getBaseMemory(), Dst.getElementOffset() * sizeof(T), ElementCount * sizeof(T))); return *this; @@ -238,7 +237,7 @@ ", to a device array of element count " + llvm::Twine(Dst.getElementCount())); else - setError(PExecutor->copyD2D( + setError(PDevice->copyD2D( ThePlatformStream.get(), Src.getBaseMemory(), Src.getElementOffset() * sizeof(T), Dst.getBaseMemory(), Dst.getElementOffset() * sizeof(T), ElementCount * sizeof(T))); @@ -322,8 +321,8 @@ ErrorMessage = Message.str(); } - /// The PlatformExecutor that supports the operations of this stream. - PlatformExecutor *PExecutor; + /// The PlatformDevice that supports the operations of this stream. + PlatformDevice *PDevice; /// The platform-specific stream handle for this instance. std::unique_ptr ThePlatformStream; Index: parallel-libs/trunk/streamexecutor/lib/CMakeLists.txt =================================================================== --- parallel-libs/trunk/streamexecutor/lib/CMakeLists.txt +++ parallel-libs/trunk/streamexecutor/lib/CMakeLists.txt @@ -6,7 +6,7 @@ add_library( streamexecutor $ - Executor.cpp + Device.cpp Kernel.cpp KernelSpec.cpp PackedKernelArgumentArray.cpp Index: parallel-libs/trunk/streamexecutor/lib/Device.cpp =================================================================== --- parallel-libs/trunk/streamexecutor/lib/Device.cpp +++ parallel-libs/trunk/streamexecutor/lib/Device.cpp @@ -0,0 +1,41 @@ +//===-- Device.cpp - Device implementation --------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Implementation of Device class internals. +/// +//===----------------------------------------------------------------------===// + +#include "streamexecutor/Device.h" + +#include + +#include "streamexecutor/PlatformInterfaces.h" +#include "streamexecutor/Stream.h" + +#include "llvm/ADT/STLExtras.h" + +namespace streamexecutor { + +Device::Device(PlatformDevice *PDevice) : PDevice(PDevice) {} + +Device::~Device() = default; + +Expected> Device::createStream() { + Expected> MaybePlatformStream = + PDevice->createStream(); + if (!MaybePlatformStream) { + return MaybePlatformStream.takeError(); + } + assert((*MaybePlatformStream)->getDevice() == PDevice && + "an executor created a stream with a different stored executor"); + return llvm::make_unique(std::move(*MaybePlatformStream)); +} + +} // namespace streamexecutor Index: parallel-libs/trunk/streamexecutor/lib/Executor.cpp =================================================================== --- parallel-libs/trunk/streamexecutor/lib/Executor.cpp +++ parallel-libs/trunk/streamexecutor/lib/Executor.cpp @@ -1,41 +0,0 @@ -//===-- Executor.cpp - Executor implementation ----------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// Implementation of Executor class internals. -/// -//===----------------------------------------------------------------------===// - -#include "streamexecutor/Executor.h" - -#include - -#include "streamexecutor/PlatformInterfaces.h" -#include "streamexecutor/Stream.h" - -#include "llvm/ADT/STLExtras.h" - -namespace streamexecutor { - -Executor::Executor(PlatformExecutor *PExecutor) : PExecutor(PExecutor) {} - -Executor::~Executor() = default; - -Expected> Executor::createStream() { - Expected> MaybePlatformStream = - PExecutor->createStream(); - if (!MaybePlatformStream) { - return MaybePlatformStream.takeError(); - } - assert((*MaybePlatformStream)->getExecutor() == PExecutor && - "an executor created a stream with a different stored executor"); - return llvm::make_unique(std::move(*MaybePlatformStream)); -} - -} // namespace streamexecutor Index: parallel-libs/trunk/streamexecutor/lib/Kernel.cpp =================================================================== --- parallel-libs/trunk/streamexecutor/lib/Kernel.cpp +++ parallel-libs/trunk/streamexecutor/lib/Kernel.cpp @@ -13,31 +13,31 @@ //===----------------------------------------------------------------------===// #include "streamexecutor/Kernel.h" -#include "streamexecutor/Executor.h" +#include "streamexecutor/Device.h" #include "streamexecutor/PlatformInterfaces.h" #include "llvm/DebugInfo/Symbolize/Symbolize.h" namespace streamexecutor { -KernelBase::KernelBase(Executor *ParentExecutor, const std::string &Name, +KernelBase::KernelBase(Device *Dev, const std::string &Name, const std::string &DemangledName, std::unique_ptr Implementation) - : ParentExecutor(ParentExecutor), Name(Name), DemangledName(DemangledName), + : TheDevice(Dev), Name(Name), DemangledName(DemangledName), Implementation(std::move(Implementation)) {} KernelBase::~KernelBase() = default; -Expected KernelBase::create(Executor *ParentExecutor, +Expected KernelBase::create(Device *Dev, const MultiKernelLoaderSpec &Spec) { - auto MaybeImplementation = ParentExecutor->getKernelImplementation(Spec); + auto MaybeImplementation = Dev->getKernelImplementation(Spec); if (!MaybeImplementation) { return MaybeImplementation.takeError(); } std::string Name = Spec.getKernelName(); std::string DemangledName = llvm::symbolize::LLVMSymbolizer::DemangleName(Name, nullptr); - KernelBase Instance(ParentExecutor, Name, DemangledName, + KernelBase Instance(Dev, Name, DemangledName, std::move(*MaybeImplementation)); return std::move(Instance); } Index: parallel-libs/trunk/streamexecutor/lib/PlatformInterfaces.cpp =================================================================== --- parallel-libs/trunk/streamexecutor/lib/PlatformInterfaces.cpp +++ parallel-libs/trunk/streamexecutor/lib/PlatformInterfaces.cpp @@ -18,6 +18,6 @@ PlatformStreamHandle::~PlatformStreamHandle() = default; -PlatformExecutor::~PlatformExecutor() = default; +PlatformDevice::~PlatformDevice() = default; } // namespace streamexecutor Index: parallel-libs/trunk/streamexecutor/lib/Stream.cpp =================================================================== --- parallel-libs/trunk/streamexecutor/lib/Stream.cpp +++ parallel-libs/trunk/streamexecutor/lib/Stream.cpp @@ -17,8 +17,7 @@ namespace streamexecutor { Stream::Stream(std::unique_ptr PStream) - : PExecutor(PStream->getExecutor()), ThePlatformStream(std::move(PStream)) { -} + : PDevice(PStream->getDevice()), ThePlatformStream(std::move(PStream)) {} Stream::~Stream() = default; Index: parallel-libs/trunk/streamexecutor/lib/unittests/CMakeLists.txt =================================================================== --- parallel-libs/trunk/streamexecutor/lib/unittests/CMakeLists.txt +++ parallel-libs/trunk/streamexecutor/lib/unittests/CMakeLists.txt @@ -1,12 +1,12 @@ add_executable( - executor_test - ExecutorTest.cpp) + device_test + DeviceTest.cpp) target_link_libraries( - executor_test + device_test streamexecutor ${GTEST_BOTH_LIBRARIES} ${CMAKE_THREAD_LIBS_INIT}) -add_test(ExecutorTest executor_test) +add_test(DeviceTest device_test) add_executable( kernel_test Index: parallel-libs/trunk/streamexecutor/lib/unittests/DeviceTest.cpp =================================================================== --- parallel-libs/trunk/streamexecutor/lib/unittests/DeviceTest.cpp +++ parallel-libs/trunk/streamexecutor/lib/unittests/DeviceTest.cpp @@ -0,0 +1,476 @@ +//===-- DeviceTest.cpp - Tests for Device ---------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file contains the unit tests for Device code. +/// +//===----------------------------------------------------------------------===// + +#include +#include + +#include "streamexecutor/Device.h" +#include "streamexecutor/PlatformInterfaces.h" + +#include "gtest/gtest.h" + +namespace { + +namespace se = ::streamexecutor; + +class MockPlatformDevice : public se::PlatformDevice { +public: + ~MockPlatformDevice() override {} + + std::string getName() const override { return "MockPlatformDevice"; } + + se::Expected> + createStream() override { + return se::make_error("not implemented"); + } + + se::Expected + allocateDeviceMemory(size_t ByteCount) override { + return se::GlobalDeviceMemoryBase(std::malloc(ByteCount)); + } + + se::Error freeDeviceMemory(se::GlobalDeviceMemoryBase Memory) override { + std::free(const_cast(Memory.getHandle())); + return se::Error::success(); + } + + se::Expected allocateHostMemory(size_t ByteCount) override { + return std::malloc(ByteCount); + } + + se::Error freeHostMemory(void *Memory) override { + std::free(Memory); + return se::Error::success(); + } + + se::Error registerHostMemory(void *, size_t) override { + return se::Error::success(); + } + + se::Error unregisterHostMemory(void *) override { + return se::Error::success(); + } + + se::Error synchronousCopyD2H(const se::GlobalDeviceMemoryBase &DeviceSrc, + size_t SrcByteOffset, void *HostDst, + size_t DstByteOffset, + size_t ByteCount) override { + std::memcpy(static_cast(HostDst) + DstByteOffset, + static_cast(DeviceSrc.getHandle()) + + SrcByteOffset, + ByteCount); + return se::Error::success(); + } + + se::Error synchronousCopyH2D(const void *HostSrc, size_t SrcByteOffset, + se::GlobalDeviceMemoryBase DeviceDst, + size_t DstByteOffset, + size_t ByteCount) override { + std::memcpy(static_cast(const_cast(DeviceDst.getHandle())) + + DstByteOffset, + static_cast(HostSrc) + SrcByteOffset, ByteCount); + return se::Error::success(); + } + + se::Error synchronousCopyD2D(se::GlobalDeviceMemoryBase DeviceDst, + size_t DstByteOffset, + const se::GlobalDeviceMemoryBase &DeviceSrc, + size_t SrcByteOffset, + size_t ByteCount) override { + std::memcpy(static_cast(const_cast(DeviceDst.getHandle())) + + DstByteOffset, + static_cast(DeviceSrc.getHandle()) + + SrcByteOffset, + ByteCount); + return se::Error::success(); + } +}; + +/// Test fixture to hold objects used by tests. +class DeviceTest : public ::testing::Test { +public: + DeviceTest() + : HostA5{0, 1, 2, 3, 4}, HostB5{5, 6, 7, 8, 9}, + HostA7{10, 11, 12, 13, 14, 15, 16}, HostB7{17, 18, 19, 20, 21, 22, 23}, + DeviceA5(se::GlobalDeviceMemory::makeFromElementCount(HostA5, 5)), + DeviceB5(se::GlobalDeviceMemory::makeFromElementCount(HostB5, 5)), + DeviceA7(se::GlobalDeviceMemory::makeFromElementCount(HostA7, 7)), + DeviceB7(se::GlobalDeviceMemory::makeFromElementCount(HostB7, 7)), + Host5{24, 25, 26, 27, 28}, Host7{29, 30, 31, 32, 33, 34, 35}, + Device(&PDevice) {} + + // Device memory is backed by host arrays. + int HostA5[5]; + int HostB5[5]; + int HostA7[7]; + int HostB7[7]; + se::GlobalDeviceMemory DeviceA5; + se::GlobalDeviceMemory DeviceB5; + se::GlobalDeviceMemory DeviceA7; + se::GlobalDeviceMemory DeviceB7; + + // Host memory to be used as actual host memory. + int Host5[5]; + int Host7[7]; + + MockPlatformDevice PDevice; + se::Device Device; +}; + +#define EXPECT_NO_ERROR(E) EXPECT_FALSE(static_cast(E)) +#define EXPECT_ERROR(E) \ + do { \ + se::Error E__ = E; \ + EXPECT_TRUE(static_cast(E__)); \ + consumeError(std::move(E__)); \ + } while (false) + +using llvm::ArrayRef; +using llvm::MutableArrayRef; + +TEST_F(DeviceTest, AllocateAndFreeDeviceMemory) { + se::Expected> MaybeMemory = + Device.allocateDeviceMemory(10); + EXPECT_TRUE(static_cast(MaybeMemory)); + EXPECT_NO_ERROR(Device.freeDeviceMemory(*MaybeMemory)); +} + +TEST_F(DeviceTest, AllocateAndFreeHostMemory) { + se::Expected MaybeMemory = Device.allocateHostMemory(10); + EXPECT_TRUE(static_cast(MaybeMemory)); + EXPECT_NO_ERROR(Device.freeHostMemory(*MaybeMemory)); +} + +TEST_F(DeviceTest, RegisterAndUnregisterHostMemory) { + std::vector Data(10); + EXPECT_NO_ERROR(Device.registerHostMemory(Data.data(), 10)); + EXPECT_NO_ERROR(Device.unregisterHostMemory(Data.data())); +} + +// D2H tests + +TEST_F(DeviceTest, SyncCopyD2HToMutableArrayRefByCount) { + EXPECT_NO_ERROR( + Device.synchronousCopyD2H(DeviceA5, MutableArrayRef(Host5), 5)); + for (int I = 0; I < 5; ++I) { + EXPECT_EQ(HostA5[I], Host5[I]); + } + + EXPECT_NO_ERROR( + Device.synchronousCopyD2H(DeviceB5, MutableArrayRef(Host5), 2)); + for (int I = 0; I < 2; ++I) { + EXPECT_EQ(HostB5[I], Host5[I]); + } + + EXPECT_ERROR( + Device.synchronousCopyD2H(DeviceA7, MutableArrayRef(Host5), 7)); + + EXPECT_ERROR( + Device.synchronousCopyD2H(DeviceA5, MutableArrayRef(Host7), 7)); + + EXPECT_ERROR( + Device.synchronousCopyD2H(DeviceA5, MutableArrayRef(Host5), 7)); +} + +TEST_F(DeviceTest, SyncCopyD2HToMutableArrayRef) { + EXPECT_NO_ERROR( + Device.synchronousCopyD2H(DeviceA5, MutableArrayRef(Host5))); + for (int I = 0; I < 5; ++I) { + EXPECT_EQ(HostA5[I], Host5[I]); + } + + EXPECT_ERROR( + Device.synchronousCopyD2H(DeviceA7, MutableArrayRef(Host5))); + + EXPECT_ERROR( + Device.synchronousCopyD2H(DeviceA5, MutableArrayRef(Host7))); +} + +TEST_F(DeviceTest, SyncCopyD2HToPointer) { + EXPECT_NO_ERROR(Device.synchronousCopyD2H(DeviceA5, Host5, 5)); + for (int I = 0; I < 5; ++I) { + EXPECT_EQ(HostA5[I], Host5[I]); + } + + EXPECT_ERROR(Device.synchronousCopyD2H(DeviceA5, Host7, 7)); +} + +TEST_F(DeviceTest, SyncCopyD2HSliceToMutableArrayRefByCount) { + EXPECT_NO_ERROR(Device.synchronousCopyD2H( + DeviceA5.asSlice().drop_front(1), MutableArrayRef(Host5 + 1, 4), 4)); + for (int I = 1; I < 5; ++I) { + EXPECT_EQ(HostA5[I], Host5[I]); + } + + EXPECT_NO_ERROR(Device.synchronousCopyD2H(DeviceB5.asSlice().drop_back(1), + MutableArrayRef(Host5), 2)); + for (int I = 0; I < 2; ++I) { + EXPECT_EQ(HostB5[I], Host5[I]); + } + + EXPECT_ERROR(Device.synchronousCopyD2H(DeviceA7.asSlice(), + MutableArrayRef(Host5), 7)); + + EXPECT_ERROR(Device.synchronousCopyD2H(DeviceA5.asSlice(), + MutableArrayRef(Host7), 7)); + + EXPECT_ERROR(Device.synchronousCopyD2H(DeviceA5.asSlice(), + MutableArrayRef(Host5), 7)); +} + +TEST_F(DeviceTest, SyncCopyD2HSliceToMutableArrayRef) { + EXPECT_NO_ERROR(Device.synchronousCopyD2H(DeviceA7.asSlice().slice(1, 5), + MutableArrayRef(Host5))); + for (int I = 0; I < 5; ++I) { + EXPECT_EQ(HostA7[I + 1], Host5[I]); + } + + EXPECT_ERROR(Device.synchronousCopyD2H(DeviceA7.asSlice().drop_back(1), + MutableArrayRef(Host5))); + + EXPECT_ERROR(Device.synchronousCopyD2H(DeviceA5.asSlice(), + MutableArrayRef(Host7))); +} + +TEST_F(DeviceTest, SyncCopyD2HSliceToPointer) { + EXPECT_NO_ERROR(Device.synchronousCopyD2H(DeviceA5.asSlice().drop_front(1), + Host5 + 1, 4)); + for (int I = 1; I < 5; ++I) { + EXPECT_EQ(HostA5[I], Host5[I]); + } + + EXPECT_ERROR(Device.synchronousCopyD2H(DeviceA5.asSlice(), Host7, 7)); +} + +// H2D tests + +TEST_F(DeviceTest, SyncCopyH2DToArrayRefByCount) { + EXPECT_NO_ERROR(Device.synchronousCopyH2D(ArrayRef(Host5), DeviceA5, 5)); + for (int I = 0; I < 5; ++I) { + EXPECT_EQ(HostA5[I], Host5[I]); + } + + EXPECT_NO_ERROR(Device.synchronousCopyH2D(ArrayRef(Host5), DeviceB5, 2)); + for (int I = 0; I < 2; ++I) { + EXPECT_EQ(HostB5[I], Host5[I]); + } + + EXPECT_ERROR(Device.synchronousCopyH2D(ArrayRef(Host7), DeviceA5, 7)); + + EXPECT_ERROR(Device.synchronousCopyH2D(ArrayRef(Host5), DeviceA7, 7)); + + EXPECT_ERROR(Device.synchronousCopyH2D(ArrayRef(Host5), DeviceA5, 7)); +} + +TEST_F(DeviceTest, SyncCopyH2DToArrayRef) { + EXPECT_NO_ERROR(Device.synchronousCopyH2D(ArrayRef(Host5), DeviceA5)); + for (int I = 0; I < 5; ++I) { + EXPECT_EQ(HostA5[I], Host5[I]); + } + + EXPECT_ERROR(Device.synchronousCopyH2D(ArrayRef(Host5), DeviceA7)); + + EXPECT_ERROR(Device.synchronousCopyH2D(ArrayRef(Host7), DeviceA5)); +} + +TEST_F(DeviceTest, SyncCopyH2DToPointer) { + EXPECT_NO_ERROR(Device.synchronousCopyH2D(Host5, DeviceA5, 5)); + for (int I = 0; I < 5; ++I) { + EXPECT_EQ(HostA5[I], Host5[I]); + } + + EXPECT_ERROR(Device.synchronousCopyH2D(Host7, DeviceA5, 7)); +} + +TEST_F(DeviceTest, SyncCopyH2DSliceToArrayRefByCount) { + EXPECT_NO_ERROR(Device.synchronousCopyH2D( + ArrayRef(Host5 + 1, 4), DeviceA5.asSlice().drop_front(1), 4)); + for (int I = 1; I < 5; ++I) { + EXPECT_EQ(HostA5[I], Host5[I]); + } + + EXPECT_NO_ERROR(Device.synchronousCopyH2D( + ArrayRef(Host5), DeviceB5.asSlice().drop_back(1), 2)); + for (int I = 0; I < 2; ++I) { + EXPECT_EQ(HostB5[I], Host5[I]); + } + + EXPECT_ERROR( + Device.synchronousCopyH2D(ArrayRef(Host7), DeviceA5.asSlice(), 7)); + + EXPECT_ERROR( + Device.synchronousCopyH2D(ArrayRef(Host5), DeviceA7.asSlice(), 7)); + + EXPECT_ERROR( + Device.synchronousCopyH2D(ArrayRef(Host5), DeviceA5.asSlice(), 7)); +} + +TEST_F(DeviceTest, SyncCopyH2DSliceToArrayRef) { + EXPECT_NO_ERROR( + Device.synchronousCopyH2D(ArrayRef(Host5), DeviceA5.asSlice())); + for (int I = 0; I < 5; ++I) { + EXPECT_EQ(HostA5[I], Host5[I]); + } + + EXPECT_ERROR( + Device.synchronousCopyH2D(ArrayRef(Host5), DeviceA7.asSlice())); + + EXPECT_ERROR( + Device.synchronousCopyH2D(ArrayRef(Host7), DeviceA5.asSlice())); +} + +TEST_F(DeviceTest, SyncCopyH2DSliceToPointer) { + EXPECT_NO_ERROR(Device.synchronousCopyH2D(Host5, DeviceA5.asSlice(), 5)); + for (int I = 0; I < 5; ++I) { + EXPECT_EQ(HostA5[I], Host5[I]); + } + + EXPECT_ERROR(Device.synchronousCopyH2D(Host7, DeviceA5.asSlice(), 7)); +} + +// D2D tests + +TEST_F(DeviceTest, SyncCopyD2DByCount) { + EXPECT_NO_ERROR(Device.synchronousCopyD2D(DeviceA5, DeviceB5, 5)); + for (int I = 0; I < 5; ++I) { + EXPECT_EQ(HostA5[I], HostB5[I]); + } + + EXPECT_NO_ERROR(Device.synchronousCopyD2D(DeviceA7, DeviceB7, 2)); + for (int I = 0; I < 2; ++I) { + EXPECT_EQ(HostA7[I], HostB7[I]); + } + + EXPECT_ERROR(Device.synchronousCopyD2D(DeviceA5, DeviceB5, 7)); + + EXPECT_ERROR(Device.synchronousCopyD2D(DeviceA7, DeviceB5, 7)); + + EXPECT_ERROR(Device.synchronousCopyD2D(DeviceA5, DeviceB7, 7)); +} + +TEST_F(DeviceTest, SyncCopyD2D) { + EXPECT_NO_ERROR(Device.synchronousCopyD2D(DeviceA5, DeviceB5)); + for (int I = 0; I < 5; ++I) { + EXPECT_EQ(HostA5[I], HostB5[I]); + } + + EXPECT_ERROR(Device.synchronousCopyD2D(DeviceA7, DeviceB5)); + + EXPECT_ERROR(Device.synchronousCopyD2D(DeviceA5, DeviceB7)); +} + +TEST_F(DeviceTest, SyncCopySliceD2DByCount) { + EXPECT_NO_ERROR( + Device.synchronousCopyD2D(DeviceA5.asSlice().drop_front(1), DeviceB5, 4)); + for (int I = 0; I < 4; ++I) { + EXPECT_EQ(HostA5[I + 1], HostB5[I]); + } + + EXPECT_NO_ERROR( + Device.synchronousCopyD2D(DeviceA7.asSlice().drop_back(1), DeviceB7, 2)); + for (int I = 0; I < 2; ++I) { + EXPECT_EQ(HostA7[I], HostB7[I]); + } + + EXPECT_ERROR(Device.synchronousCopyD2D(DeviceA5.asSlice(), DeviceB5, 7)); + + EXPECT_ERROR(Device.synchronousCopyD2D(DeviceA7.asSlice(), DeviceB5, 7)); + + EXPECT_ERROR(Device.synchronousCopyD2D(DeviceA5.asSlice(), DeviceB7, 7)); +} + +TEST_F(DeviceTest, SyncCopySliceD2D) { + EXPECT_NO_ERROR( + Device.synchronousCopyD2D(DeviceA7.asSlice().drop_back(2), DeviceB5)); + for (int I = 0; I < 5; ++I) { + EXPECT_EQ(HostA7[I], HostB5[I]); + } + + EXPECT_ERROR( + Device.synchronousCopyD2D(DeviceA7.asSlice().drop_front(1), DeviceB5)); + + EXPECT_ERROR( + Device.synchronousCopyD2D(DeviceA5.asSlice().drop_back(1), DeviceB7)); +} + +TEST_F(DeviceTest, SyncCopyD2DSliceByCount) { + EXPECT_NO_ERROR( + Device.synchronousCopyD2D(DeviceA5, DeviceB7.asSlice().drop_front(2), 5)); + for (int I = 0; I < 5; ++I) { + EXPECT_EQ(HostA5[I], HostB7[I + 2]); + } + + EXPECT_NO_ERROR( + Device.synchronousCopyD2D(DeviceA7, DeviceB7.asSlice().drop_back(3), 2)); + for (int I = 0; I < 2; ++I) { + EXPECT_EQ(HostA7[I], HostB7[I]); + } + + EXPECT_ERROR(Device.synchronousCopyD2D(DeviceA5, DeviceB5.asSlice(), 7)); + + EXPECT_ERROR(Device.synchronousCopyD2D(DeviceA7, DeviceB5.asSlice(), 7)); + + EXPECT_ERROR(Device.synchronousCopyD2D(DeviceA5, DeviceB7.asSlice(), 7)); +} + +TEST_F(DeviceTest, SyncCopyD2DSlice) { + EXPECT_NO_ERROR( + Device.synchronousCopyD2D(DeviceA5, DeviceB7.asSlice().drop_back(2))); + for (int I = 0; I < 5; ++I) { + EXPECT_EQ(HostA5[I], HostB7[I]); + } + + EXPECT_ERROR(Device.synchronousCopyD2D(DeviceA7, DeviceB5.asSlice())); + + EXPECT_ERROR(Device.synchronousCopyD2D(DeviceA5, DeviceB7.asSlice())); +} + +TEST_F(DeviceTest, SyncCopySliceD2DSliceByCount) { + EXPECT_NO_ERROR( + Device.synchronousCopyD2D(DeviceA5.asSlice(), DeviceB5.asSlice(), 5)); + for (int I = 0; I < 5; ++I) { + EXPECT_EQ(HostA5[I], HostB5[I]); + } + + EXPECT_NO_ERROR( + Device.synchronousCopyD2D(DeviceA7.asSlice(), DeviceB7.asSlice(), 2)); + for (int I = 0; I < 2; ++I) { + EXPECT_EQ(HostA7[I], HostB7[I]); + } + + EXPECT_ERROR( + Device.synchronousCopyD2D(DeviceA5.asSlice(), DeviceB5.asSlice(), 7)); + + EXPECT_ERROR( + Device.synchronousCopyD2D(DeviceA7.asSlice(), DeviceB5.asSlice(), 7)); + + EXPECT_ERROR( + Device.synchronousCopyD2D(DeviceA5.asSlice(), DeviceB7.asSlice(), 7)); +} + +TEST_F(DeviceTest, SyncCopySliceD2DSlice) { + EXPECT_NO_ERROR( + Device.synchronousCopyD2D(DeviceA5.asSlice(), DeviceB5.asSlice())); + for (int I = 0; I < 5; ++I) { + EXPECT_EQ(HostA5[I], HostB5[I]); + } + + EXPECT_ERROR( + Device.synchronousCopyD2D(DeviceA7.asSlice(), DeviceB5.asSlice())); + + EXPECT_ERROR( + Device.synchronousCopyD2D(DeviceA5.asSlice(), DeviceB7.asSlice())); +} + +} // namespace Index: parallel-libs/trunk/streamexecutor/lib/unittests/ExecutorTest.cpp =================================================================== --- parallel-libs/trunk/streamexecutor/lib/unittests/ExecutorTest.cpp +++ parallel-libs/trunk/streamexecutor/lib/unittests/ExecutorTest.cpp @@ -1,478 +0,0 @@ -//===-- ExecutorTest.cpp - Tests for Executor -----------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// This file contains the unit tests for Executor code. -/// -//===----------------------------------------------------------------------===// - -#include -#include - -#include "streamexecutor/Executor.h" -#include "streamexecutor/PlatformInterfaces.h" - -#include "gtest/gtest.h" - -namespace { - -namespace se = ::streamexecutor; - -class MockPlatformExecutor : public se::PlatformExecutor { -public: - ~MockPlatformExecutor() override {} - - std::string getName() const override { return "MockPlatformExecutor"; } - - se::Expected> - createStream() override { - return se::make_error("not implemented"); - } - - se::Expected - allocateDeviceMemory(size_t ByteCount) override { - return se::GlobalDeviceMemoryBase(std::malloc(ByteCount)); - } - - se::Error freeDeviceMemory(se::GlobalDeviceMemoryBase Memory) override { - std::free(const_cast(Memory.getHandle())); - return se::Error::success(); - } - - se::Expected allocateHostMemory(size_t ByteCount) override { - return std::malloc(ByteCount); - } - - se::Error freeHostMemory(void *Memory) override { - std::free(Memory); - return se::Error::success(); - } - - se::Error registerHostMemory(void *, size_t) override { - return se::Error::success(); - } - - se::Error unregisterHostMemory(void *) override { - return se::Error::success(); - } - - se::Error synchronousCopyD2H(const se::GlobalDeviceMemoryBase &DeviceSrc, - size_t SrcByteOffset, void *HostDst, - size_t DstByteOffset, - size_t ByteCount) override { - std::memcpy(static_cast(HostDst) + DstByteOffset, - static_cast(DeviceSrc.getHandle()) + - SrcByteOffset, - ByteCount); - return se::Error::success(); - } - - se::Error synchronousCopyH2D(const void *HostSrc, size_t SrcByteOffset, - se::GlobalDeviceMemoryBase DeviceDst, - size_t DstByteOffset, - size_t ByteCount) override { - std::memcpy(static_cast(const_cast(DeviceDst.getHandle())) + - DstByteOffset, - static_cast(HostSrc) + SrcByteOffset, ByteCount); - return se::Error::success(); - } - - se::Error synchronousCopyD2D(se::GlobalDeviceMemoryBase DeviceDst, - size_t DstByteOffset, - const se::GlobalDeviceMemoryBase &DeviceSrc, - size_t SrcByteOffset, - size_t ByteCount) override { - std::memcpy(static_cast(const_cast(DeviceDst.getHandle())) + - DstByteOffset, - static_cast(DeviceSrc.getHandle()) + - SrcByteOffset, - ByteCount); - return se::Error::success(); - } -}; - -/// Test fixture to hold objects used by tests. -class ExecutorTest : public ::testing::Test { -public: - ExecutorTest() - : HostA5{0, 1, 2, 3, 4}, HostB5{5, 6, 7, 8, 9}, - HostA7{10, 11, 12, 13, 14, 15, 16}, HostB7{17, 18, 19, 20, 21, 22, 23}, - DeviceA5(se::GlobalDeviceMemory::makeFromElementCount(HostA5, 5)), - DeviceB5(se::GlobalDeviceMemory::makeFromElementCount(HostB5, 5)), - DeviceA7(se::GlobalDeviceMemory::makeFromElementCount(HostA7, 7)), - DeviceB7(se::GlobalDeviceMemory::makeFromElementCount(HostB7, 7)), - Host5{24, 25, 26, 27, 28}, Host7{29, 30, 31, 32, 33, 34, 35}, - Executor(&PExecutor) {} - - // Device memory is backed by host arrays. - int HostA5[5]; - int HostB5[5]; - int HostA7[7]; - int HostB7[7]; - se::GlobalDeviceMemory DeviceA5; - se::GlobalDeviceMemory DeviceB5; - se::GlobalDeviceMemory DeviceA7; - se::GlobalDeviceMemory DeviceB7; - - // Host memory to be used as actual host memory. - int Host5[5]; - int Host7[7]; - - MockPlatformExecutor PExecutor; - se::Executor Executor; -}; - -#define EXPECT_NO_ERROR(E) EXPECT_FALSE(static_cast(E)) -#define EXPECT_ERROR(E) \ - do { \ - se::Error E__ = E; \ - EXPECT_TRUE(static_cast(E__)); \ - consumeError(std::move(E__)); \ - } while (false) - -using llvm::ArrayRef; -using llvm::MutableArrayRef; - -TEST_F(ExecutorTest, AllocateAndFreeDeviceMemory) { - se::Expected> MaybeMemory = - Executor.allocateDeviceMemory(10); - EXPECT_TRUE(static_cast(MaybeMemory)); - EXPECT_NO_ERROR(Executor.freeDeviceMemory(*MaybeMemory)); -} - -TEST_F(ExecutorTest, AllocateAndFreeHostMemory) { - se::Expected MaybeMemory = Executor.allocateHostMemory(10); - EXPECT_TRUE(static_cast(MaybeMemory)); - EXPECT_NO_ERROR(Executor.freeHostMemory(*MaybeMemory)); -} - -TEST_F(ExecutorTest, RegisterAndUnregisterHostMemory) { - std::vector Data(10); - EXPECT_NO_ERROR(Executor.registerHostMemory(Data.data(), 10)); - EXPECT_NO_ERROR(Executor.unregisterHostMemory(Data.data())); -} - -// D2H tests - -TEST_F(ExecutorTest, SyncCopyD2HToMutableArrayRefByCount) { - EXPECT_NO_ERROR( - Executor.synchronousCopyD2H(DeviceA5, MutableArrayRef(Host5), 5)); - for (int I = 0; I < 5; ++I) { - EXPECT_EQ(HostA5[I], Host5[I]); - } - - EXPECT_NO_ERROR( - Executor.synchronousCopyD2H(DeviceB5, MutableArrayRef(Host5), 2)); - for (int I = 0; I < 2; ++I) { - EXPECT_EQ(HostB5[I], Host5[I]); - } - - EXPECT_ERROR( - Executor.synchronousCopyD2H(DeviceA7, MutableArrayRef(Host5), 7)); - - EXPECT_ERROR( - Executor.synchronousCopyD2H(DeviceA5, MutableArrayRef(Host7), 7)); - - EXPECT_ERROR( - Executor.synchronousCopyD2H(DeviceA5, MutableArrayRef(Host5), 7)); -} - -TEST_F(ExecutorTest, SyncCopyD2HToMutableArrayRef) { - EXPECT_NO_ERROR( - Executor.synchronousCopyD2H(DeviceA5, MutableArrayRef(Host5))); - for (int I = 0; I < 5; ++I) { - EXPECT_EQ(HostA5[I], Host5[I]); - } - - EXPECT_ERROR( - Executor.synchronousCopyD2H(DeviceA7, MutableArrayRef(Host5))); - - EXPECT_ERROR( - Executor.synchronousCopyD2H(DeviceA5, MutableArrayRef(Host7))); -} - -TEST_F(ExecutorTest, SyncCopyD2HToPointer) { - EXPECT_NO_ERROR(Executor.synchronousCopyD2H(DeviceA5, Host5, 5)); - for (int I = 0; I < 5; ++I) { - EXPECT_EQ(HostA5[I], Host5[I]); - } - - EXPECT_ERROR(Executor.synchronousCopyD2H(DeviceA5, Host7, 7)); -} - -TEST_F(ExecutorTest, SyncCopyD2HSliceToMutableArrayRefByCount) { - EXPECT_NO_ERROR(Executor.synchronousCopyD2H( - DeviceA5.asSlice().drop_front(1), MutableArrayRef(Host5 + 1, 4), 4)); - for (int I = 1; I < 5; ++I) { - EXPECT_EQ(HostA5[I], Host5[I]); - } - - EXPECT_NO_ERROR(Executor.synchronousCopyD2H(DeviceB5.asSlice().drop_back(1), - MutableArrayRef(Host5), 2)); - for (int I = 0; I < 2; ++I) { - EXPECT_EQ(HostB5[I], Host5[I]); - } - - EXPECT_ERROR(Executor.synchronousCopyD2H(DeviceA7.asSlice(), - MutableArrayRef(Host5), 7)); - - EXPECT_ERROR(Executor.synchronousCopyD2H(DeviceA5.asSlice(), - MutableArrayRef(Host7), 7)); - - EXPECT_ERROR(Executor.synchronousCopyD2H(DeviceA5.asSlice(), - MutableArrayRef(Host5), 7)); -} - -TEST_F(ExecutorTest, SyncCopyD2HSliceToMutableArrayRef) { - EXPECT_NO_ERROR(Executor.synchronousCopyD2H(DeviceA7.asSlice().slice(1, 5), - MutableArrayRef(Host5))); - for (int I = 0; I < 5; ++I) { - EXPECT_EQ(HostA7[I + 1], Host5[I]); - } - - EXPECT_ERROR(Executor.synchronousCopyD2H(DeviceA7.asSlice().drop_back(1), - MutableArrayRef(Host5))); - - EXPECT_ERROR(Executor.synchronousCopyD2H(DeviceA5.asSlice(), - MutableArrayRef(Host7))); -} - -TEST_F(ExecutorTest, SyncCopyD2HSliceToPointer) { - EXPECT_NO_ERROR(Executor.synchronousCopyD2H(DeviceA5.asSlice().drop_front(1), - Host5 + 1, 4)); - for (int I = 1; I < 5; ++I) { - EXPECT_EQ(HostA5[I], Host5[I]); - } - - EXPECT_ERROR(Executor.synchronousCopyD2H(DeviceA5.asSlice(), Host7, 7)); -} - -// H2D tests - -TEST_F(ExecutorTest, SyncCopyH2DToArrayRefByCount) { - EXPECT_NO_ERROR( - Executor.synchronousCopyH2D(ArrayRef(Host5), DeviceA5, 5)); - for (int I = 0; I < 5; ++I) { - EXPECT_EQ(HostA5[I], Host5[I]); - } - - EXPECT_NO_ERROR( - Executor.synchronousCopyH2D(ArrayRef(Host5), DeviceB5, 2)); - for (int I = 0; I < 2; ++I) { - EXPECT_EQ(HostB5[I], Host5[I]); - } - - EXPECT_ERROR(Executor.synchronousCopyH2D(ArrayRef(Host7), DeviceA5, 7)); - - EXPECT_ERROR(Executor.synchronousCopyH2D(ArrayRef(Host5), DeviceA7, 7)); - - EXPECT_ERROR(Executor.synchronousCopyH2D(ArrayRef(Host5), DeviceA5, 7)); -} - -TEST_F(ExecutorTest, SyncCopyH2DToArrayRef) { - EXPECT_NO_ERROR(Executor.synchronousCopyH2D(ArrayRef(Host5), DeviceA5)); - for (int I = 0; I < 5; ++I) { - EXPECT_EQ(HostA5[I], Host5[I]); - } - - EXPECT_ERROR(Executor.synchronousCopyH2D(ArrayRef(Host5), DeviceA7)); - - EXPECT_ERROR(Executor.synchronousCopyH2D(ArrayRef(Host7), DeviceA5)); -} - -TEST_F(ExecutorTest, SyncCopyH2DToPointer) { - EXPECT_NO_ERROR(Executor.synchronousCopyH2D(Host5, DeviceA5, 5)); - for (int I = 0; I < 5; ++I) { - EXPECT_EQ(HostA5[I], Host5[I]); - } - - EXPECT_ERROR(Executor.synchronousCopyH2D(Host7, DeviceA5, 7)); -} - -TEST_F(ExecutorTest, SyncCopyH2DSliceToArrayRefByCount) { - EXPECT_NO_ERROR(Executor.synchronousCopyH2D( - ArrayRef(Host5 + 1, 4), DeviceA5.asSlice().drop_front(1), 4)); - for (int I = 1; I < 5; ++I) { - EXPECT_EQ(HostA5[I], Host5[I]); - } - - EXPECT_NO_ERROR(Executor.synchronousCopyH2D( - ArrayRef(Host5), DeviceB5.asSlice().drop_back(1), 2)); - for (int I = 0; I < 2; ++I) { - EXPECT_EQ(HostB5[I], Host5[I]); - } - - EXPECT_ERROR( - Executor.synchronousCopyH2D(ArrayRef(Host7), DeviceA5.asSlice(), 7)); - - EXPECT_ERROR( - Executor.synchronousCopyH2D(ArrayRef(Host5), DeviceA7.asSlice(), 7)); - - EXPECT_ERROR( - Executor.synchronousCopyH2D(ArrayRef(Host5), DeviceA5.asSlice(), 7)); -} - -TEST_F(ExecutorTest, SyncCopyH2DSliceToArrayRef) { - EXPECT_NO_ERROR( - Executor.synchronousCopyH2D(ArrayRef(Host5), DeviceA5.asSlice())); - for (int I = 0; I < 5; ++I) { - EXPECT_EQ(HostA5[I], Host5[I]); - } - - EXPECT_ERROR( - Executor.synchronousCopyH2D(ArrayRef(Host5), DeviceA7.asSlice())); - - EXPECT_ERROR( - Executor.synchronousCopyH2D(ArrayRef(Host7), DeviceA5.asSlice())); -} - -TEST_F(ExecutorTest, SyncCopyH2DSliceToPointer) { - EXPECT_NO_ERROR(Executor.synchronousCopyH2D(Host5, DeviceA5.asSlice(), 5)); - for (int I = 0; I < 5; ++I) { - EXPECT_EQ(HostA5[I], Host5[I]); - } - - EXPECT_ERROR(Executor.synchronousCopyH2D(Host7, DeviceA5.asSlice(), 7)); -} - -// D2D tests - -TEST_F(ExecutorTest, SyncCopyD2DByCount) { - EXPECT_NO_ERROR(Executor.synchronousCopyD2D(DeviceA5, DeviceB5, 5)); - for (int I = 0; I < 5; ++I) { - EXPECT_EQ(HostA5[I], HostB5[I]); - } - - EXPECT_NO_ERROR(Executor.synchronousCopyD2D(DeviceA7, DeviceB7, 2)); - for (int I = 0; I < 2; ++I) { - EXPECT_EQ(HostA7[I], HostB7[I]); - } - - EXPECT_ERROR(Executor.synchronousCopyD2D(DeviceA5, DeviceB5, 7)); - - EXPECT_ERROR(Executor.synchronousCopyD2D(DeviceA7, DeviceB5, 7)); - - EXPECT_ERROR(Executor.synchronousCopyD2D(DeviceA5, DeviceB7, 7)); -} - -TEST_F(ExecutorTest, SyncCopyD2D) { - EXPECT_NO_ERROR(Executor.synchronousCopyD2D(DeviceA5, DeviceB5)); - for (int I = 0; I < 5; ++I) { - EXPECT_EQ(HostA5[I], HostB5[I]); - } - - EXPECT_ERROR(Executor.synchronousCopyD2D(DeviceA7, DeviceB5)); - - EXPECT_ERROR(Executor.synchronousCopyD2D(DeviceA5, DeviceB7)); -} - -TEST_F(ExecutorTest, SyncCopySliceD2DByCount) { - EXPECT_NO_ERROR(Executor.synchronousCopyD2D(DeviceA5.asSlice().drop_front(1), - DeviceB5, 4)); - for (int I = 0; I < 4; ++I) { - EXPECT_EQ(HostA5[I + 1], HostB5[I]); - } - - EXPECT_NO_ERROR(Executor.synchronousCopyD2D(DeviceA7.asSlice().drop_back(1), - DeviceB7, 2)); - for (int I = 0; I < 2; ++I) { - EXPECT_EQ(HostA7[I], HostB7[I]); - } - - EXPECT_ERROR(Executor.synchronousCopyD2D(DeviceA5.asSlice(), DeviceB5, 7)); - - EXPECT_ERROR(Executor.synchronousCopyD2D(DeviceA7.asSlice(), DeviceB5, 7)); - - EXPECT_ERROR(Executor.synchronousCopyD2D(DeviceA5.asSlice(), DeviceB7, 7)); -} - -TEST_F(ExecutorTest, SyncCopySliceD2D) { - EXPECT_NO_ERROR( - Executor.synchronousCopyD2D(DeviceA7.asSlice().drop_back(2), DeviceB5)); - for (int I = 0; I < 5; ++I) { - EXPECT_EQ(HostA7[I], HostB5[I]); - } - - EXPECT_ERROR( - Executor.synchronousCopyD2D(DeviceA7.asSlice().drop_front(1), DeviceB5)); - - EXPECT_ERROR( - Executor.synchronousCopyD2D(DeviceA5.asSlice().drop_back(1), DeviceB7)); -} - -TEST_F(ExecutorTest, SyncCopyD2DSliceByCount) { - EXPECT_NO_ERROR(Executor.synchronousCopyD2D( - DeviceA5, DeviceB7.asSlice().drop_front(2), 5)); - for (int I = 0; I < 5; ++I) { - EXPECT_EQ(HostA5[I], HostB7[I + 2]); - } - - EXPECT_NO_ERROR(Executor.synchronousCopyD2D( - DeviceA7, DeviceB7.asSlice().drop_back(3), 2)); - for (int I = 0; I < 2; ++I) { - EXPECT_EQ(HostA7[I], HostB7[I]); - } - - EXPECT_ERROR(Executor.synchronousCopyD2D(DeviceA5, DeviceB5.asSlice(), 7)); - - EXPECT_ERROR(Executor.synchronousCopyD2D(DeviceA7, DeviceB5.asSlice(), 7)); - - EXPECT_ERROR(Executor.synchronousCopyD2D(DeviceA5, DeviceB7.asSlice(), 7)); -} - -TEST_F(ExecutorTest, SyncCopyD2DSlice) { - EXPECT_NO_ERROR( - Executor.synchronousCopyD2D(DeviceA5, DeviceB7.asSlice().drop_back(2))); - for (int I = 0; I < 5; ++I) { - EXPECT_EQ(HostA5[I], HostB7[I]); - } - - EXPECT_ERROR(Executor.synchronousCopyD2D(DeviceA7, DeviceB5.asSlice())); - - EXPECT_ERROR(Executor.synchronousCopyD2D(DeviceA5, DeviceB7.asSlice())); -} - -TEST_F(ExecutorTest, SyncCopySliceD2DSliceByCount) { - EXPECT_NO_ERROR( - Executor.synchronousCopyD2D(DeviceA5.asSlice(), DeviceB5.asSlice(), 5)); - for (int I = 0; I < 5; ++I) { - EXPECT_EQ(HostA5[I], HostB5[I]); - } - - EXPECT_NO_ERROR( - Executor.synchronousCopyD2D(DeviceA7.asSlice(), DeviceB7.asSlice(), 2)); - for (int I = 0; I < 2; ++I) { - EXPECT_EQ(HostA7[I], HostB7[I]); - } - - EXPECT_ERROR( - Executor.synchronousCopyD2D(DeviceA5.asSlice(), DeviceB5.asSlice(), 7)); - - EXPECT_ERROR( - Executor.synchronousCopyD2D(DeviceA7.asSlice(), DeviceB5.asSlice(), 7)); - - EXPECT_ERROR( - Executor.synchronousCopyD2D(DeviceA5.asSlice(), DeviceB7.asSlice(), 7)); -} - -TEST_F(ExecutorTest, SyncCopySliceD2DSlice) { - EXPECT_NO_ERROR( - Executor.synchronousCopyD2D(DeviceA5.asSlice(), DeviceB5.asSlice())); - for (int I = 0; I < 5; ++I) { - EXPECT_EQ(HostA5[I], HostB5[I]); - } - - EXPECT_ERROR( - Executor.synchronousCopyD2D(DeviceA7.asSlice(), DeviceB5.asSlice())); - - EXPECT_ERROR( - Executor.synchronousCopyD2D(DeviceA5.asSlice(), DeviceB7.asSlice())); -} - -} // namespace Index: parallel-libs/trunk/streamexecutor/lib/unittests/KernelTest.cpp =================================================================== --- parallel-libs/trunk/streamexecutor/lib/unittests/KernelTest.cpp +++ parallel-libs/trunk/streamexecutor/lib/unittests/KernelTest.cpp @@ -14,7 +14,7 @@ #include -#include "streamexecutor/Executor.h" +#include "streamexecutor/Device.h" #include "streamexecutor/Kernel.h" #include "streamexecutor/KernelSpec.h" #include "streamexecutor/PlatformInterfaces.h" @@ -27,7 +27,7 @@ namespace se = ::streamexecutor; -// An Executor that returns a dummy KernelInterface. +// A Device that returns a dummy KernelInterface. // // During construction it creates a unique_ptr to a dummy KernelInterface and it // also stores a separate copy of the raw pointer that is stored by that @@ -39,10 +39,10 @@ // object. The raw pointer copy can then be used to identify the unique_ptr in // its new location (by comparing the raw pointer with unique_ptr::get), to // verify that the unique_ptr ended up where it was supposed to be. -class MockExecutor : public se::Executor { +class MockDevice : public se::Device { public: - MockExecutor() - : se::Executor(nullptr), Unique(llvm::make_unique()), + MockDevice() + : se::Device(nullptr), Unique(llvm::make_unique()), Raw(Unique.get()) {} // Moves the unique pointer into the returned se::Expected instance. @@ -51,7 +51,7 @@ // out. se::Expected> getKernelImplementation(const se::MultiKernelLoaderSpec &) override { - assert(Unique && "MockExecutor getKernelImplementation should not be " + assert(Unique && "MockDevice getKernelImplementation should not be " "called more than once"); return std::move(Unique); } @@ -79,15 +79,15 @@ // Tests that the kernel create functions properly fetch the implementation // pointers for the kernel objects they construct from the passed-in -// Executor objects. +// Device objects. TYPED_TEST(GetImplementationTest, SetImplementationDuringCreate) { se::MultiKernelLoaderSpec Spec; - MockExecutor MockExecutor; + MockDevice Dev; - auto MaybeKernel = TypeParam::create(&MockExecutor, Spec); + auto MaybeKernel = TypeParam::create(&Dev, Spec); EXPECT_TRUE(static_cast(MaybeKernel)); se::KernelInterface *Implementation = MaybeKernel->getImplementation(); - EXPECT_EQ(MockExecutor.getRaw(), Implementation); + EXPECT_EQ(Dev.getRaw(), Implementation); } } // namespace Index: parallel-libs/trunk/streamexecutor/lib/unittests/StreamTest.cpp =================================================================== --- parallel-libs/trunk/streamexecutor/lib/unittests/StreamTest.cpp +++ parallel-libs/trunk/streamexecutor/lib/unittests/StreamTest.cpp @@ -14,7 +14,7 @@ #include -#include "streamexecutor/Executor.h" +#include "streamexecutor/Device.h" #include "streamexecutor/Kernel.h" #include "streamexecutor/KernelSpec.h" #include "streamexecutor/PlatformInterfaces.h" @@ -26,14 +26,14 @@ namespace se = ::streamexecutor; -/// Mock PlatformExecutor that performs asynchronous memcpy operations by +/// Mock PlatformDevice that performs asynchronous memcpy operations by /// ignoring the stream argument and calling std::memcpy on device memory /// handles. -class MockPlatformExecutor : public se::PlatformExecutor { +class MockPlatformDevice : public se::PlatformDevice { public: - ~MockPlatformExecutor() override {} + ~MockPlatformDevice() override {} - std::string getName() const override { return "MockPlatformExecutor"; } + std::string getName() const override { return "MockPlatformDevice"; } se::Expected> createStream() override { @@ -83,7 +83,7 @@ DeviceA7(se::GlobalDeviceMemory::makeFromElementCount(HostA7, 7)), DeviceB7(se::GlobalDeviceMemory::makeFromElementCount(HostB7, 7)), Host5{24, 25, 26, 27, 28}, Host7{29, 30, 31, 32, 33, 34, 35}, - Stream(llvm::make_unique(&PExecutor)) {} + Stream(llvm::make_unique(&PDevice)) {} protected: // Device memory is backed by host arrays. @@ -100,7 +100,7 @@ int Host5[5]; int Host7[7]; - MockPlatformExecutor PExecutor; + MockPlatformDevice PDevice; se::Stream Stream; };