Index: streamexecutor/CMakeLists.txt =================================================================== --- streamexecutor/CMakeLists.txt +++ streamexecutor/CMakeLists.txt @@ -24,6 +24,18 @@ include_directories(${LLVM_INCLUDE_DIRS}) add_definitions(${LLVM_DEFINITIONS}) + # Get the LLVM cxxflags by using llvm-config. + # + # This is necessary to get -fno-rtti if LLVM is compiled that way. + execute_process( + COMMAND + "${LLVM_BINARY_DIR}/bin/llvm-config" + --cxxflags + OUTPUT_VARIABLE + LLVM_CXXFLAGS + OUTPUT_STRIP_TRAILING_WHITESPACE) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${LLVM_CXXFLAGS}") + # Find the libraries that correspond to the LLVM components # that we wish to use llvm_map_components_to_libnames(llvm_libs support symbolize) Index: streamexecutor/include/streamexecutor/Interfaces.h =================================================================== --- streamexecutor/include/streamexecutor/Interfaces.h +++ streamexecutor/include/streamexecutor/Interfaces.h @@ -22,7 +22,9 @@ // TODO(jhen): Add methods. }; -// TODO(jhen): Add other interfaces such as Stream. +class StreamInterface { + // TODO(jhen): Add methods. +}; } // namespace streamexecutor Index: streamexecutor/include/streamexecutor/LaunchDimensions.h =================================================================== --- /dev/null +++ streamexecutor/include/streamexecutor/LaunchDimensions.h @@ -0,0 +1,47 @@ +//===-- LaunchDimensions.h - Kernel block and grid sizes --------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Structures to hold sizes for blocks and grids which are used as parameters +/// for kernel launches. 
+/// +//===----------------------------------------------------------------------===// + +#ifndef STREAMEXECUTOR_LAUNCHDIMENSIONS_H +#define STREAMEXECUTOR_LAUNCHDIMENSIONS_H + +namespace streamexecutor { + +/// The dimensions of a device block of execution. +/// +/// A block is made up of an array of X by Y by Z threads. +struct BlockDimensions { + BlockDimensions(unsigned X = 1, unsigned Y = 1, unsigned Z = 1) + : X(X), Y(Y), Z(Z) {} + + unsigned X; + unsigned Y; + unsigned Z; +}; + +/// The dimensions of a device grid of execution. +/// +/// A grid is made up of an array of X by Y by Z blocks. +struct GridDimensions { + GridDimensions(unsigned X = 1, unsigned Y = 1, unsigned Z = 1) + : X(X), Y(Y), Z(Z) {} + + unsigned X; + unsigned Y; + unsigned Z; +}; + +} // namespace streamexecutor + +#endif // STREAMEXECUTOR_LAUNCHDIMENSIONS_H Index: streamexecutor/include/streamexecutor/PackedKernelArgumentArray.h =================================================================== --- streamexecutor/include/streamexecutor/PackedKernelArgumentArray.h +++ streamexecutor/include/streamexecutor/PackedKernelArgumentArray.h @@ -47,6 +47,12 @@ /// efficiently, although it is probably more information than is needed for any /// specific platform. /// +/// The PackedKernelArgumentArrayBase class has no template parameters, so it +/// does not benefit from compile-time type checking. However, since it has no +/// template parameters, it can be passed as an argument to virtual functions, +/// and this allows it to be passed to functions that use virtual function +/// overloading to handle platform-specific kernel launching. +/// //===----------------------------------------------------------------------===// #ifndef STREAMEXECUTOR_PACKEDKERNELARGUMENTARRAY_H @@ -64,39 +70,81 @@ SHARED_DEVICE_MEMORY /// Shared device memory argument. }; -/// An array of packed kernel arguments. 
-template class PackedKernelArgumentArray { +/// An array of packed kernel arguments without compile-time type information. +/// +/// This un-templated base class is useful because packed kernel arguments must +/// at some point be passed to a virtual function that performs +/// platform-specific kernel launches. Such a virtual function cannot be +/// templated to handle all specializations of the +/// PackedKernelArgumentArray<...> class template, so, instead, references to +/// PackedKernelArgumentArray<...> are passed as references to this base class. +class PackedKernelArgumentArrayBase { public: - /// Constructs an instance by packing the specified arguments. - PackedKernelArgumentArray(const ParameterTs &... Arguments) - : SharedCount(0u) { - PackArguments(0, Arguments...); - } + virtual ~PackedKernelArgumentArrayBase(); /// Gets the number of packed arguments. - size_t getArgumentCount() const { return sizeof...(ParameterTs); } + size_t getArgumentCount() const { return ArgumentCount; } /// Gets the address of the argument at the given index. - const void *getAddress(size_t Index) const { return Addresses[Index]; } + const void *getAddress(size_t Index) const { return AddressesData[Index]; } /// Gets the size of the argument at the given index. - size_t getSize(size_t Index) const { return Sizes[Index]; } + size_t getSize(size_t Index) const { return SizesData[Index]; } /// Gets the type of the argument at the given index. - KernelArgumentType getType(size_t Index) const { return Types[Index]; } + KernelArgumentType getType(size_t Index) const { return TypesData[Index]; } /// Gets a pointer to the address array. - const void *const *getAddresses() const { return Addresses.data(); } + const void *const *getAddresses() const { return AddressesData; } /// Gets a pointer to the sizes array. - const size_t *getSizes() const { return Sizes.data(); } + const size_t *getSizes() const { return SizesData; } /// Gets a pointer to the types array. 
- const KernelArgumentType *getTypes() const { return Types.data(); } + const KernelArgumentType *getTypes() const { return TypesData; } /// Gets the number of shared device memory arguments. size_t getSharedCount() const { return SharedCount; } +protected: + PackedKernelArgumentArrayBase(size_t ArgumentCount) + : ArgumentCount(ArgumentCount), SharedCount(0u) {} + + size_t ArgumentCount; + size_t SharedCount; + const void *const *AddressesData; + size_t *SizesData; + KernelArgumentType *TypesData; +}; + +/// An array of packed kernel arguments with compile-time type information. +/// +/// This is used by the platform-independent StreamExecutor code to pack +/// arguments in a compile-time type-safe way. In order to actually launch a +/// kernel on a specific platform, however, a reference to this class will have +/// to be passed to a virtual, platform-specific kernel launch function. Such a +/// reference will be passed as a reference to the base class rather than a +/// reference to this subclass itself because a virtual function cannot be +/// templated in such a way to maintain the template parameter types of the +/// subclass. +template +class PackedKernelArgumentArray : public PackedKernelArgumentArrayBase { +public: + /// Constructs an instance by packing the specified arguments. + /// + /// Rather than using this constructor directly, consider using the + /// make_kernel_argument_pack function instead, to get the compiler to infer + /// the parameter types for you. + PackedKernelArgumentArray(const ParameterTs &... Arguments) + : PackedKernelArgumentArrayBase(sizeof...(ParameterTs)) { + AddressesData = Addresses.data(); + SizesData = Sizes.data(); + TypesData = Types.data(); + PackArguments(0, Arguments...); + } + + ~PackedKernelArgumentArray() override = default; + private: // Base case for PackArguments when there are no arguments to pack. 
void PackArguments(size_t) {} @@ -215,7 +263,6 @@ std::array Addresses; std::array Sizes; std::array Types; - size_t SharedCount; }; // Utility template function to call the PackedKernelArgumentArray constructor Index: streamexecutor/include/streamexecutor/Passkey.h =================================================================== --- /dev/null +++ streamexecutor/include/streamexecutor/Passkey.h @@ -0,0 +1,74 @@ +//===-- Passkey.h - Key for method access control ---------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Defines the Passkey struct for inter-class method access control. +/// +//===----------------------------------------------------------------------===// + +#ifndef STREAMEXECUTOR_PASSKEYS_H +#define STREAMEXECUTOR_PASSKEYS_H + +namespace streamexecutor { + +/// A key object that can only be created by class T. +/// +/// A Passkey is meant to be used as a function parameter to indicate that a +/// function should only be called by class T. This allows more fine-grained +/// function access control than the basic use of public, private, and friend +/// because it only provides access for a single class to a single function. +/// +/// A Passkey also contains a reference to an instance of type T so that the +/// Passkey can identify the specific instance of T that is making the function +/// call. The constructor provides an implicit conversion from T* to Passkey +/// so an object of type T can pass its "this" pointer wherever a Passkey is +/// expected. +/// +/// Here is an example of its use: +/// \code +/// class B; +/// +/// class A { +/// public: +/// // This method is technically public, but only class B will ever be +/// // able to create a Passkey, so it is effectively private to B. 
+/// void doStuffOnlyBCanDo(const Passkey<B> &Key); +/// }; +/// +/// class B { +/// public: +/// void callA(A *APointer) { +/// // B can use its "this" pointer as a Passkey argument. +/// APointer->doStuffOnlyBCanDo(this); +/// } +/// }; +/// \endcode +template <typename T> struct Passkey { +public: + /// The pointer used to create the key. + T *Reference; + +private: + /// T is a friend so it can call the private constructor. + friend T; + + /// Allow implicit construction so a T object can pass "this" to a function + /// that expects a const Passkey<T>& argument. + Passkey(T *Reference) : Reference(Reference) {} + + /// Disallow copy construction and copy assignment to prevent cloning. + ///@{ + Passkey(const Passkey &) = delete; + Passkey &operator=(const Passkey &) = delete; + ///@} +}; + +} // namespace streamexecutor + +#endif // STREAMEXECUTOR_PASSKEYS_H Index: streamexecutor/include/streamexecutor/Stream.h =================================================================== --- /dev/null +++ streamexecutor/include/streamexecutor/Stream.h @@ -0,0 +1,259 @@ +//===-- Stream.h - A stream of execution ------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// +/// A Stream instance represents a queue of sequential, host-asynchronous work +/// to be performed on a device. +/// +/// To enqueue work on a device, first create a StreamExecutor instance for a +/// given device and then use that StreamExecutor to create a Stream instance. +/// The Stream instance will perform its work on the device managed by the +/// StreamExecutor that created it. 
+/// +/// The various "then" methods of the Stream object, such as thenMemcpyH2D and +/// thenLaunch, may be used to enqueue work on the Stream, and the +/// blockHostUntilDone() method may be used to block the host code until the +/// Stream has completed all its work. +/// +/// Multiple Stream instances can be created for the same StreamExecutor. This +/// allows several independent streams of computation to be performed +/// simultaneously on a single device. +/// +//===----------------------------------------------------------------------===// + +#ifndef STREAMEXECUTOR_STREAM_H +#define STREAMEXECUTOR_STREAM_H + +#include +#include +#include + +#include "streamexecutor/DeviceMemory.h" +#include "streamexecutor/Kernel.h" +#include "streamexecutor/LaunchDimensions.h" +#include "streamexecutor/PackedKernelArgumentArray.h" +#include "streamexecutor/Passkey.h" +#include "streamexecutor/Utils/Error.h" + +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Support/RWMutex.h" + +namespace streamexecutor { + +class StreamExecutor; +class StreamInterface; + +/// Represents a stream of dependent computations on a device. +/// +/// The operations within a stream execute sequentially and asynchronously until +/// blockHostUntilDone() is invoked, which synchronously joins host code with +/// the execution of the stream. +/// +/// If any given operation fails when entraining work for the stream, isOK() +/// will indicate that an error has occurred and getStatus() will get the first +/// error that occurred on the stream. There is no way to clear the error state +/// of a stream once it is in an error state. +class Stream { +public: + // Only a StreamExecutor can construct a Stream because only a StreamExecutor + // can create a Passkey. + // + // This protection enforces the design that StreamInterface pointers are only + // used internally by StreamExecutor and should not be passed around by users. 
+ Stream(const Passkey &Key, + std::unique_ptr Implementation); + + ~Stream(); + + /// Returns true if no error has occurred while entraining work on this + /// stream. + bool isOK() const { + llvm::sys::ScopedReader ReaderLock(ErrorMessageMutex); + return !ErrorMessage; + } + + /// Returns the status created by the first error that occurred while + /// entraining work on this stream, or success if no error has occurred. + Error getStatus() const { + llvm::sys::ScopedReader ReaderLock(ErrorMessageMutex); + if (ErrorMessage) + return make_error(*ErrorMessage); + + return Error::success(); + } + + /// Entrains onto the stream of operations a kernel launch with the given + /// arguments. + /// + /// These arguments can be device memory types like GlobalDeviceMemory and + /// SharedDeviceMemory, or they can be primitive types such as int. The + /// allowable argument types are determined by the template parameters to the + /// TypedKernel argument. + template + Stream &thenLaunch(BlockDimensions BlockSize, GridDimensions GridSize, + const TypedKernel &Kernel, + const ParameterTs &... Arguments) { + auto ArgumentArray = + make_kernel_argument_pack(Arguments...); + return thenRawLaunch(BlockSize, GridSize, Kernel, ArgumentArray); + } + + /// Entrain onto the stream a memcpy of a given number of elements from a + /// device source to a host destination. + /// + /// HostDst must be a pointer to host memory allocated by + /// StreamExecutor::allocateHostMemory or otherwise allocated and then + /// registered with StreamExecutor::registerHostMemory. 
+ template + Stream &thenMemcpyD2H(const GlobalDeviceMemory &DeviceSrc, + llvm::MutableArrayRef HostDst, size_t ElementCount) { + if (ElementCount > DeviceSrc.getElementCount()) { + setError("copying too many elements, " + llvm::Twine(ElementCount) + + ", from device memory array of size " + + llvm::Twine(DeviceSrc.getElementCount())); + } else if (ElementCount > HostDst.size()) { + setError("copying too many elements, " + llvm::Twine(ElementCount) + + ", to host array of size " + llvm::Twine(HostDst.size())); + } else { + thenRawMemcpyD2H(DeviceSrc, HostDst.data(), ElementCount * sizeof(T)); + } + return *this; + } + + /// Same as thenMemcpyD2H above, but copies the entire source to the + /// destination. + template + Stream &thenMemcpyD2H(const GlobalDeviceMemory &DeviceSrc, + llvm::MutableArrayRef HostDst) { + return thenMemcpyD2H(DeviceSrc, HostDst, DeviceSrc.getElementCount()); + } + + /// Entrain onto the stream a memcpy of a given number of elements from a host + /// source to a device destination. + /// + /// HostSrc must be a pointer to host memory allocated by + /// StreamExecutor::allocateHostMemory or otherwise allocated and then + /// registered with StreamExecutor::registerHostMemory. + template + Stream &thenMemcpyH2D(llvm::ArrayRef HostSrc, + GlobalDeviceMemory *DeviceDst, size_t ElementCount) { + if (ElementCount > HostSrc.size()) { + setError("copying too many elements, " + llvm::Twine(ElementCount) + + ", from host array of size " + llvm::Twine(HostSrc.size())); + } else if (ElementCount > DeviceDst->getElementCount()) { + setError("copying too many elements, " + llvm::Twine(ElementCount) + + ", to device memory array of size " + + llvm::Twine(DeviceDst->getElementCount())); + } else { + thenRawMemcpyH2D(HostSrc.data(), DeviceDst, ElementCount * sizeof(T)); + } + return *this; + } + + /// Same as thenMemcpyH2D above, but copies the entire source to the + /// destination. 
+ template + Stream &thenMemcpyH2D(llvm::ArrayRef HostSrc, + GlobalDeviceMemory *DeviceDst) { + return thenMemcpyH2D(HostSrc, DeviceDst, HostSrc.size()); + } + + /// Entrain onto the stream a memcpy of a given number of elements from a + /// device source to a device destination. + template + Stream &thenMemcpyD2D(const GlobalDeviceMemory &DeviceSrc, + GlobalDeviceMemory *DeviceDst, size_t ElementCount) { + if (ElementCount > DeviceSrc.getElementCount()) { + setError("copying too many elements, " + llvm::Twine(ElementCount) + + ", from device memory array of size " + + llvm::Twine(DeviceSrc.getElementCount())); + } else if (ElementCount > DeviceDst->getElementCount()) { + setError("copying too many elements, " + llvm::Twine(ElementCount) + + ", to device memory array of size " + + llvm::Twine(DeviceDst->getElementCount())); + } else { + thenRawMemcpyD2D(DeviceSrc, DeviceDst, ElementCount * sizeof(T)); + } + return *this; + } + + /// Same as thenMemcpyD2D above, but copies the entire source to the + /// destination. + template + Stream &thenMemcpyD2D(const GlobalDeviceMemory &DeviceSrc, + GlobalDeviceMemory *DeviceDst) { + return thenMemcpyD2D(DeviceSrc, DeviceDst, DeviceSrc.getElementCount()); + } + + /// Blocks the host code, waiting for the operations entrained on the stream + /// (enqueued up to this point in program execution) to complete. + /// + /// Returns true if there are no errors on the stream. + bool blockHostUntilDone(); + +private: + /// Sets the error state from an Error object, always consuming the Error. + /// + /// Does not overwrite the error if it is already set. + void setError(Error &&E) { + if (E) { + auto Message = consumeAndGetMessage(std::move(E)); + llvm::sys::ScopedWriter WriterLock(ErrorMessageMutex); + if (!ErrorMessage) + ErrorMessage = std::move(Message); + } + } + + /// Sets the error state from an error message. + /// + /// Does not overwrite the error if it is already set. 
+ void setError(llvm::Twine Message) { + llvm::sys::ScopedWriter WriterLock(ErrorMessageMutex); + if (!ErrorMessage) { + ErrorMessage = Message.str(); + } + } + + Stream &thenRawLaunch(BlockDimensions BlockSize, GridDimensions GridSize, + const KernelBase &Kernel, + const PackedKernelArgumentArrayBase &ArgumentArray); + + Stream &thenRawMemcpyD2H(const GlobalDeviceMemoryBase &DeviceSrc, + void *HostDst, size_t ByteCount); + + Stream &thenRawMemcpyH2D(const void *HostSrc, + GlobalDeviceMemoryBase *DeviceDst, size_t ByteCount); + + Stream &thenRawMemcpyD2D(const GlobalDeviceMemoryBase &DeviceSrc, + GlobalDeviceMemoryBase *DeviceDst, size_t ByteCount); + + /// The StreamExecutor that supports the operations of this stream. + StreamExecutor *Parent; + + /// Platform-specific implementation. + std::unique_ptr Implementation; + + /// Mutex that guards the error state flags. + /// + /// Mutable so that it can be obtained via const reader lock. + mutable llvm::sys::RWMutex ErrorMessageMutex; + + /// First error message for an operation in this stream or empty if there have + /// been no errors. + llvm::Optional ErrorMessage; + + Stream(const Stream &) = delete; + void operator=(const Stream &) = delete; +}; + +} // namespace streamexecutor + +#endif // STREAMEXECUTOR_STREAM_H Index: streamexecutor/include/streamexecutor/StreamExecutor.h =================================================================== --- streamexecutor/include/streamexecutor/StreamExecutor.h +++ streamexecutor/include/streamexecutor/StreamExecutor.h @@ -16,14 +16,20 @@ #ifndef STREAMEXECUTOR_STREAMEXECUTOR_H #define STREAMEXECUTOR_STREAMEXECUTOR_H +#include "streamexecutor/DeviceMemory.h" +#include "streamexecutor/Passkey.h" +#include "streamexecutor/Stream.h" #include "streamexecutor/Utils/Error.h" namespace streamexecutor { class KernelInterface; +class Stream; class StreamExecutor { public: + virtual ~StreamExecutor(); + /// Gets the kernel implementation for the underlying platform. 
virtual Expected> getKernelImplementation(const MultiKernelLoaderSpec &Spec) { @@ -31,7 +37,58 @@ return nullptr; } - // TODO(jhen): Add other methods. + Expected> createStream() { + // TODO(jhen): Get the real implementation pointer. + return std::unique_ptr(new Stream(this, nullptr)); + } + + /// \name Stream-only interface. + /// + /// Methods that should only be called by a Stream. This protection is + /// enforced by requiring a Passkey argument. + ///@{ + + Error launch(const Passkey &Key, BlockDimensions BlockSize, + GridDimensions GridSize, const KernelBase &Kernel, + const PackedKernelArgumentArrayBase &ArgumentArray) { + // TODO(jhen): Implement this. + return Error::success(); + } + + /// Destroys the stream and records an error if the destruction fails. + /// + /// TODO(jhen): Create an API for querying stream destruction errors. It will + /// probably also have kernel, event, timer, etc. destruction errors as well. + void deallocateStream(const Passkey &Key) { + // TODO(jhen): Implement this. + } + + Error memcpyD2H(const Passkey &Key, + const GlobalDeviceMemoryBase &DeviceSrc, void *HostDst, + size_t ByteCount) { + // TODO(jhen): Implement this. + return Error::success(); + } + + Error memcpyH2D(const Passkey &Key, const void *HostSrc, + GlobalDeviceMemoryBase *DeviceDst, size_t ByteCount) { + // TODO(jhen): Implement this. + return Error::success(); + } + + Error memcpyD2D(const Passkey &Key, + const GlobalDeviceMemoryBase &DeviceSrc, + GlobalDeviceMemoryBase *DeviceDst, size_t ByteCount) { + // TODO(jhen): Implement this. + return Error::success(); + } + + Error blockHostUntilDone(const Passkey &Key) { + // TODO(jhen): Implement this. 
+ return Error::success(); + } + + ///@} End stream-only interface }; } // namespace streamexecutor Index: streamexecutor/lib/CMakeLists.txt =================================================================== --- streamexecutor/lib/CMakeLists.txt +++ streamexecutor/lib/CMakeLists.txt @@ -7,7 +7,10 @@ streamexecutor $ Kernel.cpp - KernelSpec.cpp) + KernelSpec.cpp + PackedKernelArgumentArray.cpp + Stream.cpp + StreamExecutor.cpp) target_link_libraries(streamexecutor ${llvm_libs}) if(STREAM_EXECUTOR_UNIT_TESTS) Index: streamexecutor/lib/PackedKernelArgumentArray.cpp =================================================================== --- /dev/null +++ streamexecutor/lib/PackedKernelArgumentArray.cpp @@ -0,0 +1,21 @@ +//===-- PackedKernelArgumentArray.cpp - Packed argument array impl --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Implementation details for classes from PackedKernelArgumentArray.h. +/// +//===----------------------------------------------------------------------===// + +#include "streamexecutor/PackedKernelArgumentArray.h" + +namespace streamexecutor { + +PackedKernelArgumentArrayBase::~PackedKernelArgumentArrayBase() = default; + +} // namespace streamexecutor Index: streamexecutor/lib/Stream.cpp =================================================================== --- /dev/null +++ streamexecutor/lib/Stream.cpp @@ -0,0 +1,63 @@ +//===-- Stream.cpp - General stream implementation ------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file contains the implementation details for a general stream object. +/// +//===----------------------------------------------------------------------===// + +#include "streamexecutor/Stream.h" + +#include "streamexecutor/Interfaces.h" +#include "streamexecutor/StreamExecutor.h" + +namespace streamexecutor { + +Stream::Stream(const Passkey &Key, + std::unique_ptr Implementation) + : Parent(Key.Reference), Implementation(std::move(Implementation)) {} + +Stream::~Stream() { Parent->deallocateStream(this); } + +Stream & +Stream::thenRawLaunch(BlockDimensions BlockSize, GridDimensions GridSize, + const KernelBase &Kernel, + const PackedKernelArgumentArrayBase &ArgumentArray) { + setError(Parent->launch(this, BlockSize, GridSize, Kernel, ArgumentArray)); + return *this; +} + +Stream &Stream::thenRawMemcpyD2H(const GlobalDeviceMemoryBase &DeviceSrc, + void *HostDst, size_t ByteCount) { + setError(Parent->memcpyD2H(this, DeviceSrc, HostDst, ByteCount)); + return *this; +} + +Stream &Stream::thenRawMemcpyH2D(const void *HostSrc, + GlobalDeviceMemoryBase *DeviceDst, + size_t ByteCount) { + setError(Parent->memcpyH2D(this, HostSrc, DeviceDst, ByteCount)); + return *this; +} + +Stream &Stream::thenRawMemcpyD2D(const GlobalDeviceMemoryBase &DeviceSrc, + GlobalDeviceMemoryBase *DeviceDst, + size_t ByteCount) { + setError(Parent->memcpyD2D(this, DeviceSrc, DeviceDst, ByteCount)); + return *this; +} + +bool Stream::blockHostUntilDone() { + if (isOK()) { + setError(Parent->blockHostUntilDone(this)); + } + return isOK(); +} + +} // namespace streamexecutor Index: streamexecutor/lib/StreamExecutor.cpp =================================================================== --- /dev/null +++ streamexecutor/lib/StreamExecutor.cpp @@ -0,0 +1,22 @@ +//===-- StreamExecutor.cpp - StreamExecutor implementation ----------------===// +// +// The LLVM Compiler Infrastructure +// +// 
This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Implementation of StreamExecutor class internals. +/// +//===----------------------------------------------------------------------===// + +#include "streamexecutor/StreamExecutor.h" +#include "streamexecutor/Interfaces.h" + +namespace streamexecutor { + +StreamExecutor::~StreamExecutor() = default; + +} // namespace streamexecutor Index: streamexecutor/lib/unittests/CMakeLists.txt =================================================================== --- streamexecutor/lib/unittests/CMakeLists.txt +++ streamexecutor/lib/unittests/CMakeLists.txt @@ -23,7 +23,19 @@ PackedKernelArgumentArrayTest.cpp) target_link_libraries( packed_kernel_argument_array_test + streamexecutor ${llvm_libs} ${GTEST_BOTH_LIBRARIES} ${CMAKE_THREAD_LIBS_INIT}) add_test(PackedKernelArgumentArrayTest packed_kernel_argument_array_test) + +add_executable( + stream_test + StreamTest.cpp) +target_link_libraries( + stream_test + streamexecutor + ${llvm_libs} + ${GTEST_BOTH_LIBRARIES} + ${CMAKE_THREAD_LIBS_INIT}) +add_test(StreamTest stream_test) Index: streamexecutor/lib/unittests/StreamTest.cpp =================================================================== --- /dev/null +++ streamexecutor/lib/unittests/StreamTest.cpp @@ -0,0 +1,117 @@ +//===-- StreamTest.cpp - Tests for Stream ---------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file contains the unit tests for Stream code. 
+/// +//===----------------------------------------------------------------------===// + +#include "streamexecutor/Interfaces.h" +#include "streamexecutor/Kernel.h" +#include "streamexecutor/KernelSpec.h" +#include "streamexecutor/Stream.h" +#include "streamexecutor/StreamExecutor.h" + +#include "gtest/gtest.h" + +namespace { + +namespace se = ::streamexecutor; + +// A StreamExecutor that just returns a dummy KernelInterface when +// getKernelImplementation is called. +// +// Used to build dummy kernels for testing kernel launching. +class MockStreamExecutor : public se::StreamExecutor { +public: + se::Expected> + getKernelImplementation(const se::MultiKernelLoaderSpec &) override { + return llvm::make_unique(); + } +}; + +/// Test fixture to hold objects used by tests. +class StreamTest : public ::testing::Test { +public: + StreamTest() + : DeviceA(se::GlobalDeviceMemory::makeFromElementCount(nullptr, 10)), + DeviceB(se::GlobalDeviceMemory::makeFromElementCount(nullptr, 10)), + SharedDevice(se::SharedDeviceMemory::makeFromElementCount(20)) { + auto MaybeStream = Executor.createStream(); + if (MaybeStream) { + Stream = std::move(*MaybeStream); + } + } + +protected: + se::StreamExecutor Executor; + std::unique_ptr Stream; + int Host[10]; + se::GlobalDeviceMemory DeviceA; + se::GlobalDeviceMemory DeviceB; + se::SharedDeviceMemory SharedDevice; + se::BlockDimensions BlockDims; + se::GridDimensions GridDims; + se::MultiKernelLoaderSpec Spec; + MockStreamExecutor MockExecutor; +}; + +TEST_F(StreamTest, MemcpyCorrectSize) { + Stream->thenMemcpyH2D(llvm::ArrayRef(Host), &DeviceA); + EXPECT_TRUE(Stream->isOK()); + + Stream->thenMemcpyD2H(DeviceA, llvm::MutableArrayRef(Host)); + EXPECT_TRUE(Stream->isOK()); + + Stream->thenMemcpyD2D(DeviceA, &DeviceB); + EXPECT_TRUE(Stream->isOK()); +} + +TEST_F(StreamTest, MemcpyH2DTooManyElements) { + Stream->thenMemcpyH2D(llvm::ArrayRef(Host), &DeviceA, 20); + EXPECT_FALSE(Stream->isOK()); +} + +TEST_F(StreamTest, 
MemcpyD2HTooManyElements) { + Stream->thenMemcpyD2H(DeviceA, llvm::MutableArrayRef(Host), 20); + EXPECT_FALSE(Stream->isOK()); +} + +TEST_F(StreamTest, MemcpyD2DTooManyElements) { + Stream->thenMemcpyD2D(DeviceA, &DeviceB, 20); + EXPECT_FALSE(Stream->isOK()); +} + +TEST_F(StreamTest, KernelLaunchNoArguments) { + using KernelType = se::TypedKernel<>; + auto MaybeKernel = KernelType::create(&MockExecutor, Spec); + ASSERT_TRUE(static_cast(MaybeKernel)); + Stream->thenLaunch(BlockDims, GridDims, *MaybeKernel); + EXPECT_TRUE(Stream->isOK()); +} + +TEST_F(StreamTest, KernelLaunchOneArgument) { + using KernelType = se::TypedKernel; + auto MaybeKernel = KernelType::create(&MockExecutor, Spec); + ASSERT_TRUE(static_cast(MaybeKernel)); + Stream->thenLaunch(BlockDims, GridDims, *MaybeKernel, 10.0f); + EXPECT_TRUE(Stream->isOK()); +} + +TEST_F(StreamTest, KernelLaunchSeveralArguments) { + using KernelType = se::TypedKernel, + se::SharedDeviceMemory>; + auto MaybeKernel = KernelType::create(&MockExecutor, Spec); + ASSERT_TRUE(static_cast(MaybeKernel)); + Stream->thenLaunch(BlockDims, GridDims, *MaybeKernel, 10.0f, DeviceA, + SharedDevice); + EXPECT_TRUE(Stream->isOK()); +} + +} // namespace