Index: streamexecutor/CMakeLists.txt =================================================================== --- streamexecutor/CMakeLists.txt +++ streamexecutor/CMakeLists.txt @@ -24,6 +24,18 @@ include_directories(${LLVM_INCLUDE_DIRS}) add_definitions(${LLVM_DEFINITIONS}) + # Get the LLVM cxxflags by using llvm-config. + # + # This is necessary to get -fno-rtti if LLVM is compiled that way. + execute_process( + COMMAND + "${LLVM_BINARY_DIR}/bin/llvm-config" + --cxxflags + OUTPUT_VARIABLE + LLVM_CXXFLAGS + OUTPUT_STRIP_TRAILING_WHITESPACE) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${LLVM_CXXFLAGS}") + # Find the libraries that correspond to the LLVM components # that we wish to use llvm_map_components_to_libnames(llvm_libs support symbolize) Index: streamexecutor/include/streamexecutor/Interfaces.h =================================================================== --- streamexecutor/include/streamexecutor/Interfaces.h +++ streamexecutor/include/streamexecutor/Interfaces.h @@ -22,7 +22,9 @@ // TODO(jhen): Add methods. }; -// TODO(jhen): Add other interfaces such as Stream. +class StreamInterface { + // TODO(jhen): Add methods. +}; } // namespace streamexecutor Index: streamexecutor/include/streamexecutor/LaunchDimensions.h =================================================================== --- /dev/null +++ streamexecutor/include/streamexecutor/LaunchDimensions.h @@ -0,0 +1,47 @@ +//===-- LaunchDimensions.h - Kernel block and grid sizes --------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Structures to hold sizes for blocks and grids which are used as parameters +/// for kernel launches. 
+/// +//===----------------------------------------------------------------------===// + +#ifndef STREAMEXECUTOR_LAUNCHDIMENSIONS_H +#define STREAMEXECUTOR_LAUNCHDIMENSIONS_H + +namespace streamexecutor { + +/// The dimensions of a device block of execution. +/// +/// A block is made up of an array of X by Y by Z threads. +struct BlockDimensions { + BlockDimensions(unsigned X = 1, unsigned Y = 1, unsigned Z = 1) + : X(X), Y(Y), Z(Z) {} + + unsigned X; + unsigned Y; + unsigned Z; +}; + +/// The dimensions of a device grid of execution. +/// +/// A grid is made up of an array of X by Y by Z blocks. +struct GridDimensions { + GridDimensions(unsigned X = 1, unsigned Y = 1, unsigned Z = 1) + : X(X), Y(Y), Z(Z) {} + + unsigned X; + unsigned Y; + unsigned Z; +}; + +} // namespace streamexecutor + +#endif // STREAMEXECUTOR_LAUNCHDIMENSIONS_H Index: streamexecutor/include/streamexecutor/PackedKernelArgumentArray.h =================================================================== --- streamexecutor/include/streamexecutor/PackedKernelArgumentArray.h +++ streamexecutor/include/streamexecutor/PackedKernelArgumentArray.h @@ -47,6 +47,12 @@ /// efficiently, although it is probably more information than is needed for any /// specific platform. /// +/// The PackedKernelArgumentArrayBase class has no template parameters, so it +/// does not benefit from compile-time type checking. However, since it has no +/// template parameters, it can be passed as an argument to virtual functions, +/// and this allows it to be passed to functions that use virtual function +/// overriding to handle platform-specific kernel launching. +/// //===----------------------------------------------------------------------===// #ifndef STREAMEXECUTOR_PACKEDKERNELARGUMENTARRAY_H @@ -64,39 +70,81 @@ SHARED_DEVICE_MEMORY /// Shared device memory argument. }; -/// An array of packed kernel arguments. 
-template class PackedKernelArgumentArray { +/// An array of packed kernel arguments without compile-time type information. +/// +/// This un-templated base class is useful because packed kernel arguments must +/// at some point be passed to a virtual function that performs +/// platform-specific kernel launches. Such a virtual function cannot be +/// templated to handle all specializations of the +/// PackedKernelArgumentArray<...> class template, so, instead, references to +/// PackedKernelArgumentArray<...> are passed as references to this base class. +class PackedKernelArgumentArrayBase { public: - /// Constructs an instance by packing the specified arguments. - PackedKernelArgumentArray(const ParameterTs &... Arguments) - : SharedCount(0u) { - PackArguments(0, Arguments...); - } + virtual ~PackedKernelArgumentArrayBase(); /// Gets the number of packed arguments. - size_t getArgumentCount() const { return sizeof...(ParameterTs); } + size_t getArgumentCount() const { return ArgumentCount; } /// Gets the address of the argument at the given index. - const void *getAddress(size_t Index) const { return Addresses[Index]; } + const void *getAddress(size_t Index) const { return AddressesData[Index]; } /// Gets the size of the argument at the given index. - size_t getSize(size_t Index) const { return Sizes[Index]; } + size_t getSize(size_t Index) const { return SizesData[Index]; } /// Gets the type of the argument at the given index. - KernelArgumentType getType(size_t Index) const { return Types[Index]; } + KernelArgumentType getType(size_t Index) const { return TypesData[Index]; } /// Gets a pointer to the address array. - const void *const *getAddresses() const { return Addresses.data(); } + const void *const *getAddresses() const { return AddressesData; } /// Gets a pointer to the sizes array. - const size_t *getSizes() const { return Sizes.data(); } + const size_t *getSizes() const { return SizesData; } /// Gets a pointer to the types array. 
- const KernelArgumentType *getTypes() const { return Types.data(); } + const KernelArgumentType *getTypes() const { return TypesData; } /// Gets the number of shared device memory arguments. size_t getSharedCount() const { return SharedCount; } +protected: + PackedKernelArgumentArrayBase(size_t ArgumentCount) + : ArgumentCount(ArgumentCount), SharedCount(0u) {} + + size_t ArgumentCount; + size_t SharedCount; + const void *const *AddressesData; + size_t *SizesData; + KernelArgumentType *TypesData; +}; + +/// An array of packed kernel arguments with compile-time type information. +/// +/// This is used by the platform-independent StreamExecutor code to pack +/// arguments in a compile-time type-safe way. In order to actually launch a +/// kernel on a specific platform, however, a reference to this class will have +/// to be passed to a virtual, platform-specific kernel launch function. Such a +/// reference will be passed as a reference to the base class rather than a +/// reference to this subclass itself because a virtual function cannot be +/// templated in such a way as to maintain the template parameter types of the +/// subclass. +template +class PackedKernelArgumentArray : public PackedKernelArgumentArrayBase { +public: + /// Constructs an instance by packing the specified arguments. + /// + /// Rather than using this constructor directly, consider using the + /// make_kernel_argument_pack function instead, to get the compiler to infer + /// the parameter types for you. + PackedKernelArgumentArray(const ParameterTs &... Arguments) + : PackedKernelArgumentArrayBase(sizeof...(ParameterTs)) { + AddressesData = Addresses.data(); + SizesData = Sizes.data(); + TypesData = Types.data(); + PackArguments(0, Arguments...); + } + + ~PackedKernelArgumentArray() override = default; + private: // Base case for PackArguments when there are no arguments to pack. 
void PackArguments(size_t) {} @@ -215,7 +263,6 @@ std::array Addresses; std::array Sizes; std::array Types; - size_t SharedCount; }; // Utility template function to call the PackedKernelArgumentArray constructor Index: streamexecutor/include/streamexecutor/Stream.h =================================================================== --- /dev/null +++ streamexecutor/include/streamexecutor/Stream.h @@ -0,0 +1,267 @@ +//===-- Stream.h - A stream of execution ------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// +/// A Stream instance represents a queue of sequential, host-asynchronous work +/// to be performed on a device. +/// +/// To enqueue work on a device, first create a StreamExecutor instance for a +/// given device and then use that StreamExecutor to create a Stream instance. +/// The Stream instance will perform its work on the device managed by the +/// StreamExecutor that created it. +/// +/// The various "then" methods of the Stream object, such as thenMemcpyH2D and +/// thenLaunch, may be used to enqueue work on the Stream and the +/// blockHostUntilDone() method may be used to block the host code until the +/// Stream has completed all its work. +/// +/// Multiple Stream instances can be created for the same StreamExecutor and +/// this will allow several independent streams of computation to be performed +/// simultaneously on a single device. 
+/// +//===----------------------------------------------------------------------===// + +#ifndef STREAMEXECUTOR_STREAM_H +#define STREAMEXECUTOR_STREAM_H + +#include +#include +#include + +#include "streamexecutor/DeviceMemory.h" +#include "streamexecutor/Kernel.h" +#include "streamexecutor/LaunchDimensions.h" +#include "streamexecutor/PackedKernelArgumentArray.h" +#include "streamexecutor/Utils/Error.h" + +#include "llvm/ADT/Twine.h" +#include "llvm/Support/RWMutex.h" + +namespace streamexecutor { + +class StreamExecutor; +class StreamInterface; + +/// Key struct to allow class StreamExecutor to execute certain methods in class +/// Stream without making those methods accessible to other callers. +/// +/// This struct can only be constructed by StreamExecutor, so methods of class +/// Stream that take a StreamExecutorKey argument will only be callable by +/// StreamExecutor. +struct StreamExecutorKey { +public: + // A pointer to a StreamExecutor. Probably the one that created this key. + StreamExecutor *TheStreamExecutor; + +private: + friend class StreamExecutor; + + // Allow implicit conversion so StreamExecutor code can just pass "this" as + // the key. + StreamExecutorKey(StreamExecutor *S) : TheStreamExecutor(S) {} + + StreamExecutorKey(const StreamExecutorKey &) = default; + StreamExecutorKey &operator=(const StreamExecutorKey &) = default; +}; + +/// Represents a stream of dependent computations on a device. +/// +/// The operations within a stream execute sequentially and asynchronously until +/// blockHostUntilDone() is invoked, which synchronously joins host code with +/// the execution of the stream. +/// +/// If any given operation fails when entraining work for the stream, isOK() +/// will indicate that an error has occurred and getStatus() will get the first +/// error that occurred on the stream. There is no way to clear the error state +/// of a stream once it is in an error state. 
+class Stream { +public: + // Only a StreamExecutor can construct a Stream because only a StreamExecutor + // can create a StreamExecutorKey. + Stream(StreamExecutorKey SK, std::unique_ptr Implementation); + + Stream(Stream &&Other); + Stream &operator=(Stream &&Other); + ~Stream(); + + /// Returns true if no error has occurred while entraining work on this + /// stream. + bool isOK() const { + llvm::sys::ScopedReader ReaderLock(ErrorMessageMutex); + return ErrorMessage == nullptr; + } + + /// Returns the status created by the first error that occurred while + /// entraining work on this stream. + Error getStatus() const { + llvm::sys::ScopedReader ReaderLock(ErrorMessageMutex); + if (ErrorMessage) { + return make_error(*ErrorMessage); + } else { + return Error::success(); + } + }; + + /// Entrains onto the stream of operations a kernel launch with the given + /// arguments for the invocation. + /// + /// These arguments can be device memory types like GlobalDeviceMemory and + /// SharedDeviceMemory, or they can be primitive types such as int. The + /// allowable argument types are determined by the template parameters to the + /// TypedKernel argument. + template + Stream &thenLaunch(BlockDimensions BlockSize, GridDimensions GridSize, + const TypedKernel &Kernel, + Params... Arguments) { + auto ArgumentArray = make_kernel_argument_pack(Arguments...); + return thenRawLaunch(BlockSize, GridSize, Kernel, ArgumentArray); + } + + /// Entrain onto the stream a memcpy to a host destination from a device + /// source of the given size. + /// + /// HostDst must be a pointer to host memory allocated by + /// StreamExecutor::hostMemoryAllocate or otherwise allocated and then + /// registered with StreamExecutor::hostMemoryRegister. 
+ template + Stream &thenMemcpyD2H(const GlobalDeviceMemory &DeviceSrc, + llvm::MutableArrayRef HostDst, bool FullCopy = true, + size_t ElementCount = 0) { + if (FullCopy) { + ElementCount = DeviceSrc.getElementCount(); + } + if (ElementCount > DeviceSrc.getElementCount()) { + setError("copying too many elements, " + llvm::Twine(ElementCount) + + ", from device memory array of size " + + llvm::Twine(DeviceSrc.getElementCount())); + } else if (ElementCount > HostDst.size()) { + setError("copying too many elements, " + llvm::Twine(ElementCount) + + ", to host array of size " + llvm::Twine(HostDst.size())); + } else { + thenRawMemcpyD2H(DeviceSrc, HostDst.data(), ElementCount * sizeof(T)); + } + return *this; + } + + /// Entrain onto the stream a memcpy to a device destination from a host + /// source of the given size. + /// + /// HostSrc must be a pointer to host memory allocated by + /// StreamExecutor::hostMemoryAllocate or otherwise allocated and then + /// registered with StreamExecutor::hostMemoryRegister. + template + Stream &thenMemcpyH2D(llvm::ArrayRef HostSrc, + GlobalDeviceMemory *DeviceDst, bool FullCopy = true, + size_t ElementCount = 0) { + if (FullCopy) { + ElementCount = HostSrc.size(); + } + if (ElementCount > HostSrc.size()) { + setError("copying too many elements, " + llvm::Twine(ElementCount) + + ", from host array of size " + llvm::Twine(HostSrc.size())); + } else if (ElementCount > DeviceDst->getElementCount()) { + setError("copying too many elements, " + llvm::Twine(ElementCount) + + ", to device memory array of size " + + llvm::Twine(DeviceDst->getElementCount())); + } else { + thenRawMemcpyH2D(HostSrc.data(), DeviceDst, ElementCount * sizeof(T)); + } + return *this; + } + + /// Entrain onto the stream a memcpy to a device destination from a device + /// source of the given size. 
+ template + Stream &thenMemcpyD2D(const GlobalDeviceMemory &DeviceSrc, + GlobalDeviceMemory *DeviceDst, bool FullCopy = true, + size_t ElementCount = 0) { + if (FullCopy) { + ElementCount = DeviceSrc.getElementCount(); + } + if (ElementCount > DeviceSrc.getElementCount()) { + setError("copying too many elements, " + llvm::Twine(ElementCount) + + ", from device memory array of size " + + llvm::Twine(DeviceSrc.getElementCount())); + } else if (ElementCount > DeviceDst->getElementCount()) { + setError("copying too many elements, " + llvm::Twine(ElementCount) + + ", to device memory array of size " + + llvm::Twine(DeviceDst->getElementCount())); + } else { + thenRawMemcpyD2D(DeviceSrc, DeviceDst, ElementCount * sizeof(T)); + } + return *this; + } + + /// Blocks the host code, waiting for the operations entrained on the stream + /// (enqueued to this point in program execution) to complete. + /// + /// Returns true if there are no errors on the stream. + bool blockHostUntilDone(); + +private: + /// Sets the error state from an Error object. + /// + /// Does not overwrite the error if it is already set. + void setError(Error &&E) { + if (E) { + llvm::sys::ScopedWriter WriterLock(ErrorMessageMutex); + if (ErrorMessage == nullptr) { + ErrorMessage = + llvm::make_unique(consumeAndGetMessage(std::move(E))); + } + } + } + + /// Sets the error state from an error message. + /// + /// Does not overwrite the error if it is already set. 
+ void setError(llvm::Twine Message) { + llvm::sys::ScopedWriter WriterLock(ErrorMessageMutex); + if (ErrorMessage == nullptr) { + ErrorMessage = llvm::make_unique(Message.str()); + } + } + + Stream &thenRawLaunch(BlockDimensions BlockSize, GridDimensions GridSize, + const KernelBase &Kernel, + const PackedKernelArgumentArrayBase &ArgumentArray); + + Stream &thenRawMemcpyD2H(const GlobalDeviceMemoryBase &DeviceSrc, + void *HostDst, size_t ByteCount); + + Stream &thenRawMemcpyH2D(const void *HostSrc, + GlobalDeviceMemoryBase *DeviceDst, size_t ByteCount); + + Stream &thenRawMemcpyD2D(const GlobalDeviceMemoryBase &DeviceSrc, + GlobalDeviceMemoryBase *DeviceDst, size_t ByteCount); + + /// The StreamExecutor that supports the operations of this stream. + StreamExecutor *Parent; + + /// Platform-specific implementation. + std::unique_ptr Implementation; + + /// Mutex that guards the error message. + /// + /// Mutable so that it can be obtained via const reader lock. + mutable llvm::sys::RWMutex ErrorMessageMutex; + + /// First error message for an operation in this stream. + /// + /// If nullptr, there have been no errors in this stream. 
+ std::unique_ptr ErrorMessage; + + Stream(const Stream &) = delete; + void operator=(const Stream &) = delete; +}; + +} // namespace streamexecutor + +#endif // STREAMEXECUTOR_STREAM_H Index: streamexecutor/include/streamexecutor/StreamExecutor.h =================================================================== --- streamexecutor/include/streamexecutor/StreamExecutor.h +++ streamexecutor/include/streamexecutor/StreamExecutor.h @@ -16,11 +16,34 @@ #ifndef STREAMEXECUTOR_STREAMEXECUTOR_H #define STREAMEXECUTOR_STREAMEXECUTOR_H +#include "streamexecutor/DeviceMemory.h" +#include "streamexecutor/Stream.h" #include "streamexecutor/Utils/Error.h" namespace streamexecutor { class KernelInterface; +class Stream; + +/// Key struct to allow class Stream to execute certain methods in class +/// StreamExecutor without making those methods accessible to other callers. +/// +/// This struct can only be constructed by Stream, so methods of class +/// StreamExecutor that take a StreamKey argument will only be callable by +/// Stream. +struct StreamKey { +public: + Stream *TheStream; + +private: + friend class Stream; + + // Allow implicit conversion so Stream code can just pass "this" as the key. + StreamKey(Stream *S) : TheStream(S) {} + + StreamKey(const StreamKey &) = default; + StreamKey &operator=(const StreamKey &) = default; +}; class StreamExecutor { public: @@ -31,7 +54,54 @@ return nullptr; } - // TODO(jhen): Add other methods. + Expected> createStream() { + // TODO(jhen): Get the real implementation pointer. + return std::unique_ptr(new Stream(this, nullptr)); + } + + /// \name Stream-only interface. + /// + /// Methods that should only be called by a Stream. This protection is + /// enforced by requiring a StreamKey argument because only Stream objects can + /// create StreamKey instances. 
+ ///@{ + + Error launch(StreamKey SK, BlockDimensions BlockSize, GridDimensions GridSize, + const KernelBase &Kernel, + const PackedKernelArgumentArrayBase &ArgumentArray) { + // TODO(jhen): Implement this. + return Error::success(); + } + + Error deallocateStream(StreamKey SK) { + // TODO(jhen): Implement this. + return Error::success(); + } + + Error memcpyD2H(StreamKey SK, const GlobalDeviceMemoryBase &DeviceSrc, + void *HostDst, size_t ByteCount) { + // TODO(jhen): Implement this. + return Error::success(); + } + + Error memcpyH2D(StreamKey SK, const void *HostSrc, + GlobalDeviceMemoryBase *DeviceDst, size_t ByteCount) { + // TODO(jhen): Implement this. + return Error::success(); + } + + Error memcpyD2D(StreamKey SK, const GlobalDeviceMemoryBase &DeviceSrc, + GlobalDeviceMemoryBase *DeviceDst, size_t ByteCount) { + // TODO(jhen): Implement this. + return Error::success(); + } + + Error blockHostUntilDone(StreamKey SK) { + // TODO(jhen): Implement this. + return Error::success(); + } + + ///@} End stream-only interface }; } // namespace streamexecutor Index: streamexecutor/lib/CMakeLists.txt =================================================================== --- streamexecutor/lib/CMakeLists.txt +++ streamexecutor/lib/CMakeLists.txt @@ -7,7 +7,9 @@ streamexecutor $ Kernel.cpp - KernelSpec.cpp) + KernelSpec.cpp + PackedKernelArgumentArray.cpp + Stream.cpp) target_link_libraries(streamexecutor ${llvm_libs}) if(STREAM_EXECUTOR_UNIT_TESTS) Index: streamexecutor/lib/PackedKernelArgumentArray.cpp =================================================================== --- /dev/null +++ streamexecutor/lib/PackedKernelArgumentArray.cpp @@ -0,0 +1,21 @@ +//===-- PackedKernelArgumentArray.cpp - Packed argument array impl --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Implementation details for classes from PackedKernelArgumentArray.h. +/// +//===----------------------------------------------------------------------===// + +#include "streamexecutor/PackedKernelArgumentArray.h" + +namespace streamexecutor { + +PackedKernelArgumentArrayBase::~PackedKernelArgumentArrayBase() = default; + +} // namespace streamexecutor Index: streamexecutor/lib/Stream.cpp =================================================================== --- /dev/null +++ streamexecutor/lib/Stream.cpp @@ -0,0 +1,75 @@ +//===-- Stream.cpp - General stream implementation ------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file contains the implementation details for a general stream object. 
+/// +//===----------------------------------------------------------------------===// + +#include "streamexecutor/Stream.h" + +#include "streamexecutor/Interfaces.h" +#include "streamexecutor/StreamExecutor.h" + +namespace streamexecutor { + +Stream::Stream(StreamExecutorKey SK, + std::unique_ptr Implementation) + : Parent(SK.TheStreamExecutor), Implementation(std::move(Implementation)) {} + +Stream::Stream(Stream &&Other) + : Parent(std::move(Other.Parent)), + Implementation(std::move(Other.Implementation)), + ErrorMessage(std::move(Other.ErrorMessage)) {} + +Stream &Stream::operator=(Stream &&Other) { + this->Parent = std::move(Other.Parent); + this->Implementation = std::move(Other.Implementation); + this->ErrorMessage = std::move(Other.ErrorMessage); + return *this; +} + +Stream::~Stream() { consumeError(Parent->deallocateStream(this)); } + +Stream & +Stream::thenRawLaunch(BlockDimensions BlockSize, GridDimensions GridSize, + const KernelBase &Kernel, + const PackedKernelArgumentArrayBase &ArgumentArray) { + setError(Parent->launch(this, BlockSize, GridSize, Kernel, ArgumentArray)); + return *this; +} + +Stream &Stream::thenRawMemcpyD2H(const GlobalDeviceMemoryBase &DeviceSrc, + void *HostDst, size_t ByteCount) { + setError(Parent->memcpyD2H(this, DeviceSrc, HostDst, ByteCount)); + return *this; +} + +Stream &Stream::thenRawMemcpyH2D(const void *HostSrc, + GlobalDeviceMemoryBase *DeviceDst, + size_t ByteCount) { + setError(Parent->memcpyH2D(this, HostSrc, DeviceDst, ByteCount)); + return *this; +} + +Stream &Stream::thenRawMemcpyD2D(const GlobalDeviceMemoryBase &DeviceSrc, + GlobalDeviceMemoryBase *DeviceDst, + size_t ByteCount) { + setError(Parent->memcpyD2D(this, DeviceSrc, DeviceDst, ByteCount)); + return *this; +} + +bool Stream::blockHostUntilDone() { + if (isOK()) { + setError(Parent->blockHostUntilDone(this)); + } + return isOK(); +} + +} // namespace streamexecutor Index: streamexecutor/lib/unittests/CMakeLists.txt 
=================================================================== --- streamexecutor/lib/unittests/CMakeLists.txt +++ streamexecutor/lib/unittests/CMakeLists.txt @@ -23,7 +23,19 @@ PackedKernelArgumentArrayTest.cpp) target_link_libraries( packed_kernel_argument_array_test + streamexecutor ${llvm_libs} ${GTEST_BOTH_LIBRARIES} ${CMAKE_THREAD_LIBS_INIT}) add_test(PackedKernelArgumentArrayTest packed_kernel_argument_array_test) + +add_executable( + stream_test + StreamTest.cpp) +target_link_libraries( + stream_test + streamexecutor + ${llvm_libs} + ${GTEST_BOTH_LIBRARIES} + ${CMAKE_THREAD_LIBS_INIT}) +add_test(StreamTest stream_test) Index: streamexecutor/lib/unittests/StreamTest.cpp =================================================================== --- /dev/null +++ streamexecutor/lib/unittests/StreamTest.cpp @@ -0,0 +1,117 @@ +//===-- StreamTest.cpp - Tests for Stream ---------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file contains the unit tests for Stream code. +/// +//===----------------------------------------------------------------------===// + +#include "streamexecutor/Interfaces.h" +#include "streamexecutor/Kernel.h" +#include "streamexecutor/KernelSpec.h" +#include "streamexecutor/Stream.h" +#include "streamexecutor/StreamExecutor.h" + +#include "gtest/gtest.h" + +namespace { + +namespace se = ::streamexecutor; + +// A StreamExecutor that just returns a dummy KernelInterface when +// getKernelImplementation is called. +// +// Used to build dummy kernels for testing kernel launching. 
+class MockStreamExecutor : public se::StreamExecutor { +public: + se::Expected> + getKernelImplementation(const se::MultiKernelLoaderSpec &) override { + return llvm::make_unique(); + } +}; + +/// Test fixture to hold objects used by tests. +class StreamTest : public ::testing::Test { +public: + StreamTest() + : DeviceA(se::GlobalDeviceMemory::makeFromElementCount(nullptr, 10)), + DeviceB(se::GlobalDeviceMemory::makeFromElementCount(nullptr, 10)), + SharedDevice(se::SharedDeviceMemory::makeFromElementCount(20)) { + auto MaybeStream = Executor.createStream(); + if (MaybeStream) { + Stream = std::move(*MaybeStream); + } + } + +protected: + se::StreamExecutor Executor; + std::unique_ptr Stream; + int Host[10]; + se::GlobalDeviceMemory DeviceA; + se::GlobalDeviceMemory DeviceB; + se::SharedDeviceMemory SharedDevice; + se::BlockDimensions BlockDims; + se::GridDimensions GridDims; + se::MultiKernelLoaderSpec Spec; + MockStreamExecutor MockExecutor; +}; + +TEST_F(StreamTest, MemcpyCorrectSize) { + Stream->thenMemcpyH2D(llvm::ArrayRef(Host), &DeviceA); + EXPECT_TRUE(Stream->isOK()); + + Stream->thenMemcpyD2H(DeviceA, llvm::MutableArrayRef(Host)); + EXPECT_TRUE(Stream->isOK()); + + Stream->thenMemcpyD2D(DeviceA, &DeviceB); + EXPECT_TRUE(Stream->isOK()); +} + +TEST_F(StreamTest, MemcpyH2DTooManyElements) { + Stream->thenMemcpyH2D(llvm::ArrayRef(Host), &DeviceA, false, 20); + EXPECT_FALSE(Stream->isOK()); +} + +TEST_F(StreamTest, MemcpyD2HTooManyElements) { + Stream->thenMemcpyD2H(DeviceA, llvm::MutableArrayRef(Host), false, 20); + EXPECT_FALSE(Stream->isOK()); +} + +TEST_F(StreamTest, MemcpyD2DTooManyElements) { + Stream->thenMemcpyD2D(DeviceA, &DeviceB, false, 20); + EXPECT_FALSE(Stream->isOK()); +} + +TEST_F(StreamTest, KernelLaunchNoArguments) { + using KernelType = se::TypedKernel<>; + auto MaybeKernel = KernelType::create(&MockExecutor, Spec); + EXPECT_TRUE(static_cast(MaybeKernel)); + Stream->thenLaunch(BlockDims, GridDims, *MaybeKernel); + 
EXPECT_TRUE(Stream->isOK()); +} + +TEST_F(StreamTest, KernelLaunchOneArgument) { + using KernelType = se::TypedKernel; + auto MaybeKernel = KernelType::create(&MockExecutor, Spec); + EXPECT_TRUE(static_cast(MaybeKernel)); + Stream->thenLaunch(BlockDims, GridDims, *MaybeKernel, 10.0f); + EXPECT_TRUE(Stream->isOK()); +} + +TEST_F(StreamTest, KernelLaunchSeveralArguments) { + using KernelType = se::TypedKernel, + se::SharedDeviceMemory>; + auto MaybeKernel = KernelType::create(&MockExecutor, Spec); + EXPECT_TRUE(static_cast(MaybeKernel)); + Stream->thenLaunch(BlockDims, GridDims, *MaybeKernel, 10.0f, DeviceA, + SharedDevice); + EXPECT_TRUE(Stream->isOK()); +} + +} // namespace