Skip to content

Commit 5b363dd

Browse files
committed Aug 31, 2016
[StreamExecutor] Add Doxygen main page
Reviewers: jlebar Subscribers: jprice, parallel_libs-commits Differential Revision: https://reviews.llvm.org/D24066 llvm-svn: 280277
1 parent bd850f4 commit 5b363dd

File tree

7 files changed

+243
-5
lines changed

7 files changed

+243
-5
lines changed
 

‎parallel-libs/streamexecutor/CMakeLists.txt

+1
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON)
6262
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -Wno-unused-parameter")
6363

6464
add_subdirectory(lib)
65+
add_subdirectory(examples)
6566

6667
if (STREAM_EXECUTOR_ENABLE_DOXYGEN)
6768
find_package(Doxygen REQUIRED)

‎parallel-libs/streamexecutor/Doxyfile.in

+2-2
Original file line numberDiff line numberDiff line change
@@ -398,7 +398,7 @@ LOOKUP_CACHE_SIZE = 0
398398
# normally produced when WARNINGS is set to YES.
399399
# The default value is: NO.
400400

401-
EXTRACT_ALL = NO
401+
EXTRACT_ALL = YES
402402

403403
# If the EXTRACT_PRIVATE tag is set to YES all private members of a class will
404404
# be included in the documentation.
@@ -811,7 +811,7 @@ EXCLUDE_SYMBOLS =
811811
# that contain example code fragments that are included (see the \include
812812
# command).
813813

814-
EXAMPLE_PATH =
814+
EXAMPLE_PATH = @CMAKE_CURRENT_SOURCE_DIR@
815815

816816
# If the value of the EXAMPLE_PATH tag contains directories, you can use the
817817
# EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp and
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
# Build the documentation example program from Example.cpp.
add_executable(example Example.cpp)
2+
# Link the example program against streamexecutor.
target_link_libraries(example streamexecutor)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,163 @@
1+
//===-- Example.cpp - Example code for documentation ----------------------===//
2+
//
3+
// The LLVM Compiler Infrastructure
4+
//
5+
// This file is distributed under the University of Illinois Open Source
6+
// License. See LICENSE.TXT for details.
7+
//
8+
//===----------------------------------------------------------------------===//
9+
///
10+
/// \file
11+
/// This file contains example code demonstrating the usage of the
12+
/// StreamExecutor API. Snippets of this file will be included as code examples
13+
/// in documentation. Taking these examples from a real source file guarantees
14+
/// that the examples will always compile.
15+
///
16+
//===----------------------------------------------------------------------===//
17+
18+
#include <cassert>
19+
#include <cstdio>
20+
#include <cstdlib>
21+
#include <memory>
22+
#include <vector>
23+
24+
#include "streamexecutor/StreamExecutor.h"
25+
26+
/// [Example saxpy host helper functions]
27+
// Example handler for streamexecutor::Expected return values.
//
// When E tests false, prints the message extracted from its error to stderr
// and exits the process with EXIT_FAILURE. Otherwise returns the value held by
// E, moved out of it.
28+
template <typename T> T getOrDie(streamexecutor::Expected<T> &&E) {
29+
if (!E) {
30+
std::fprintf(stderr, "Error extracting an expected value: %s.\n",
31+
streamexecutor::consumeAndGetMessage(E.takeError()).c_str());
32+
std::exit(EXIT_FAILURE);
33+
}
34+
// E is an rvalue-reference parameter, so the held value is moved out rather
// than copied.
return std::move(*E);
35+
}
36+
37+
// Example handler for streamexecutor::Error return values.
//
// When E tests true, prints its message to stderr and exits the process with
// EXIT_FAILURE; otherwise returns without doing anything.
38+
void check(streamexecutor::Error &&E) {
39+
if (E) {
40+
std::fprintf(stderr, "Error encountered: %s.\n",
41+
streamexecutor::consumeAndGetMessage(std::move(E)).c_str());
42+
std::exit(EXIT_FAILURE);
43+
}
44+
}
45+
/// [Example saxpy host helper functions]
46+
47+
/// [Example saxpy compiler-generated]
48+
// Code in this namespace is generated by the compiler (e.g. clang).
49+
//
50+
// The name of this namespace may depend on the compiler that generated it, so
51+
// this is just an example name.
//
// Note that identifiers containing a double underscore are reserved for the
// implementation in C++, which suits compiler-generated code; hand-written code
// should pick a different name.
52+
namespace __compilergen {
53+
54+
// Specialization of the streamexecutor::Kernel template class for the parameter
55+
// types of the saxpy(float A, float *X, float *Y) kernel.
//
// The scalar A maps to float; the two pointer parameters X and Y each map to
// streamexecutor::GlobalDeviceMemory<float>.
56+
using SaxpyKernel =
57+
streamexecutor::Kernel<float, streamexecutor::GlobalDeviceMemory<float>,
58+
streamexecutor::GlobalDeviceMemory<float>>;
59+
60+
// A string containing the PTX code generated by the device compiler for the
61+
// saxpy kernel. String contents not shown here.
62+
extern const char *SaxpyPTX;
63+
64+
// A global instance of a loader spec that knows how to load the code in the
65+
// SaxpyPTX string.
//
// The spec is built by an immediately-invoked lambda so that it is fully
// populated as part of this variable's initialization.
66+
static streamexecutor::MultiKernelLoaderSpec SaxpyLoaderSpec = []() {
67+
streamexecutor::MultiKernelLoaderSpec Spec;
68+
// Registers the SaxpyPTX string under the kernel name "saxpy". The {2, 0} pair
// is presumably the (major, minor) compute capability the PTX targets, which
// would match ".target sm_20" in the PTX text -- TODO confirm against
// CUDAPTXInMemorySpec::PTXSpec.
Spec.addCUDAPTXInMemory("saxpy", {{{2, 0}, SaxpyPTX}});
69+
return Spec;
70+
}();
71+
72+
} // namespace __compilergen
73+
/// [Example saxpy compiler-generated]
74+
75+
/// [Example saxpy host PTX]
76+
// Definition of the PTX string declared as __compilergen::SaxpyPTX.
//
// The saxpy entry point takes a float A and two 64-bit addresses X and Y. Each
// thread uses %tid.x as its element index (byte offset = index * 4), loads
// X[index] and Y[index], computes A * X[index] + Y[index] with a fused
// multiply-add, and stores the result back to X[index] (X is overwritten; Y is
// only read). The kernel performs no bounds check on the index.
const char *__compilergen::SaxpyPTX = R"(
77+
.version 4.3
78+
.target sm_20
79+
.address_size 64
80+
81+
.visible .entry saxpy(.param .f32 A, .param .u64 X, .param .u64 Y) {
82+
.reg .f32 %AValue;
83+
.reg .f32 %XValue;
84+
.reg .f32 %YValue;
85+
.reg .f32 %Result;
86+
87+
.reg .b64 %XBaseAddrGeneric;
88+
.reg .b64 %YBaseAddrGeneric;
89+
.reg .b64 %XBaseAddrGlobal;
90+
.reg .b64 %YBaseAddrGlobal;
91+
.reg .b64 %XAddr;
92+
.reg .b64 %YAddr;
93+
.reg .b64 %ThreadByteOffset;
94+
95+
.reg .b32 %TID;
96+
97+
ld.param.f32 %AValue, [A];
98+
ld.param.u64 %XBaseAddrGeneric, [X];
99+
ld.param.u64 %YBaseAddrGeneric, [Y];
100+
cvta.to.global.u64 %XBaseAddrGlobal, %XBaseAddrGeneric;
101+
cvta.to.global.u64 %YBaseAddrGlobal, %YBaseAddrGeneric;
102+
mov.u32 %TID, %tid.x;
103+
mul.wide.u32 %ThreadByteOffset, %TID, 4;
104+
add.s64 %XAddr, %ThreadByteOffset, %XBaseAddrGlobal;
105+
add.s64 %YAddr, %ThreadByteOffset, %YBaseAddrGlobal;
106+
ld.global.f32 %XValue, [%XAddr];
107+
ld.global.f32 %YValue, [%YAddr];
108+
fma.rn.f32 %Result, %AValue, %XValue, %YValue;
109+
st.global.f32 [%XAddr], %Result;
110+
ret;
111+
}
112+
)";
113+
/// [Example saxpy host PTX]
114+
115+
// Example program: runs the saxpy kernel on device 0 of the "CUDA" platform
// with a small fixed input and checks the values copied back to the host.
// Returns EXIT_FAILURE if the platform reports no devices; the getOrDie and
// check helpers exit the process on any reported error.
int main() {
116+
/// [Example saxpy host main]
117+
namespace se = ::streamexecutor;
118+
namespace cg = ::__compilergen;
119+
120+
// Create some host data.
121+
float A = 42.0f;
122+
std::vector<float> HostX = {0, 1, 2, 3};
123+
std::vector<float> HostY = {4, 5, 6, 7};
124+
size_t ArraySize = HostX.size();
125+
126+
// Get a device object.
127+
se::Platform *Platform =
128+
getOrDie(se::PlatformManager::getPlatformByName("CUDA"));
129+
// With no devices on the platform there is nothing to run on; exit with a
// failure status (no message is printed in this case).
if (Platform->getDeviceCount() == 0) {
130+
return EXIT_FAILURE;
131+
}
132+
se::Device *Device = getOrDie(Platform->getDevice(0));
133+
134+
// Load the kernel onto the device.
135+
std::unique_ptr<cg::SaxpyKernel> Kernel =
136+
getOrDie(Device->createKernel<cg::SaxpyKernel>(cg::SaxpyLoaderSpec));
137+
138+
// Allocate memory on the device.
139+
se::GlobalDeviceMemory<float> X =
140+
getOrDie(Device->allocateDeviceMemory<float>(ArraySize));
141+
se::GlobalDeviceMemory<float> Y =
142+
getOrDie(Device->allocateDeviceMemory<float>(ArraySize));
143+
144+
// Run operations on a stream.
// The chain copies both host arrays to the device, launches the kernel, and
// copies X (which the kernel overwrites with the result) back into HostX.
// The return value of the chain itself is not inspected; the only status
// checked is the one returned by blockHostUntilDone() below.
// NOTE(review): the (ArraySize, 1) launch arguments are presumably the block
// and grid sizes, i.e. a single block with one thread per element -- TODO
// confirm against Stream::thenLaunch.
145+
std::unique_ptr<se::Stream> Stream = getOrDie(Device->createStream());
146+
Stream->thenCopyH2D<float>(HostX, X)
147+
.thenCopyH2D<float>(HostY, Y)
148+
.thenLaunch(ArraySize, 1, *Kernel, A, X, Y)
149+
.thenCopyD2H<float>(X, HostX);
150+
// Wait for the stream to complete.
151+
check(Stream->blockHostUntilDone());
152+
153+
// Process output data in HostX.
// Each expected element is A * X[I] + Y[I] for the original host inputs,
// e.g. 42 * 1 + 5 = 47.
// NOTE(review): assert is compiled out when NDEBUG is defined, so in such
// builds this loop verifies nothing.
154+
std::vector<float> ExpectedX = {4, 47, 90, 133};
155+
for (size_t I = 0; I < ArraySize; ++I) {
156+
assert(HostX[I] == ExpectedX[I]);
157+
}
158+
159+
// Free device memory.
160+
check(Device->freeDeviceMemory(X));
161+
check(Device->freeDeviceMemory(Y));
162+
/// [Example saxpy host main]
163+
}

‎parallel-libs/streamexecutor/include/streamexecutor/Kernel.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@
5555
/// function as follows:
5656
/// \code
5757
/// namespace ccn = compiler_cuda_namespace;
58-
/// using KernelPtr = std::unique_ptr<cnn::SaxpyKernel>;
58+
/// using KernelPtr = std::unique_ptr<ccn::SaxpyKernel>;
5959
/// // Assumes Device is a pointer to the Device on which to launch the
6060
/// // kernel.
6161
/// //

‎parallel-libs/streamexecutor/include/streamexecutor/KernelSpec.h

+3-2
Original file line numberDiff line numberDiff line change
@@ -115,8 +115,9 @@ class CUDAPTXInMemorySpec : public KernelLoaderSpec {
115115
/// Adds each item in SpecList to this object.
116116
///
117117
/// Does not take ownership of the PTXCode pointers in the SpecList elements.
118-
CUDAPTXInMemorySpec(llvm::StringRef KernelName,
119-
const llvm::ArrayRef<PTXSpec> SpecList);
118+
CUDAPTXInMemorySpec(
119+
llvm::StringRef KernelName,
120+
const llvm::ArrayRef<CUDAPTXInMemorySpec::PTXSpec> SpecList);
120121

121122
/// Returns a pointer to the PTX code for the requested compute capability.
122123
///
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
//===-- StreamExecutor.h - Main include file for StreamExecutor -*- C++ -*-===//
2+
//
3+
// The LLVM Compiler Infrastructure
4+
//
5+
// This file is distributed under the University of Illinois Open Source
6+
// License. See LICENSE.TXT for details.
7+
//
8+
//===----------------------------------------------------------------------===//
9+
10+
/// \mainpage Getting Started
11+
///
12+
/// \b StreamExecutor is a wrapper around CUDA and OpenCL host-side programming
13+
/// models (runtimes). This abstraction cleanly permits host code to target
14+
/// either CUDA or OpenCL devices with identically-functioning data parallel
15+
/// kernels. It manages the execution of concurrent work targeting the
16+
/// accelerator, similar to a host-side Executor.
17+
///
18+
/// This version of StreamExecutor can be built either as a sub-project of the
19+
/// LLVM project or as a standalone project depending on LLVM as an external
20+
/// package.
21+
///
22+
/// Below is an example of the use of the StreamExecutor API:
23+
///
24+
/// \snippet examples/Example.cpp Example saxpy host main
25+
///
26+
/// In the example, a couple of handler functions are used to handle error
27+
/// return values in the StreamExecutor API:
28+
///
29+
/// \snippet examples/Example.cpp Example saxpy host helper functions
30+
///
31+
/// These are just example handler functions. A real application will likely
32+
/// want to define similar handlers of its own that log errors in an
33+
/// application-specific way, convert errors to the application's own
34+
/// error-handling framework, or try to recover from errors as appropriate.
35+
///
36+
/// The example also references some symbols from a compiler-generated
37+
/// namespace:
38+
///
39+
/// \snippet examples/Example.cpp Example saxpy compiler-generated
40+
///
41+
/// Instead of depending on the compiler to generate this code, you can
42+
/// technically write the code yourself, but this is not recommended because
43+
/// doing so is very error-prone. For example, the template parameters for the
44+
/// Kernel specialization have to match the parameter types for the device
45+
/// kernel, and the MultiKernelLoaderSpec has to be initialized with valid
46+
/// device code for the kernel. Errors in this code will not show up until
47+
/// runtime, and may only show up as garbage output rather than an explicit
48+
/// error, which can be very hard to debug, so again, it is strongly advised not
49+
/// to write this code yourself.
50+
///
51+
/// The example compiler-generated code uses a PTX string in the source code to
52+
/// store the device code, but the device code can also be stored in other
53+
/// formats such as CUDA cubin and CUDA fatbin. Furthermore, device code can be
54+
/// stored for other platforms such as OpenCL, and StreamExecutor will pick the
55+
/// right device code at runtime based on the user's platform selection. See
56+
/// streamexecutor::MultiKernelLoaderSpec for details of how device code can be
57+
/// stored for different platforms, but again, the code to set up the
58+
/// MultiKernelLoaderSpec instance should be generated by the compiler if
59+
/// possible, not by the user.
60+
61+
#ifndef STREAMEXECUTOR_STREAMEXECUTOR_H
62+
#define STREAMEXECUTOR_STREAMEXECUTOR_H
63+
64+
#include "Device.h"
65+
#include "Kernel.h"
66+
#include "KernelSpec.h"
67+
#include "Platform.h"
68+
#include "PlatformManager.h"
69+
#include "Stream.h"
70+
71+
#endif // STREAMEXECUTOR_STREAMEXECUTOR_H

0 commit comments

Comments
 (0)
Please sign in to comment.