Index: streamexecutor/Doxyfile.in =================================================================== --- streamexecutor/Doxyfile.in +++ streamexecutor/Doxyfile.in @@ -811,7 +811,7 @@ # that contain example code fragments that are included (see the \include # command). -EXAMPLE_PATH = @CMAKE_CURRENT_SOURCE_DIR@ +EXAMPLE_PATH = @CMAKE_CURRENT_SOURCE_DIR@/examples # If the value of the EXAMPLE_PATH tag contains directories, you can use the # EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp and @@ -825,7 +825,7 @@ # irrespective of the value of the RECURSIVE tag. # The default value is: NO. -EXAMPLE_RECURSIVE = NO +EXAMPLE_RECURSIVE = YES # The IMAGE_PATH tag can be used to specify one or more files or directories # that contain images that are to be included in the documentation (see the @@ -983,7 +983,7 @@ # Minimum value: 1, maximum value: 20, default value: 5. # This tag requires that the tag ALPHABETICAL_INDEX is set to YES. -COLS_IN_ALPHA_INDEX = 5 +COLS_IN_ALPHA_INDEX = 1 # In case all classes in a project start with a common prefix, all classes will # be put under the same header in the alphabetical index. The IGNORE_PREFIX tag @@ -1068,7 +1068,7 @@ # see the documentation. # This tag requires that the tag GENERATE_HTML is set to YES. -HTML_EXTRA_STYLESHEET = +HTML_EXTRA_STYLESHEET = @CMAKE_CURRENT_SOURCE_DIR@/customdoxygen.css # The HTML_EXTRA_FILES tag can be used to specify one or more extra images or # other source files which should be copied to the HTML output directory. Note @@ -1352,7 +1352,7 @@ # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. -GENERATE_TREEVIEW = NO +GENERATE_TREEVIEW = YES # The ENUM_VALUES_PER_LINE tag can be used to set the number of enum values that # doxygen will group on one line in the generated HTML documentation. 
Index: streamexecutor/customdoxygen.css =================================================================== --- /dev/null +++ streamexecutor/customdoxygen.css @@ -0,0 +1,20 @@ +body { + background-color: #e0e0eb; +} + +div.header { + margin-left: auto; + margin-right: auto; + max-width: 60em; + padding-left: 2em; + padding-right: 2em; +} + +div.contents { + margin-left: auto; + margin-right: auto; + max-width: 60em; + background-color: white; + padding: 2em; + border-radius: 1em; +} Index: streamexecutor/examples/CMakeLists.txt =================================================================== --- streamexecutor/examples/CMakeLists.txt +++ streamexecutor/examples/CMakeLists.txt @@ -1,2 +1,2 @@ -add_executable(example Example.cpp) -target_link_libraries(example streamexecutor) +add_executable(cuda_saxpy_example CUDASaxpy.cpp) +target_link_libraries(cuda_saxpy_example streamexecutor) Index: streamexecutor/examples/CUDASaxpy.cpp =================================================================== --- streamexecutor/examples/CUDASaxpy.cpp +++ streamexecutor/examples/CUDASaxpy.cpp @@ -1,4 +1,4 @@ -//===-- Example.cpp - Example code for documentation ----------------------===// +//===-- CUDASaxpy.cpp - Example of CUDA saxpy with StreamExecutor API -----===// // // The LLVM Compiler Infrastructure // @@ -15,6 +15,7 @@ /// //===----------------------------------------------------------------------===// +#include #include #include #include @@ -51,6 +52,7 @@ /// [Example saxpy compiler-generated] /// [Example saxpy host PTX] +// The PTX text for a saxpy kernel. const char *__compilergen::SaxpyPTX = R"( .version 4.3 .target sm_20 @@ -130,8 +132,6 @@ // Process output data in HostX. 
std::vector<float> ExpectedX = {4, 47, 90, 133}; - for (size_t I = 0; I < ArraySize; ++I) { - assert(HostX[I] == ExpectedX[I]); - } + assert(std::equal(ExpectedX.begin(), ExpectedX.end(), HostX.begin())); /// [Example saxpy host main] } Index: streamexecutor/include/streamexecutor/StreamExecutor.h =================================================================== --- streamexecutor/include/streamexecutor/StreamExecutor.h +++ streamexecutor/include/streamexecutor/StreamExecutor.h @@ -7,8 +7,9 @@ // //===----------------------------------------------------------------------===// -/// \mainpage Getting Started +/// \mainpage Welcome to StreamExecutor /// +/// \section Introduction /// \b StreamExecutor is a wrapper around CUDA and OpenCL host-side programming /// models (runtimes). This abstraction cleanly permits host code to target /// either CUDA or OpenCL devices with identically-functioning data parallel @@ -19,9 +20,10 @@ /// LLVM project or as a standalone project depending on LLVM as an external /// package. /// +/// \subsection ExampleUsage Example Usage /// Below is an example of the use of the StreamExecutor API: /// -/// \snippet examples/Example.cpp Example saxpy host main +/// \snippet examples/CUDASaxpy.cpp Example saxpy host main /// /// In the example, a couple of handler functions, \c getOrDie and \c /// dieIfError, are used to handle error return values in the StreamExecutor @@ -30,10 +32,12 @@ /// versions of these handlers so that errors are handled more gracefully than /// just exiting the program. 
/// +/// \subsection CompilerGeneratedCode Compiler-Generated Code +/// /// The example also references some symbols from a compiler-generated /// namespace: /// -/// \snippet examples/Example.cpp Example saxpy compiler-generated +/// \snippet examples/CUDASaxpy.cpp Example saxpy compiler-generated /// /// Instead of depending on the compiler to generate this code, you can /// technically write the code yourself, but this is not recommended because the @@ -55,6 +59,9 @@ /// MultiKernelLoaderSpec instance should be generated by the compiler if /// possible, not by the user. +/// \example examples/CUDASaxpy.cpp +/// Running saxpy on a CUDA device. + #ifndef STREAMEXECUTOR_STREAMEXECUTOR_H #define STREAMEXECUTOR_STREAMEXECUTOR_H