Index: clang-tools-extra/clang-tidy/altera/AlteraTidyModule.cpp =================================================================== --- clang-tools-extra/clang-tidy/altera/AlteraTidyModule.cpp +++ clang-tools-extra/clang-tidy/altera/AlteraTidyModule.cpp @@ -10,6 +10,7 @@ #include "../ClangTidyModule.h" #include "../ClangTidyModuleRegistry.h" #include "KernelNameRestrictionCheck.h" +#include "SingleWorkItemBarrierCheck.h" #include "StructPackAlignCheck.h" using namespace clang::ast_matchers; @@ -23,6 +24,8 @@ void addCheckFactories(ClangTidyCheckFactories &CheckFactories) override { CheckFactories.registerCheck( "altera-kernel-name-restriction"); + CheckFactories.registerCheck( + "altera-single-work-item-barrier"); CheckFactories.registerCheck( "altera-struct-pack-align"); } Index: clang-tools-extra/clang-tidy/altera/CMakeLists.txt =================================================================== --- clang-tools-extra/clang-tidy/altera/CMakeLists.txt +++ clang-tools-extra/clang-tidy/altera/CMakeLists.txt @@ -6,6 +6,7 @@ add_clang_library(clangTidyAlteraModule AlteraTidyModule.cpp KernelNameRestrictionCheck.cpp + SingleWorkItemBarrierCheck.cpp StructPackAlignCheck.cpp LINK_LIBS Index: clang-tools-extra/clang-tidy/altera/SingleWorkItemBarrierCheck.h =================================================================== --- /dev/null +++ clang-tools-extra/clang-tidy/altera/SingleWorkItemBarrierCheck.h @@ -0,0 +1,40 @@ +//===--- SingleWorkItemBarrierCheck.h - clang-tidy---------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ALTERA_SINGLE_WORK_ITEM_BARRIER_H +#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ALTERA_SINGLE_WORK_ITEM_BARRIER_H + +#include "../ClangTidyCheck.h" + +namespace clang { +namespace tidy { +namespace altera { + +/// Detects OpenCL kernel functions that call a barrier but do not call an +/// ID-function function. These functions will be treated as single work-item +/// kernels, which may be inefficient or cause an error. +/// +/// For the user-facing documentation see: +/// http://clang.llvm.org/extra/clang-tidy/checks/opencl-single-work-item-barrier.html +class SingleWorkItemBarrierCheck : public ClangTidyCheck { + const unsigned AOCVersion; + +public: + SingleWorkItemBarrierCheck(StringRef Name, ClangTidyContext *Context) + : ClangTidyCheck(Name, Context), + AOCVersion(Options.get("AOCVersion", 1600U)) {} + void registerMatchers(ast_matchers::MatchFinder *Finder) override; + void check(const ast_matchers::MatchFinder::MatchResult &Result) override; + void storeOptions(ClangTidyOptions::OptionMap &Opts) override; +}; + +} // namespace altera +} // namespace tidy +} // namespace clang + +#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ALTERA_SINGLE_WORK_ITEM_BARRIER_H Index: clang-tools-extra/clang-tidy/altera/SingleWorkItemBarrierCheck.cpp =================================================================== --- /dev/null +++ clang-tools-extra/clang-tidy/altera/SingleWorkItemBarrierCheck.cpp @@ -0,0 +1,84 @@ +//===--- SingleWorkItemBarrierCheck.cpp - clang-tidy-----------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "SingleWorkItemBarrierCheck.h" +#include "clang/AST/ASTContext.h" +#include "clang/ASTMatchers/ASTMatchFinder.h" + +using namespace clang::ast_matchers; + +namespace clang { +namespace tidy { +namespace altera { + +void SingleWorkItemBarrierCheck::registerMatchers(MatchFinder *Finder) { + // Find any function that calls barrier but does not call an ID function. + // hasAttr(attr::Kind::OpenCLKernel) restricts it to only kernel functions. + // FIXME: Have it accept all functions but check for a parameter that gets an + // ID from one of the four ID functions. + Finder->addMatcher( + // Find function declarations... + functionDecl( + allOf( + // That are OpenCL kernels... + hasAttr(attr::Kind::OpenCLKernel), + // And call a barrier function (either 1.x or 2.x version)... + forEachDescendant(callExpr(callee(functionDecl(hasAnyName( + "barrier", "work_group_barrier")))) + .bind("barrier")), + // But do not call an ID function. + unless(hasDescendant(callExpr(callee(functionDecl( + hasAnyName("get_global_id", "get_local_id", "get_group_id", + "get_local_linear_id")))))))) + .bind("function"), + this); +} + +void SingleWorkItemBarrierCheck::check(const MatchFinder::MatchResult &Result) { + const auto *MatchedDecl = Result.Nodes.getNodeAs("function"); + const auto *MatchedBarrier = Result.Nodes.getNodeAs("barrier"); + if (AOCVersion < 1701) { + // get_group_id and get_local_linear_id were added at/after v17.01 + diag(MatchedDecl->getLocation(), + "kernel function %0 does not call 'get_global_id' or 'get_local_id' " + "and will be treated as a single work-item") + << MatchedDecl; + diag(MatchedBarrier->getBeginLoc(), + "barrier call is in a single work-item and may error out", + DiagnosticIDs::Note); + } else { + // If reqd_work_group_size is anything other than (1,1,1), it will be + // interpreted as an NDRange in AOC version >= 17.1. + bool IsNDRange = false; + if (MatchedDecl->hasAttr()) { + const auto *Attribute = MatchedDecl->getAttr(); + if (Attribute->getXDim() > 1 || Attribute->getYDim() > 1 || + Attribute->getZDim() > 1) + IsNDRange = true; + } + if (IsNDRange) // No warning if kernel is treated as an NDRange. + return; + diag(MatchedDecl->getLocation(), + "kernel function %0 does not call an ID function and may be a viable " + "single work-item, but will be forced to execute as an NDRange") + << MatchedDecl; + diag(MatchedBarrier->getBeginLoc(), + "barrier call will force NDRange execution; if single work-item " + "semantics are desired a mem_fence may be more efficient", + DiagnosticIDs::Note); + } +} + +void SingleWorkItemBarrierCheck::storeOptions( + ClangTidyOptions::OptionMap &Opts) { + Options.store(Opts, "AOCVersion", AOCVersion); +} + +} // namespace altera +} // namespace tidy +} // namespace clang Index: clang-tools-extra/docs/ReleaseNotes.rst =================================================================== --- clang-tools-extra/docs/ReleaseNotes.rst +++ clang-tools-extra/docs/ReleaseNotes.rst @@ -96,6 +96,12 @@ Finds kernel files and include directives whose filename is `kernel.cl`, `Verilog.cl`, or `VHDL.cl`. +- New :doc:`altera-single-work-item-barrier + ` check. + + Finds OpenCL kernel functions that call a barrier function but do not call + an ID function. + - New :doc:`altera-struct-pack-align ` check. Index: clang-tools-extra/docs/clang-tidy/checks/altera-single-work-item-barrier.rst =================================================================== --- /dev/null +++ clang-tools-extra/docs/clang-tidy/checks/altera-single-work-item-barrier.rst @@ -0,0 +1,58 @@ +.. title:: clang-tidy - altera-single-work-item-barrier + +altera-single-work-item-barrier +=============================== + +Finds OpenCL kernel functions that call a barrier function but do not call +an ID function (``get_local_id``, ``get_local_id``, ``get_group_id``, or +``get_local_linear_id``). + +These kernels may be viable single work-item kernels, but will be forced to +execute as NDRange kernels if using a newer version of the Altera Offline +Compiler (>= v17.01). + +If using an older version of the Altera Offline Compiler, these kernel +functions will be treated as single work-item kernels, which could be +inefficient or lead to errors if NDRange semantics were intended. + +Based on the `Altera SDK for OpenCL: Best Practices Guide +`_. + +Examples: + +.. code-block:: c++ + + // error: function calls barrier but does not call an ID function. + void __kernel barrier_no_id(__global int * foo, int size) { + for (int i = 0; i < 100; i++) { + foo[i] += 5; + } + barrier(CLK_GLOBAL_MEM_FENCE); + } + + // ok: function calls barrier and an ID function. + void __kernel barrier_with_id(__global int * foo, int size) { + for (int i = 0; i < 100; i++) { + int tid = get_global_id(0); + foo[tid] += 5; + } + barrier(CLK_GLOBAL_MEM_FENCE); + } + + // ok with AOC Version 17.01: the reqd_work_group_size turns this into + // an NDRange. + __attribute__((reqd_work_group_size(2,2,2))) + void __kernel barrier_with_id(__global int * foo, int size) { + for (int i = 0; i < 100; i++) { + foo[tid] += 5; + } + barrier(CLK_GLOBAL_MEM_FENCE); + } + +Options +------- + +.. option:: AOCVersion + + Defines the version of the Altera Offline Compiler. Defaults to ``1600`` + (corresponding to version 16.00). Index: clang-tools-extra/docs/clang-tidy/checks/list.rst =================================================================== --- clang-tools-extra/docs/clang-tidy/checks/list.rst +++ clang-tools-extra/docs/clang-tidy/checks/list.rst @@ -31,6 +31,7 @@ `abseil-time-subtraction `_, "Yes" `abseil-upgrade-duration-conversions `_, "Yes" `altera-kernel-name-restriction `_, + `altera-single-work-item-barrier `_, "Yes" `altera-struct-pack-align `_, `android-cloexec-accept `_, "Yes" `android-cloexec-accept4 `_, Index: clang-tools-extra/test/clang-tidy/checkers/altera-single-work-item-barrier.cpp =================================================================== --- /dev/null +++ clang-tools-extra/test/clang-tidy/checkers/altera-single-work-item-barrier.cpp @@ -0,0 +1,300 @@ +// RUN: %check_clang_tidy -check-suffix=OLDCLOLDAOC %s altera-single-work-item-barrier %t -- -header-filter=.* "--" -cl-std=CL1.2 -c --include opencl-c.h -DOLDCLOLDAOC +// RUN: %check_clang_tidy -check-suffix=NEWCLOLDAOC %s altera-single-work-item-barrier %t -- -header-filter=.* "--" -cl-std=CL2.0 -c --include opencl-c.h -DNEWCLOLDAOC +// RUN: %check_clang_tidy -check-suffix=OLDCLNEWAOC %s altera-single-work-item-barrier %t -- -config='{CheckOptions: [{key: altera-single-work-item-barrier.AOCVersion, value: 1701}]}' -header-filter=.* "--" -cl-std=CL1.2 -c --include opencl-c.h -DOLDCLNEWAOC +// RUN: %check_clang_tidy -check-suffix=NEWCLNEWAOC %s altera-single-work-item-barrier %t -- -config='{CheckOptions: [{key: altera-single-work-item-barrier.AOCVersion, value: 1701}]}' -header-filter=.* "--" -cl-std=CL2.0 -c --include opencl-c.h -DNEWCLNEWAOC + +#ifdef OLDCLOLDAOC // OpenCL 1.2 Altera Offline Compiler < 17.1 +void __kernel error_barrier_no_id(__global int * foo, int size) { + // CHECK-MESSAGES-OLDCLOLDAOC: :[[@LINE-1]]:15: warning: kernel function 'error_barrier_no_id' does not call 'get_global_id' or 'get_local_id' and will be treated as a single work-item [altera-single-work-item-barrier] + for (int j = 0; j < 256; j++) { + for (int i = 256; i < size; i+= 256) { + foo[j] += foo[j+i]; + } + } + barrier(CLK_GLOBAL_MEM_FENCE); + // CHECK-MESSAGES-OLDCLOLDAOC: :[[@LINE-1]]:3: note: barrier call is in a single work-item and may error out + for (int i = 1; i < 256; i++) { + foo[0] += foo[i]; + } +} + +void __kernel success_barrier_global_id(__global int * foo, int size) { + barrier(CLK_GLOBAL_MEM_FENCE); + int tid = get_global_id(0); +} + +void __kernel success_barrier_local_id(__global int * foo, int size) { + barrier(CLK_GLOBAL_MEM_FENCE); + int tid = get_local_id(0); +} + +void __kernel success_barrier_both_ids(__global int * foo, int size) { + barrier(CLK_GLOBAL_MEM_FENCE); + int gid = get_global_id(0); + int lid = get_local_id(0); +} + +void success_nokernel_barrier_no_id(__global int * foo, int size) { + for (int j = 0; j < 256; j++) { + for (int i = 256; i < size; i+= 256) { + foo[j] += foo[j+i]; + } + } + barrier(CLK_GLOBAL_MEM_FENCE); + for (int i = 1; i < 256; i++) { + foo[0] += foo[i]; + } +} + +void success_nokernel_barrier_global_id(__global int * foo, int size) { + barrier(CLK_GLOBAL_MEM_FENCE); + int tid = get_global_id(0); +} + +void success_nokernel_barrier_local_id(__global int * foo, int size) { + barrier(CLK_GLOBAL_MEM_FENCE); + int tid = get_local_id(0); +} + +void success_nokernel_barrier_both_ids(__global int * foo, int size) { + barrier(CLK_GLOBAL_MEM_FENCE); + int gid = get_global_id(0); + int lid = get_local_id(0); +} +#endif + +#ifdef NEWCLOLDAOC // OpenCL 2.0 Altera Offline Compiler < 17.1 +void __kernel error_barrier_no_id(__global int * foo, int size) { + // CHECK-MESSAGES-NEWCLOLDAOC: :[[@LINE-1]]:15: warning: kernel function 'error_barrier_no_id' does not call 'get_global_id' or 'get_local_id' and will be treated as a single work-item [altera-single-work-item-barrier] + for (int j = 0; j < 256; j++) { + for (int i = 256; i < size; i+= 256) { + foo[j] += foo[j+i]; + } + } + work_group_barrier(CLK_GLOBAL_MEM_FENCE); + // CHECK-MESSAGES-NEWCLOLDAOC: :[[@LINE-1]]:3: note: barrier call is in a single work-item and may error out + for (int i = 1; i < 256; i++) { + foo[0] += foo[i]; + } +} + +void __kernel success_barrier_global_id(__global int * foo, int size) { + work_group_barrier(CLK_GLOBAL_MEM_FENCE); + int tid = get_global_id(0); +} + +void __kernel success_barrier_local_id(__global int * foo, int size) { + work_group_barrier(CLK_GLOBAL_MEM_FENCE); + int tid = get_local_id(0); +} + +void __kernel success_barrier_both_ids(__global int * foo, int size) { + work_group_barrier(CLK_GLOBAL_MEM_FENCE); + int gid = get_global_id(0); + int lid = get_local_id(0); +} + +void success_nokernel_barrier_no_id(__global int * foo, int size) { + for (int j = 0; j < 256; j++) { + for (int i = 256; i < size; i+= 256) { + foo[j] += foo[j+i]; + } + } + work_group_barrier(CLK_GLOBAL_MEM_FENCE); + for (int i = 1; i < 256; i++) { + foo[0] += foo[i]; + } +} + +void success_nokernel_barrier_global_id(__global int * foo, int size) { + work_group_barrier(CLK_GLOBAL_MEM_FENCE); + int tid = get_global_id(0); +} + +void success_nokernel_barrier_local_id(__global int * foo, int size) { + work_group_barrier(CLK_GLOBAL_MEM_FENCE); + int tid = get_local_id(0); +} + +void success_nokernel_barrier_both_ids(__global int * foo, int size) { + work_group_barrier(CLK_GLOBAL_MEM_FENCE); + int gid = get_global_id(0); + int lid = get_local_id(0); +} +#endif + +#ifdef OLDCLNEWAOC // OpenCL 1.2 Altera Offline Compiler >= 17.1 +void __kernel error_barrier_no_id(__global int * foo, int size) { + // CHECK-MESSAGES-OLDCLNEWAOC: :[[@LINE-1]]:15: warning: kernel function 'error_barrier_no_id' does not call an ID function and may be a viable single work-item, but will be forced to execute as an NDRange [altera-single-work-item-barrier] + for (int j = 0; j < 256; j++) { + for (int i = 256; i < size; i+= 256) { + foo[j] += foo[j+i]; + } + } + barrier(CLK_GLOBAL_MEM_FENCE); + // CHECK-MESSAGES-OLDCLNEWAOC: :[[@LINE-1]]:3: note: barrier call will force NDRange execution; if single work-item semantics are desired a mem_fence may be more efficient + for (int i = 1; i < 256; i++) { + foo[0] += foo[i]; + } +} + +__attribute__ ((reqd_work_group_size(1,1,1))) +void __kernel error_barrier_no_id_work_group_size(__global int * foo, int size) { + // CHECK-MESSAGES-OLDCLNEWAOC: :[[@LINE-1]]:15: warning: kernel function 'error_barrier_no_id_work_group_size' does not call an ID function and may be a viable single work-item, but will be forced to execute as an NDRange [altera-single-work-item-barrier] + for (int j = 0; j < 256; j++) { + for (int i = 256; i < size; i+= 256) { + foo[j] += foo[j+i]; + } + } + barrier(CLK_GLOBAL_MEM_FENCE); + // CHECK-MESSAGES-OLDCLNEWAOC: :[[@LINE-1]]:3: note: barrier call will force NDRange execution; if single work-item semantics are desired a mem_fence may be more efficient + for (int i = 1; i < 256; i++) { + foo[0] += foo[i]; + } +} + +__attribute__ ((reqd_work_group_size(2,1,1))) +void __kernel success_barrier_no_id_work_group_size(__global int * foo, int size) { + for (int j = 0; j < 256; j++) { + for (int i = 256; i < size; i+= 256) { + foo[j] += foo[j+i]; + } + } + barrier(CLK_GLOBAL_MEM_FENCE); + for (int i = 1; i < 256; i++) { + foo[0] += foo[i]; + } +} + +void __kernel success_barrier_global_id(__global int * foo, int size) { + barrier(CLK_GLOBAL_MEM_FENCE); + int tid = get_global_id(0); +} + +void __kernel success_barrier_local_id(__global int * foo, int size) { + barrier(CLK_GLOBAL_MEM_FENCE); + int tid = get_local_id(0); +} + +void __kernel success_barrier_both_ids(__global int * foo, int size) { + barrier(CLK_GLOBAL_MEM_FENCE); + int gid = get_global_id(0); + int lid = get_local_id(0); +} + +void success_nokernel_barrier_no_id(__global int * foo, int size) { + for (int j = 0; j < 256; j++) { + for (int i = 256; i < size; i+= 256) { + foo[j] += foo[j+i]; + } + } + barrier(CLK_GLOBAL_MEM_FENCE); + for (int i = 1; i < 256; i++) { + foo[0] += foo[i]; + } +} + +void success_nokernel_barrier_global_id(__global int * foo, int size) { + barrier(CLK_GLOBAL_MEM_FENCE); + int tid = get_global_id(0); +} + +void success_nokernel_barrier_local_id(__global int * foo, int size) { + barrier(CLK_GLOBAL_MEM_FENCE); + int tid = get_local_id(0); +} + +void success_nokernel_barrier_both_ids(__global int * foo, int size) { + barrier(CLK_GLOBAL_MEM_FENCE); + int gid = get_global_id(0); + int lid = get_local_id(0); +} +#endif + +#ifdef NEWCLNEWAOC // OpenCL 2.0 Altera Offline Compiler >= 17.1 +void __kernel error_barrier_no_id(__global int * foo, int size) { + // CHECK-MESSAGES-NEWCLNEWAOC: :[[@LINE-1]]:15: warning: kernel function 'error_barrier_no_id' does not call an ID function and may be a viable single work-item, but will be forced to execute as an NDRange [altera-single-work-item-barrier] + for (int j = 0; j < 256; j++) { + for (int i = 256; i < size; i+= 256) { + foo[j] += foo[j+i]; + } + } + work_group_barrier(CLK_GLOBAL_MEM_FENCE); + // CHECK-MESSAGES-NEWCLNEWAOC: :[[@LINE-1]]:3: note: barrier call will force NDRange execution; if single work-item semantics are desired a mem_fence may be more efficient + for (int i = 1; i < 256; i++) { + foo[0] += foo[i]; + } +} + +__attribute__ ((reqd_work_group_size(1,1,1))) +void __kernel error_barrier_no_id_work_group_size(__global int * foo, int size) { + // CHECK-MESSAGES-NEWCLNEWAOC: :[[@LINE-1]]:15: warning: kernel function 'error_barrier_no_id_work_group_size' does not call an ID function and may be a viable single work-item, but will be forced to execute as an NDRange [altera-single-work-item-barrier] + for (int j = 0; j < 256; j++) { + for (int i = 256; i < size; i+= 256) { + foo[j] += foo[j+i]; + } + } + work_group_barrier(CLK_GLOBAL_MEM_FENCE); + // CHECK-MESSAGES-NEWCLNEWAOC: :[[@LINE-1]]:3: note: barrier call will force NDRange execution; if single work-item semantics are desired a mem_fence may be more efficient + for (int i = 1; i < 256; i++) { + foo[0] += foo[i]; + } +} + +__attribute__ ((reqd_work_group_size(2,1,1))) +void __kernel success_barrier_no_id_work_group_size(__global int * foo, int size) { + for (int j = 0; j < 256; j++) { + for (int i = 256; i < size; i+= 256) { + foo[j] += foo[j+i]; + } + } + work_group_barrier(CLK_GLOBAL_MEM_FENCE); + for (int i = 1; i < 256; i++) { + foo[0] += foo[i]; + } +} + +void __kernel success_barrier_global_id(__global int * foo, int size) { + work_group_barrier(CLK_GLOBAL_MEM_FENCE); + int tid = get_global_id(0); +} + +void __kernel success_barrier_local_id(__global int * foo, int size) { + work_group_barrier(CLK_GLOBAL_MEM_FENCE); + int tid = get_local_id(0); +} + +void __kernel success_barrier_both_ids(__global int * foo, int size) { + work_group_barrier(CLK_GLOBAL_MEM_FENCE); + int gid = get_global_id(0); + int lid = get_local_id(0); +} + +void success_nokernel_barrier_no_id(__global int * foo, int size) { + for (int j = 0; j < 256; j++) { + for (int i = 256; i < size; i+= 256) { + foo[j] += foo[j+i]; + } + } + work_group_barrier(CLK_GLOBAL_MEM_FENCE); + for (int i = 1; i < 256; i++) { + foo[0] += foo[i]; + } +} + +void success_nokernel_barrier_global_id(__global int * foo, int size) { + work_group_barrier(CLK_GLOBAL_MEM_FENCE); + int tid = get_global_id(0); +} + +void success_nokernel_barrier_local_id(__global int * foo, int size) { + work_group_barrier(CLK_GLOBAL_MEM_FENCE); + int tid = get_local_id(0); +} + +void success_nokernel_barrier_both_ids(__global int * foo, int size) { + work_group_barrier(CLK_GLOBAL_MEM_FENCE); + int gid = get_global_id(0); + int lid = get_local_id(0); +} +#endif