Index: clang-tools-extra/clang-tidy/altera/AlteraTidyModule.cpp =================================================================== --- clang-tools-extra/clang-tidy/altera/AlteraTidyModule.cpp +++ clang-tools-extra/clang-tidy/altera/AlteraTidyModule.cpp @@ -9,6 +9,7 @@ #include "../ClangTidy.h" #include "../ClangTidyModule.h" #include "../ClangTidyModuleRegistry.h" +#include "SingleWorkItemBarrierCheck.h" #include "StructPackAlignCheck.h" using namespace clang::ast_matchers; @@ -20,6 +21,8 @@ class AlteraModule : public ClangTidyModule { public: void addCheckFactories(ClangTidyCheckFactories &CheckFactories) override { + CheckFactories.registerCheck( + "altera-single-work-item-barrier"); CheckFactories.registerCheck( "altera-struct-pack-align"); } Index: clang-tools-extra/clang-tidy/altera/CMakeLists.txt =================================================================== --- clang-tools-extra/clang-tidy/altera/CMakeLists.txt +++ clang-tools-extra/clang-tidy/altera/CMakeLists.txt @@ -5,6 +5,7 @@ add_clang_library(clangTidyAlteraModule AlteraTidyModule.cpp + SingleWorkItemBarrierCheck.cpp StructPackAlignCheck.cpp LINK_LIBS Index: clang-tools-extra/clang-tidy/altera/SingleWorkItemBarrierCheck.h =================================================================== --- /dev/null +++ clang-tools-extra/clang-tidy/altera/SingleWorkItemBarrierCheck.h @@ -0,0 +1,40 @@ +//===--- SingleWorkItemBarrierCheck.h - clang-tidy---------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ALTERA_SINGLE_WORK_ITEM_BARRIER_H +#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ALTERA_SINGLE_WORK_ITEM_BARRIER_H + +#include "../ClangTidyCheck.h" + +namespace clang { +namespace tidy { +namespace altera { + +/// Detects OpenCL kernel functions that call a barrier but do not call an +/// ID-function function. These functions will be treated as single work-item +/// kernels, which may be inefficient or cause an error. +/// +/// For the user-facing documentation see: +/// http://clang.llvm.org/extra/clang-tidy/checks/opencl-single-work-item-barrier.html +class SingleWorkItemBarrierCheck : public ClangTidyCheck { + const unsigned AOCVersion; + +public: + SingleWorkItemBarrierCheck(StringRef Name, ClangTidyContext *Context) + : ClangTidyCheck(Name, Context), + AOCVersion(Options.get("AOCVersion", 1600U)) {} + void registerMatchers(ast_matchers::MatchFinder *Finder) override; + void check(const ast_matchers::MatchFinder::MatchResult &Result) override; + void storeOptions(ClangTidyOptions::OptionMap &Opts) override; +}; + +} // namespace altera +} // namespace tidy +} // namespace clang + +#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ALTERA_SINGLE_WORK_ITEM_BARRIER_H Index: clang-tools-extra/clang-tidy/altera/SingleWorkItemBarrierCheck.cpp =================================================================== --- /dev/null +++ clang-tools-extra/clang-tidy/altera/SingleWorkItemBarrierCheck.cpp @@ -0,0 +1,88 @@ +//===--- SingleWorkItemBarrierCheck.cpp - clang-tidy-----------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "SingleWorkItemBarrierCheck.h" +#include "clang/AST/ASTContext.h" +#include "clang/ASTMatchers/ASTMatchFinder.h" + +using namespace clang::ast_matchers; + +namespace clang { +namespace tidy { +namespace altera { + +void SingleWorkItemBarrierCheck::registerMatchers(MatchFinder *Finder) { + // Find any function that calls barrier but does not call either get_local_id + // or get_global_id, and will thus be treated as a single-work-item kernel + // hasAttr(attr::Kind::OpenCLKernel) restricts it to only kernel functions. + // FIXME: Have it accept all functions but check for a parameter that gets an + // ID from one of the two ID functions. + Finder->addMatcher( + // Find function declarations... + functionDecl( + allOf( + // That are OpenCL kernels... + hasAttr(attr::Kind::OpenCLKernel), + // And call a barrier function (either 1.x or 2.x version)... + forEachDescendant(callExpr(callee(functionDecl(anyOf( + hasName("barrier"), + hasName("work_group_barrier"))))) + .bind("barrier")), + // But do not call an ID function. + unless(hasDescendant(callExpr(callee(functionDecl(anyOf( + hasName("get_global_id"), hasName("get_local_id"))))))))) + .bind("function"), + this); +} + +void SingleWorkItemBarrierCheck::check(const MatchFinder::MatchResult &Result) { + const auto *MatchedDecl = Result.Nodes.getNodeAs("function"); + const auto *MatchedBarrier = Result.Nodes.getNodeAs("barrier"); + // If reqd_work_group_size is anything other than (1,1,1), it will be + // interpreted as an NDRange in AOC version 17.1. + bool IsNDRange = false; + for (const Attr *Attribute : MatchedDecl->getAttrs()) { + if (Attribute->getKind() == attr::Kind::ReqdWorkGroupSize) { + const auto *RWGSAttribute = + static_cast(Attribute); + if (RWGSAttribute->getXDim() > 1 || RWGSAttribute->getYDim() > 1 || + RWGSAttribute->getZDim() > 1) + IsNDRange = true; + break; + } + } + if (AOCVersion < 1701) + diag(MatchedDecl->getLocation(), + "Kernel function %0 does not call get_global_id or get_local_id and " + "will be treated as single-work-item.\nBarrier call at %1 may error " + "out") + << MatchedDecl + << MatchedBarrier->getBeginLoc().printToString( + Result.Context->getSourceManager()); + else { + if (IsNDRange) // No warning if kernel is treated as an NDRange. + return; + diag(MatchedDecl->getLocation(), + "Kernel function %0 does not call get_global_id or get_local_id may " + "be a viable single work-item kernel, but barrier call at %1 will " + "force NDRange execution. If single work-item semantics are desired a " + "mem_fence may be more efficient.") + << MatchedDecl + << MatchedBarrier->getBeginLoc().printToString( + Result.Context->getSourceManager()); + } +} + +void SingleWorkItemBarrierCheck::storeOptions( + ClangTidyOptions::OptionMap &Opts) { + Options.store(Opts, "AOCVersion", AOCVersion); +} + +} // namespace altera +} // namespace tidy +} // namespace clang Index: clang-tools-extra/docs/ReleaseNotes.rst =================================================================== --- clang-tools-extra/docs/ReleaseNotes.rst +++ clang-tools-extra/docs/ReleaseNotes.rst @@ -79,6 +79,12 @@ New checks ^^^^^^^^^^ +- New :doc:`altera-single-work-item-barrier + ` check. + + Finds OpenCL kernel functions that call a barrier function but do not call + an ID function. + - New :doc:`altera-struct-pack-align ` check. Index: clang-tools-extra/docs/clang-tidy/checks/altera-single-work-item-barrier.rst =================================================================== --- /dev/null +++ clang-tools-extra/docs/clang-tidy/checks/altera-single-work-item-barrier.rst @@ -0,0 +1,52 @@ +.. title:: clang-tidy - altera-single-work-item-barrier + +altera-single-work-item-barrier +=============================== + +Finds OpenCL kernel functions that call a barrier function but do not call +an ID function. + +These kernel functions will be treated as single work-item kernels, which +could be inefficient or lead to errors. + +Based on the `Altera SDK for OpenCL: Best Practices Guide +`_. + +Examples: + +.. code-block:: c++ + + // error: function calls barrier but does not call an ID function. + void __kernel barrier_no_id(__global int * foo, int size) { + for (int i = 0; i < 100; i++) { + foo[i] += 5; + } + barrier(CLK_GLOBAL_MEM_FENCE); + } + + // ok: function calls barrier and an ID function. + void __kernel barrier_with_id(__global int * foo, int size) { + for (int i = 0; i < 100; i++) { + int tid = get_global_id(0); + foo[tid] += 5; + } + barrier(CLK_GLOBAL_MEM_FENCE); + } + + // ok with AOC Version 17.01: the reqd_work_group_size turns this into + // an NDRange. + __attribute__((reqd_work_group_size(2,2,2))) + void __kernel barrier_with_id(__global int * foo, int size) { + for (int i = 0; i < 100; i++) { + foo[tid] += 5; + } + barrier(CLK_GLOBAL_MEM_FENCE); + } + +Options +------- + +.. option:: AOCVersion + + Defines the version of the Altera Offline Compiler. Defaults to `1600` + (corresponding to version 16.00). Index: clang-tools-extra/docs/clang-tidy/checks/list.rst =================================================================== --- clang-tools-extra/docs/clang-tidy/checks/list.rst +++ clang-tools-extra/docs/clang-tidy/checks/list.rst @@ -30,6 +30,7 @@ `abseil-time-comparison `_, "Yes" `abseil-time-subtraction `_, "Yes" `abseil-upgrade-duration-conversions `_, "Yes" + `altera-single-work-item-barrier `_, "Yes" `altera-struct-pack-align `_, `android-cloexec-accept `_, "Yes" `android-cloexec-accept4 `_, Index: clang-tools-extra/test/clang-tidy/checkers/altera-single-work-item-barrier.cpp =================================================================== --- /dev/null +++ clang-tools-extra/test/clang-tidy/checkers/altera-single-work-item-barrier.cpp @@ -0,0 +1,294 @@ +// RUN: %check_clang_tidy -check-suffix=OLD %s altera-single-work-item-barrier %t -- -header-filter=.* "--" -cl-std=CL1.2 -c --include opencl-c.h -DOLD +// RUN: %check_clang_tidy -check-suffix=NEW %s altera-single-work-item-barrier %t -- -header-filter=.* "--" -cl-std=CL2.0 -c --include opencl-c.h -DNEW +// RUN: %check_clang_tidy -check-suffix=AOCOLD %s altera-single-work-item-barrier %t -- -config='{CheckOptions: [{key: altera-single-work-item-barrier.AOCVersion, value: 1701}]}' -header-filter=.* "--" -cl-std=CL1.2 -c --include opencl-c.h -DAOCOLD +// RUN: %check_clang_tidy -check-suffix=AOCNEW %s altera-single-work-item-barrier %t -- -config='{CheckOptions: [{key: altera-single-work-item-barrier.AOCVersion, value: 1701}]}' -header-filter=.* "--" -cl-std=CL2.0 -c --include opencl-c.h -DAOCNEW + +#ifdef OLD +void __kernel error_barrier_no_id(__global int * foo, int size) { + for (int j = 0; j < 256; j++) { + for (int i = 256; i < size; i+= 256) { + foo[j] += foo[j+i]; + } + } + barrier(CLK_GLOBAL_MEM_FENCE); + // CHECK-MESSAGES-OLD: :[[@LINE-7]]:15: warning: Kernel function 'error_barrier_no_id' does not call get_global_id or get_local_id and will be treated as single-work-item.{{[[:space:]]}}Barrier call at {{(\/)?([^\/\0]+(\/)?)+}}:[[@LINE-1]]:3 may error out [altera-single-work-item-barrier] + for (int i = 1; i < 256; i++) { + foo[0] += foo[i]; + } +} + +void __kernel success_barrier_global_id(__global int * foo, int size) { + barrier(CLK_GLOBAL_MEM_FENCE); + int tid = get_global_id(0); +} + +void __kernel success_barrier_local_id(__global int * foo, int size) { + barrier(CLK_GLOBAL_MEM_FENCE); + int tid = get_local_id(0); +} + +void __kernel success_barrier_both_ids(__global int * foo, int size) { + barrier(CLK_GLOBAL_MEM_FENCE); + int gid = get_global_id(0); + int lid = get_local_id(0); +} + +void success_nokernel_barrier_no_id(__global int * foo, int size) { + for (int j = 0; j < 256; j++) { + for (int i = 256; i < size; i+= 256) { + foo[j] += foo[j+i]; + } + } + barrier(CLK_GLOBAL_MEM_FENCE); + for (int i = 1; i < 256; i++) { + foo[0] += foo[i]; + } +} + +void success_nokernel_barrier_global_id(__global int * foo, int size) { + barrier(CLK_GLOBAL_MEM_FENCE); + int tid = get_global_id(0); +} + +void success_nokernel_barrier_local_id(__global int * foo, int size) { + barrier(CLK_GLOBAL_MEM_FENCE); + int tid = get_local_id(0); +} + +void success_nokernel_barrier_both_ids(__global int * foo, int size) { + barrier(CLK_GLOBAL_MEM_FENCE); + int gid = get_global_id(0); + int lid = get_local_id(0); +} +#endif + +#ifdef NEW +void __kernel error_barrier_no_id(__global int * foo, int size) { + for (int j = 0; j < 256; j++) { + for (int i = 256; i < size; i+= 256) { + foo[j] += foo[j+i]; + } + } + work_group_barrier(CLK_GLOBAL_MEM_FENCE); + // CHECK-MESSAGES-NEW: :[[@LINE-7]]:15: warning: Kernel function 'error_barrier_no_id' does not call get_global_id or get_local_id and will be treated as single-work-item.{{[[:space:]]}}Barrier call at {{(\/)?([^\/\0]+(\/)?)+}}:[[@LINE-1]]:3 may error out [altera-single-work-item-barrier] + for (int i = 1; i < 256; i++) { + foo[0] += foo[i]; + } +} + +void __kernel success_barrier_global_id(__global int * foo, int size) { + work_group_barrier(CLK_GLOBAL_MEM_FENCE); + int tid = get_global_id(0); +} + +void __kernel success_barrier_local_id(__global int * foo, int size) { + work_group_barrier(CLK_GLOBAL_MEM_FENCE); + int tid = get_local_id(0); +} + +void __kernel success_barrier_both_ids(__global int * foo, int size) { + work_group_barrier(CLK_GLOBAL_MEM_FENCE); + int gid = get_global_id(0); + int lid = get_local_id(0); +} + +void success_nokernel_barrier_no_id(__global int * foo, int size) { + for (int j = 0; j < 256; j++) { + for (int i = 256; i < size; i+= 256) { + foo[j] += foo[j+i]; + } + } + work_group_barrier(CLK_GLOBAL_MEM_FENCE); + for (int i = 1; i < 256; i++) { + foo[0] += foo[i]; + } +} + +void success_nokernel_barrier_global_id(__global int * foo, int size) { + work_group_barrier(CLK_GLOBAL_MEM_FENCE); + int tid = get_global_id(0); +} + +void success_nokernel_barrier_local_id(__global int * foo, int size) { + work_group_barrier(CLK_GLOBAL_MEM_FENCE); + int tid = get_local_id(0); +} + +void success_nokernel_barrier_both_ids(__global int * foo, int size) { + work_group_barrier(CLK_GLOBAL_MEM_FENCE); + int gid = get_global_id(0); + int lid = get_local_id(0); +} +#endif + +#ifdef AOCOLD +void __kernel error_barrier_no_id(__global int * foo, int size) { + for (int j = 0; j < 256; j++) { + for (int i = 256; i < size; i+= 256) { + foo[j] += foo[j+i]; + } + } + barrier(CLK_GLOBAL_MEM_FENCE); + // CHECK-MESSAGES-AOCOLD: :[[@LINE-7]]:15: warning: Kernel function 'error_barrier_no_id' does not call get_global_id or get_local_id may be a viable single work-item kernel, but barrier call at {{(\/)?([^\/\0]+(\/)?)+}}:[[@LINE-1]]:3 will force NDRange execution. If single work-item semantics are desired a mem_fence may be more efficient. [altera-single-work-item-barrier] + for (int i = 1; i < 256; i++) { + foo[0] += foo[i]; + } +} + +__attribute__ ((reqd_work_group_size(1,1,1))) +void __kernel error_barrier_no_id_work_group_size(__global int * foo, int size) { + for (int j = 0; j < 256; j++) { + for (int i = 256; i < size; i+= 256) { + foo[j] += foo[j+i]; + } + } + barrier(CLK_GLOBAL_MEM_FENCE); + // CHECK-MESSAGES-AOCOLD: :[[@LINE-7]]:15: warning: Kernel function 'error_barrier_no_id_work_group_size' does not call get_global_id or get_local_id may be a viable single work-item kernel, but barrier call at {{(\/)?([^\/\0]+(\/)?)+}}:[[@LINE-1]]:3 will force NDRange execution. If single work-item semantics are desired a mem_fence may be more efficient. [altera-single-work-item-barrier] + for (int i = 1; i < 256; i++) { + foo[0] += foo[i]; + } +} + +__attribute__ ((reqd_work_group_size(2,1,1))) +void __kernel success_barrier_no_id_work_group_size(__global int * foo, int size) { + for (int j = 0; j < 256; j++) { + for (int i = 256; i < size; i+= 256) { + foo[j] += foo[j+i]; + } + } + barrier(CLK_GLOBAL_MEM_FENCE); + for (int i = 1; i < 256; i++) { + foo[0] += foo[i]; + } +} + +void __kernel success_barrier_global_id(__global int * foo, int size) { + barrier(CLK_GLOBAL_MEM_FENCE); + int tid = get_global_id(0); +} + +void __kernel success_barrier_local_id(__global int * foo, int size) { + barrier(CLK_GLOBAL_MEM_FENCE); + int tid = get_local_id(0); +} + +void __kernel success_barrier_both_ids(__global int * foo, int size) { + barrier(CLK_GLOBAL_MEM_FENCE); + int gid = get_global_id(0); + int lid = get_local_id(0); +} + +void success_nokernel_barrier_no_id(__global int * foo, int size) { + for (int j = 0; j < 256; j++) { + for (int i = 256; i < size; i+= 256) { + foo[j] += foo[j+i]; + } + } + barrier(CLK_GLOBAL_MEM_FENCE); + for (int i = 1; i < 256; i++) { + foo[0] += foo[i]; + } +} + +void success_nokernel_barrier_global_id(__global int * foo, int size) { + barrier(CLK_GLOBAL_MEM_FENCE); + int tid = get_global_id(0); +} + +void success_nokernel_barrier_local_id(__global int * foo, int size) { + barrier(CLK_GLOBAL_MEM_FENCE); + int tid = get_local_id(0); +} + +void success_nokernel_barrier_both_ids(__global int * foo, int size) { + barrier(CLK_GLOBAL_MEM_FENCE); + int gid = get_global_id(0); + int lid = get_local_id(0); +} +#endif + +#ifdef AOCNEW +void __kernel error_barrier_no_id(__global int * foo, int size) { + for (int j = 0; j < 256; j++) { + for (int i = 256; i < size; i+= 256) { + foo[j] += foo[j+i]; + } + } + work_group_barrier(CLK_GLOBAL_MEM_FENCE); + // CHECK-MESSAGES-AOCNEW: :[[@LINE-7]]:15: warning: Kernel function 'error_barrier_no_id' does not call get_global_id or get_local_id may be a viable single work-item kernel, but barrier call at {{(\/)?([^\/\0]+(\/)?)+}}:[[@LINE-1]]:3 will force NDRange execution. If single work-item semantics are desired a mem_fence may be more efficient. [altera-single-work-item-barrier] + for (int i = 1; i < 256; i++) { + foo[0] += foo[i]; + } +} + +__attribute__ ((reqd_work_group_size(1,1,1))) +void __kernel error_barrier_no_id_work_group_size(__global int * foo, int size) { + for (int j = 0; j < 256; j++) { + for (int i = 256; i < size; i+= 256) { + foo[j] += foo[j+i]; + } + } + work_group_barrier(CLK_GLOBAL_MEM_FENCE); + // CHECK-MESSAGES-AOCNEW: :[[@LINE-7]]:15: warning: Kernel function 'error_barrier_no_id_work_group_size' does not call get_global_id or get_local_id may be a viable single work-item kernel, but barrier call at {{(\/)?([^\/\0]+(\/)?)+}}:[[@LINE-1]]:3 will force NDRange execution. If single work-item semantics are desired a mem_fence may be more efficient. [altera-single-work-item-barrier] + for (int i = 1; i < 256; i++) { + foo[0] += foo[i]; + } +} + +__attribute__ ((reqd_work_group_size(2,1,1))) +void __kernel success_barrier_no_id_work_group_size(__global int * foo, int size) { + for (int j = 0; j < 256; j++) { + for (int i = 256; i < size; i+= 256) { + foo[j] += foo[j+i]; + } + } + work_group_barrier(CLK_GLOBAL_MEM_FENCE); + for (int i = 1; i < 256; i++) { + foo[0] += foo[i]; + } +} + +void __kernel success_barrier_global_id(__global int * foo, int size) { + work_group_barrier(CLK_GLOBAL_MEM_FENCE); + int tid = get_global_id(0); +} + +void __kernel success_barrier_local_id(__global int * foo, int size) { + work_group_barrier(CLK_GLOBAL_MEM_FENCE); + int tid = get_local_id(0); +} + +void __kernel success_barrier_both_ids(__global int * foo, int size) { + work_group_barrier(CLK_GLOBAL_MEM_FENCE); + int gid = get_global_id(0); + int lid = get_local_id(0); +} + +void success_nokernel_barrier_no_id(__global int * foo, int size) { + for (int j = 0; j < 256; j++) { + for (int i = 256; i < size; i+= 256) { + foo[j] += foo[j+i]; + } + } + work_group_barrier(CLK_GLOBAL_MEM_FENCE); + for (int i = 1; i < 256; i++) { + foo[0] += foo[i]; + } +} + +void success_nokernel_barrier_global_id(__global int * foo, int size) { + work_group_barrier(CLK_GLOBAL_MEM_FENCE); + int tid = get_global_id(0); +} + +void success_nokernel_barrier_local_id(__global int * foo, int size) { + work_group_barrier(CLK_GLOBAL_MEM_FENCE); + int tid = get_local_id(0); +} + +void success_nokernel_barrier_both_ids(__global int * foo, int size) { + work_group_barrier(CLK_GLOBAL_MEM_FENCE); + int gid = get_global_id(0); + int lid = get_local_id(0); +} +#endif Index: llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/altera/BUILD.gn =================================================================== --- llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/altera/BUILD.gn +++ llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/altera/BUILD.gn @@ -13,6 +13,7 @@ ] sources = [ "AlteraTidyModule.cpp", + "SingleWorkItemBarrierCheck.cpp", "StructPackAlignCheck.cpp", ] }