diff --git a/llvm/include/llvm/Analysis/EmitCModelRegistry.h b/llvm/include/llvm/Analysis/EmitCModelRegistry.h
new file mode 100644
--- /dev/null
+++ b/llvm/include/llvm/Analysis/EmitCModelRegistry.h
@@ -0,0 +1,121 @@
+//===- EmitCModelRegistry.h ---- Registry for EmitC models -----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a registry for EmitC-generated models. The idea is that
+// generated models register themselves here, and optimization passes can then
+// look up each model by its generated name string. This separates concerns
+// between people who want to integrate new models into existing ML
+// optimization passes (ML inlining at -Oz, for example) and people who want to
+// expose new passes to ML.
+//
+// In the normal case, an EmitC model is selected via a command line flag whose
+// string value is passed to the registry as a lookup key.
+//
+// Registration is performed by invoking the REGISTER_EMITC_MODEL macro in a
+// .cpp file.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_EMITCMODELREGISTRY_H
+#define LLVM_ANALYSIS_EMITCMODELREGISTRY_H
+
+#include <functional>
+#include <string>
+
+#include "llvm/ADT/StringMap.h"
+#include "llvm/Analysis/MLModelRunner.h"
+
+namespace llvm {
+
+/// Meyers singleton representing the registry. Each registered factory
+/// constructs an MLModelRunner wrapping an EmitC-generated model that
+/// implements the interface for a particular problem (inlining, regalloc,
+/// etc.).
+class EmitCModelRegistry {
+public:
+  /// Function type which takes an LLVMContext, the input specs, and the output
+  /// spec, and returns an MLModelRunner.
+  using RunnerFactoryType = std::function<std::unique_ptr<MLModelRunner>(
+      LLVMContext &, const std::vector<TensorSpec> &, const TensorSpec &)>;
+
+  /// Get a reference to the singleton registry.
+  static EmitCModelRegistry &get() {
+    static EmitCModelRegistry Registry;
+    return Registry;
+  }
+
+  /// Register the given Factory under Name.
+  void registerModelFactory(std::string Name, RunnerFactoryType Factory) {
+    auto itr = ModelFactories.find(Name);
+    if (itr != std::end(ModelFactories)) {
+      llvm::errs() << "Conflicting model factory registrations in "
+                      "EmitCModelRegistry; conflict at name ["
+                   << Name << "]\n";
+    }
+    ModelFactories[Name] = std::move(Factory);
+  }
+
+  /// Create an MLModelRunner from the factory registered under Name.
+  std::unique_ptr<MLModelRunner>
+  createModelRunner(const std::string &Name, LLVMContext &Ctx,
+                    const std::vector<TensorSpec> &Inputs,
+                    const TensorSpec &Advice) {
+    auto itr = ModelFactories.find(Name);
+    if (itr == std::end(ModelFactories)) {
+      Ctx.emitError("[EmitCModelRegistry] Could not find model: " + Name +
+                    ". The following models have been registered:\n" +
+                    getAllModelsStr());
+      return nullptr;
+    }
+    return itr->second(Ctx, Inputs, Advice);
+  }
+
+private:
+  EmitCModelRegistry() {}
+
+  /// Returns a string listing all registered model factories.
+  std::string getAllModelsStr() {
+    std::string Res;
+    for (const auto &[K, V] : ModelFactories) {
+      Res += K;
+      Res += "\n";
+    }
+    return Res;
+  }
+
+  llvm::StringMap<RunnerFactoryType> ModelFactories;
+};
+
+/// Helper class whose constructor performs a model registration. Constructing
+/// an object of this type is all you need to do to register the model runner.
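+///
+/// For example, this patch registers its test model in MLInlineAdvisor.cpp
+/// with:
+///
+/// \code
+///   REGISTER_EMITC_MODEL(::llvm::MLInlinerEmitCRunner, InlineOzTestModel);
+/// \endcode
+///
+/// after which a pass can resolve a runner by the model's name() string (here
+/// "InlineOzTestModel"), as getEmitCModeAdvisor does:
+///
+/// \code
+///   auto Runner = EmitCModelRegistry::get().createModelRunner(
+///       "InlineOzTestModel", M.getContext(), FeatureMap, InlineDecisionSpec);
+/// \endcode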
+template <typename RunnerT> class EmitCModelRegistrationHandle {
+public:
+  /// Create a RegistrationHandle which automatically registers a factory
+  /// method for the MLModelRunner of type RunnerT.
+  EmitCModelRegistrationHandle(std::string Name) {
+    EmitCModelRegistry::get().registerModelFactory(
+        Name, [Name](LLVMContext &Ctx, const std::vector<TensorSpec> &Inputs,
+                     const TensorSpec &Advice) {
+          return std::make_unique<RunnerT>(Ctx, Inputs, Advice);
+        });
+  }
+};
+} // namespace llvm
+
+// Macro which simplifies registering models with the registry.
+#define REGISTER_EMITC_MODEL(ModelRunnerType, EmitCModelType)                 \
+  namespace {                                                                 \
+  llvm::EmitCModelRegistrationHandle<                                         \
+      ModelRunnerType<emitc_generated::EmitCModelType>>                       \
+      _handle_##EmitCModelType(emitc_generated::EmitCModelType::name());      \
+  }                                                                           \
+  static_assert(true, "")
+// The above (trivial) static_assert generates no code but forces invocations
+// of this macro to end with a semicolon. This is mostly aesthetic, but it also
+// silences a compiler warning about ending the macro with a semicolon, which
+// many programmers would naturally do anyway.
+
+#endif
diff --git a/llvm/include/llvm/Analysis/EmitCTensor.h b/llvm/include/llvm/Analysis/EmitCTensor.h
new file mode 100644
--- /dev/null
+++ b/llvm/include/llvm/Analysis/EmitCTensor.h
@@ -0,0 +1,217 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// SPDX-License-Identifier: Apache-2.0
+
+// This code was originally sourced from github.com/iml130/mlir-emitc and has
+// been modified to fit the needs of generated C++ models in LLVM.
+
+#ifndef LLVM_ANALYSIS_EMITCTENSOR_H
+#define LLVM_ANALYSIS_EMITCTENSOR_H
+
+#include <array>
+#include <cassert>
+#include <cstddef>
+#include <cstdlib>
+#include <functional>
+#include <numeric>
+#include <vector>
+
+// Note: it is critical to keep the llvm prefix of this namespace as the
+// autogenerated emitc files include an embedded runtime which duplicates
+// _some_ of this code, and this namespace prefix prevents any ODR violations.
+namespace llvm::emitc {
+
+namespace utility {
+template <size_t... Shape> static constexpr size_t size() {
+  constexpr std::array<size_t, sizeof...(Shape)> s = {Shape...};
+
+  size_t result = 1;
+  for (size_t i = 0; i < sizeof...(Shape); ++i) {
+    result *= s[i];
+  }
+  return result;
+}
+
+template <size_t... Shape>
+static constexpr std::array<size_t, sizeof...(Shape)> strides() {
+  std::array<size_t, sizeof...(Shape)> result = {};
+  constexpr std::array<size_t, sizeof...(Shape)> s = {Shape...};
+
+  if (sizeof...(Shape) == 0) {
+    return result;
+  }
+
+  result[sizeof...(Shape) - 1] = 1;
+
+  for (size_t i = sizeof...(Shape) - 1; i > 0; i--) {
+    result[i - 1] = result[i] * s[i];
+  }
+
+  return result;
+}
+
+template <size_t... Shape>
+constexpr size_t ravel_index(std::array<size_t, sizeof...(Shape)> indices) {
+  std::array<size_t, sizeof...(Shape)> shape = {Shape...};
+
+  for (size_t i = 0; i < sizeof...(Shape); ++i) {
+    assert(indices[i] < shape[i]);
+  }
+
+  std::array<size_t, sizeof...(Shape)> s = strides<Shape...>();
+
+  size_t result = 0;
+  for (size_t i = 0; i < indices.size(); ++i) {
+    result += indices[i] * s[i];
+  }
+
+  return result;
+}
+
+template <size_t... Shape, typename... Indices>
+constexpr size_t ravel_index(Indices... indices) {
+  static_assert(sizeof...(Indices) == sizeof...(Shape),
+                "Incorrect number of arguments");
+  return ravel_index<Shape...>({static_cast<size_t>(indices)...});
+}
+
+template <size_t... Shape>
+constexpr std::array<size_t, sizeof...(Shape)> unravel_index(size_t index) {
+  assert(index < size<Shape...>());
+
+  std::array<size_t, sizeof...(Shape)> s = strides<Shape...>();
+
+  std::array<size_t, sizeof...(Shape)> result = {};
+  for (size_t i = 0; i < sizeof...(Shape); ++i) {
+    result[i] = index / s[i];
+    index = index % s[i];
+  }
+
+  return result;
+}
+
+template <typename...> struct conjunction : std::true_type {};
+template <typename B1> struct conjunction<B1> : B1 {};
+template <typename B1, typename... Bn>
+struct conjunction<B1, Bn...>
+    : std::conditional_t<bool(B1::value), conjunction<Bn...>, B1> {};
+
+template <typename... Bs>
+constexpr bool conjunction_v = conjunction<Bs...>::value;
+} // namespace utility
+
+template <typename T, size_t... Shape> class Tensor {
+public:
+  using value_type = T;
+  using reference = typename std::vector<T>::reference;
+  using iterator = typename std::vector<T>::iterator;
+  using const_iterator = typename std::vector<T>::const_iterator;
+
+  Tensor() : data(size()) {}
+
+  Tensor(std::initializer_list<T> data) : data(data) {
+    assert(data.size() == size());
+  }
+
+  Tensor(std::vector<T> data) : data(std::move(data)) {
+    assert(data.size() == size());
+  }
+
+  T *get() { return data.data(); }
+
+  static constexpr size_t dim(size_t index) {
+    assert(0 <= index && index < rank());
+    constexpr std::array<size_t, rank()> s = {Shape...};
+    return s[index];
+  }
+
+  static constexpr size_t rank() { return sizeof...(Shape); }
+
+  static constexpr std::array<size_t, rank()> shape() { return {Shape...}; }
+
+  static constexpr size_t size() { return utility::size<Shape...>(); }
+
+  static constexpr std::array<size_t, rank()> strides() {
+    return utility::strides<Shape...>();
+  }
+
+  std::vector<std::array<size_t, rank()>>
+  window(std::array<size_t, rank()> index, std::array<size_t, rank()> sizes) {
+    std::vector<std::vector<size_t>> iotas;
+    for (auto &size : sizes) {
+      std::vector<size_t> range(size);
+      std::iota(range.begin(), range.end(), 0);
+      iotas.push_back(range);
+    }
+
+    std::vector<std::array<size_t, rank()>> result;
+
+    int resultSize =
+        std::accumulate(sizes.begin(), sizes.end(), 1, std::multiplies{});
+    for (int n = 0; n < resultSize; ++n) {
+      std::array<size_t, rank()> u = {};
+      div_t q{n, 0};
+      for (int i = iotas.size() - 1; 0 <= i; --i) {
+        q = div(q.quot, iotas[i].size());
+        u[i] = iotas[i][q.rem];
+      }
+
+      for (size_t i = 0; i < index.size(); ++i) {
+        u[i] += index[i];
+      }
+      result.push_back(u);
+    }
+
+    return result;
+  }
+
+  iterator begin() { return data.begin(); }
+
+  const_iterator begin() const { return data.begin(); }
+
+  iterator end() { return data.end(); }
+
+  const_iterator end() const { return data.end(); }
+
+  // Index into the flat data buffer.
+  reference operator[](size_t index) {
+    assert(0 <= index && index < size());
+    return data[index];
+  }
+
+  template <typename... Indices,
+            typename = std::enable_if_t<utility::conjunction_v<
+                std::is_convertible<Indices, size_t>...>>>
+  reference operator()(Indices... indices) {
+    static_assert(sizeof...(Indices) == rank(),
+                  "Incorrect number of arguments");
+    size_t index = ravel_index({static_cast<size_t>(indices)...});
+
+    assert(index < size());
+    return data[index];
+  }
+
+  constexpr size_t ravel_index(std::array<size_t, rank()> indices) {
+    return utility::ravel_index<Shape...>(indices);
+  }
+
+  constexpr std::array<size_t, rank()> unravel_index(size_t index) {
+    return utility::unravel_index<Shape...>(index);
+  }
+
+private:
+  std::vector<T> data;
+};
+
+} // namespace llvm::emitc
+
+#endif
diff --git a/llvm/include/llvm/Analysis/InlineAdvisor.h b/llvm/include/llvm/Analysis/InlineAdvisor.h
--- a/llvm/include/llvm/Analysis/InlineAdvisor.h
+++ b/llvm/include/llvm/Analysis/InlineAdvisor.h
@@ -41,7 +41,7 @@
 /// training.
 ///
 /// - Dynamically load an advisor via a plugin (PluginInlineAdvisorAnalysis)
-enum class InliningAdvisorMode : int { Default, Release, Development };
+enum class InliningAdvisorMode : int { Default, Release, Development, EmitC };
 
 // Each entry represents an inline driver.
 enum class InlinePass : int {
@@ -356,6 +356,9 @@
                                        LazyCallGraph &CG,
                                        CGSCCUpdateResult &UR);
 };
 
+std::unique_ptr<InlineAdvisor>
+getEmitCModeAdvisor(Module &M, ModuleAnalysisManager &MAM);
+
 std::unique_ptr<InlineAdvisor>
 getReleaseModeAdvisor(Module &M, ModuleAnalysisManager &MAM);
 
diff --git a/llvm/include/llvm/Analysis/InlineModelFeatureMaps.h b/llvm/include/llvm/Analysis/InlineModelFeatureMaps.h
--- a/llvm/include/llvm/Analysis/InlineModelFeatureMaps.h
+++ b/llvm/include/llvm/Analysis/InlineModelFeatureMaps.h
@@ -22,34 +22,34 @@
 // inline cost, and we define them separately to preserve the original heuristic
 // behavior.
 #define INLINE_COST_FEATURE_ITERATOR(M)                                        \
-  M(SROASavings, "sroa_savings")                                               \
-  M(SROALosses, "sroa_losses")                                                 \
-  M(LoadElimination, "load_elimination")                                       \
-  M(CallPenalty, "call_penalty")                                               \
-  M(CallArgumentSetup, "call_argument_setup")                                  \
-  M(LoadRelativeIntrinsic, "load_relative_intrinsic")                          \
-  M(LoweredCallArgSetup, "lowered_call_arg_setup")                             \
-  M(IndirectCallPenalty, "indirect_call_penalty")                              \
-  M(JumpTablePenalty, "jump_table_penalty")                                    \
-  M(CaseClusterPenalty, "case_cluster_penalty")                                \
-  M(SwitchPenalty, "switch_penalty")                                           \
-  M(UnsimplifiedCommonInstructions, "unsimplified_common_instructions")        \
-  M(NumLoops, "num_loops")                                                     \
-  M(DeadBlocks, "dead_blocks")                                                 \
-  M(SimplifiedInstructions, "simplified_instructions")                         \
-  M(ConstantArgs, "constant_args")                                             \
-  M(ConstantOffsetPtrArgs, "constant_offset_ptr_args")                         \
-  M(CallSiteCost, "callsite_cost")                                             \
-  M(ColdCcPenalty, "cold_cc_penalty")                                          \
-  M(LastCallToStaticBonus, "last_call_to_static_bonus")                        \
-  M(IsMultipleBlocks, "is_multiple_blocks")                                    \
-  M(NestedInlines, "nested_inlines")                                           \
-  M(NestedInlineCostEstimate, "nested_inline_cost_estimate")                   \
-  M(Threshold, "threshold")
+  M(SROASavings, sroa_savings, "")                                             \
+  M(SROALosses, sroa_losses, "")                                               \
+  M(LoadElimination, load_elimination, "")                                     \
+  M(CallPenalty, call_penalty, "")                                             \
+  M(CallArgumentSetup, call_argument_setup, "")                                \
+  M(LoadRelativeIntrinsic, load_relative_intrinsic, "")                        \
+  M(LoweredCallArgSetup, lowered_call_arg_setup, "")                           \
+  M(IndirectCallPenalty, indirect_call_penalty, "")                            \
+  M(JumpTablePenalty, jump_table_penalty, "")                                  \
+  M(CaseClusterPenalty, case_cluster_penalty, "")                              \
+  M(SwitchPenalty, switch_penalty, "")                                         \
+  M(UnsimplifiedCommonInstructions, unsimplified_common_instructions, "")      \
+  M(NumLoops, num_loops, "")                                                   \
+  M(DeadBlocks, dead_blocks, "")                                               \
+  M(SimplifiedInstructions, simplified_instructions, "")                       \
+  M(ConstantArgs, constant_args, "")                                           \
+  M(ConstantOffsetPtrArgs, constant_offset_ptr_args, "")                       \
+  M(CallSiteCost, callsite_cost, "")                                           \
+  M(ColdCcPenalty, cold_cc_penalty, "")                                        \
+  M(LastCallToStaticBonus, last_call_to_static_bonus, "")                      \
+  M(IsMultipleBlocks, is_multiple_blocks, "")                                  \
+  M(NestedInlines, nested_inlines, "")                                         \
+  M(NestedInlineCostEstimate, nested_inline_cost_estimate, "")                 \
+  M(Threshold, threshold, "")
 
 // clang-format off
 enum class InlineCostFeatureIndex : size_t {
-#define POPULATE_INDICES(INDEX_NAME, NAME) INDEX_NAME,
+#define POPULATE_INDICES(INDEX_NAME, NAME, DOC) INDEX_NAME,
   INLINE_COST_FEATURE_ITERATOR(POPULATE_INDICES)
 #undef POPULATE_INDICES
@@ -81,39 +81,37 @@
 // programmatically, and serves as workaround to inability of inserting comments
 // in macros.
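+// Each entry below has the form M(EnumName, c_name, "doc"). The second
+// argument is now a bare token rather than a string literal so that call
+// sites can either stringify it (#NAME, as POPULATE_NAMES does when building
+// the TensorSpecs in MLInlineAdvisor.cpp) or token-paste it (##, as the
+// setter and buffer-registration macros in MLInlineEmitCModel.h and
+// MLInlinerEmitCRunner.h do). For instance, the row
+// M(CalleeUsers, callee_users, "...") yields both the spec name
+// "callee_users" and the member name set_callee_users.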
 #define INLINE_FEATURE_ITERATOR(M)                                            \
-  M(CalleeBasicBlockCount, "callee_basic_block_count",                        \
+  M(CalleeBasicBlockCount, callee_basic_block_count,                          \
     "number of basic blocks of the callee")                                   \
-  M(CallSiteHeight, "callsite_height",                                        \
+  M(CallSiteHeight, callsite_height,                                          \
     "position of the call site in the original call graph - measured from "   \
     "the farthest SCC")                                                       \
-  M(NodeCount, "node_count",                                                  \
+  M(NodeCount, node_count,                                                    \
     "total current number of defined functions in the module")                \
-  M(NrCtantParams, "nr_ctant_params",                                         \
+  M(NrCtantParams, nr_ctant_params,                                           \
    "number of parameters in the call site that are constants")                \
-  M(CostEstimate, "cost_estimate", "total cost estimate (threshold - free)")  \
-  M(EdgeCount, "edge_count", "total number of calls in the module")           \
-  M(CallerUsers, "caller_users",                                              \
+  M(CostEstimate, cost_estimate, "total cost estimate (threshold - free)")    \
+  M(EdgeCount, edge_count, "total number of calls in the module")             \
+  M(CallerUsers, caller_users,                                                \
     "number of module-internal users of the caller, +1 if the caller is "     \
     "exposed externally")                                                     \
-  M(CallerConditionallyExecutedBlocks, "caller_conditionally_executed_blocks", \
+  M(CallerConditionallyExecutedBlocks, caller_conditionally_executed_blocks,  \
     "number of blocks reached from a conditional instruction, in the caller") \
-  M(CallerBasicBlockCount, "caller_basic_block_count",                        \
+  M(CallerBasicBlockCount, caller_basic_block_count,                          \
     "number of basic blocks in the caller")                                   \
-  M(CalleeConditionallyExecutedBlocks, "callee_conditionally_executed_blocks", \
+  M(CalleeConditionallyExecutedBlocks, callee_conditionally_executed_blocks,  \
     "number of blocks reached from a conditional instruction, in the callee") \
-  M(CalleeUsers, "callee_users",                                              \
+  M(CalleeUsers, callee_users,                                                \
     "number of module-internal users of the callee, +1 if the callee is "     \
     "exposed externally")
 
 // clang-format off
 enum class FeatureIndex : size_t {
+#define POPULATE_INDICES(INDEX_NAME, NAME, COMMENT) INDEX_NAME,
 // InlineCost features - these must come first
-#define POPULATE_INDICES(INDEX_NAME, NAME) INDEX_NAME,
   INLINE_COST_FEATURE_ITERATOR(POPULATE_INDICES)
-#undef POPULATE_INDICES
 
 // Non-cost features
-#define POPULATE_INDICES(INDEX_NAME, NAME, COMMENT) INDEX_NAME,
   INLINE_FEATURE_ITERATOR(POPULATE_INDICES)
 #undef POPULATE_INDICES
diff --git a/llvm/include/llvm/Analysis/MLInlineEmitCModel.h b/llvm/include/llvm/Analysis/MLInlineEmitCModel.h
new file mode 100644
--- /dev/null
+++ b/llvm/include/llvm/Analysis/MLInlineEmitCModel.h
@@ -0,0 +1,49 @@
+//===- MLInlineEmitCModel.h -- Model for inlining EmitC Models --*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+
+#ifndef LLVM_ANALYSIS_MLINLINEEMITCMODEL_H
+#define LLVM_ANALYSIS_MLINLINEEMITCMODEL_H
+
+#include "llvm/Analysis/EmitCTensor.h"
+#include "llvm/Analysis/InlineModelFeatureMaps.h"
+
+#include <string>
+
+namespace llvm {
+
+// This is the base class for all EmitC-generated models for the inlining -Oz
+// problem.
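+// A concrete model is expected to override each setter with code that
+// forwards the tensor into the compiled model's input buffers. As a rough
+// sketch (illustrative only; the class name below is not part of this patch):
+//
+//   class MyOzModel : public MLInlineOzEmitCModel {
+//     void set_callee_users(emitc::Tensor<int64_t, 1> x) override { ... }
+//     // ... one override per feature ...
+//     std::string name() override { return "MyOzModel"; }
+//     emitc::Tensor<int64_t, 1> run() override;
+//   };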
+class MLInlineOzEmitCModel {
+public:
+  // Define a setter method for each input field.
+#define DEFINE_SETTER(cpp_name, py_name, _)                                    \
+  virtual void set_##py_name(emitc::Tensor<int64_t, 1>) = 0;
+
+  INLINE_FEATURE_ITERATOR(DEFINE_SETTER);
+  INLINE_COST_FEATURE_ITERATOR(DEFINE_SETTER);
+#undef DEFINE_SETTER
+
+  // These setters represent fields in every EmitC-generated model. We include
+  // them here for completeness, but they are not pure-virtual because they are
+  // not strictly necessary.
+  virtual void set_inlining_default(emitc::Tensor<int64_t, 1> x) {}
+  virtual void set_step_type(emitc::Tensor<int32_t, 1> x) {}
+  virtual void set_discount(emitc::Tensor<float, 1> x) {}
+  virtual void set_reward(emitc::Tensor<float, 1> x) {}
+
+  // Name of the model: this is used when inserting models into the registry.
+  virtual std::string name() = 0;
+
+  // Run the model.
+  virtual emitc::Tensor<int64_t, 1> run() = 0;
+};
+
+} // namespace llvm
+
+#endif
diff --git a/llvm/include/llvm/Analysis/MLModelRunner.h b/llvm/include/llvm/Analysis/MLModelRunner.h
--- a/llvm/include/llvm/Analysis/MLModelRunner.h
+++ b/llvm/include/llvm/Analysis/MLModelRunner.h
@@ -47,7 +47,14 @@
     return (const_cast<MLModelRunner *>(this))->getTensorUntyped(Index);
   }
 
-  enum class Kind : int { Unknown, Release, Development, NoOp, Interactive };
+  enum class Kind : int {
+    Unknown,
+    Release,
+    Development,
+    NoOp,
+    Interactive,
+    EmitC
+  };
   Kind getKind() const { return Type; }
   virtual void switchContext(StringRef Name) {}
diff --git a/llvm/lib/Analysis/CMakeLists.txt b/llvm/lib/Analysis/CMakeLists.txt
--- a/llvm/lib/Analysis/CMakeLists.txt
+++ b/llvm/lib/Analysis/CMakeLists.txt
@@ -23,6 +23,8 @@
   endif()
 endif()
 
+add_subdirectory(models)
+
 add_llvm_component_library(LLVMAnalysis
   AliasAnalysis.cpp
   AliasAnalysisEvaluator.cpp
@@ -164,4 +166,5 @@
   ProfileData
   Support
   TargetParser
+  EmitCModels
   )
diff --git a/llvm/lib/Analysis/InlineAdvisor.cpp b/llvm/lib/Analysis/InlineAdvisor.cpp
--- a/llvm/lib/Analysis/InlineAdvisor.cpp
+++ b/llvm/lib/Analysis/InlineAdvisor.cpp
@@ -234,6 +234,9 @@
     LLVM_DEBUG(dbgs() << "Using release-mode inliner policy.\n");
     Advisor = llvm::getReleaseModeAdvisor(M, MAM);
     break;
+  case InliningAdvisorMode::EmitC:
+    LLVM_DEBUG(dbgs() << "Using EmitC-compiled policy.\n");
+    Advisor = llvm::getEmitCModeAdvisor(M, MAM);
   }
 
   return !!Advisor;
diff --git a/llvm/lib/Analysis/MLInlineAdvisor.cpp b/llvm/lib/Analysis/MLInlineAdvisor.cpp
--- a/llvm/lib/Analysis/MLInlineAdvisor.cpp
+++ b/llvm/lib/Analysis/MLInlineAdvisor.cpp
@@ -12,9 +12,11 @@
 //
 //===----------------------------------------------------------------------===//
 #include "llvm/Analysis/MLInlineAdvisor.h"
+#include "MLInlinerEmitCRunner.h"
 #include "llvm/ADT/SCCIterator.h"
 #include "llvm/Analysis/AssumptionCache.h"
 #include "llvm/Analysis/CallGraph.h"
+#include "llvm/Analysis/EmitCModelRegistry.h"
 #include "llvm/Analysis/FunctionPropertiesAnalysis.h"
 #include "llvm/Analysis/InlineCost.h"
 #include "llvm/Analysis/InlineModelFeatureMaps.h"
@@ -30,8 +32,17 @@
 #include "llvm/IR/PassManager.h"
 #include "llvm/Support/CommandLine.h"
 
+// Start EmitC model registration
+#include "models/emitc/InlineOzTestModel.emitc.h"
+REGISTER_EMITC_MODEL(::llvm::MLInlinerEmitCRunner, InlineOzTestModel);
+// End EmitC model registration
+
 using namespace llvm;
 
+static cl::opt<std::string> MLInlineEmitCModelName(
+    "inliner-emitc-model-name", cl::Hidden,
+    cl::desc("Name of the model to use for the ML inlining advisor."));
+
 static cl::opt<std::string> InteractiveChannelBaseName(
     "inliner-interactive-channel-base", cl::Hidden,
     cl::desc(
@@ -47,6 +58,13 @@ using CompiledModelType =
     NoopSavedModelImpl;
 #endif
 
+std::unique_ptr<InlineAdvisor>
+llvm::getEmitCModeAdvisor(Module &M, ModuleAnalysisManager &MAM) {
+  auto Runner = EmitCModelRegistry::get().createModelRunner(
+      MLInlineEmitCModelName, M.getContext(), FeatureMap, InlineDecisionSpec);
+  return std::make_unique<MLInlineAdvisor>(M, MAM, std::move(Runner));
+}
+
 std::unique_ptr<InlineAdvisor>
 llvm::getReleaseModeAdvisor(Module &M, ModuleAnalysisManager &MAM) {
   if (!llvm::isEmbeddedModelEvaluatorValid<CompiledModelType>() &&
@@ -80,13 +98,11 @@
 
 // clang-format off
 const std::vector<TensorSpec> llvm::FeatureMap{
-#define POPULATE_NAMES(_, NAME) TensorSpec::createSpec<int64_t>(NAME, {1} ),
+#define POPULATE_NAMES(_, NAME, __) TensorSpec::createSpec<int64_t>(#NAME, {1} ),
 // InlineCost features - these must come first
   INLINE_COST_FEATURE_ITERATOR(POPULATE_NAMES)
-#undef POPULATE_NAMES
 
 // Non-cost features
-#define POPULATE_NAMES(_, NAME, __) TensorSpec::createSpec<int64_t>(NAME, {1} ),
   INLINE_FEATURE_ITERATOR(POPULATE_NAMES)
 #undef POPULATE_NAMES
 };
diff --git a/llvm/lib/Analysis/MLInlinerEmitCRunner.h b/llvm/lib/Analysis/MLInlinerEmitCRunner.h
new file mode 100644
--- /dev/null
+++ b/llvm/lib/Analysis/MLInlinerEmitCRunner.h
@@ -0,0 +1,63 @@
+//===- MLInlinerEmitCRunner.h ---- EmitC ML model runner -------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+
+#ifndef LLVM_ANALYSIS_EMITCMODELRUNNER_H
+#define LLVM_ANALYSIS_EMITCMODELRUNNER_H
+
+#include "llvm/Analysis/EmitCModelRegistry.h"
+#include "llvm/Analysis/EmitCTensor.h"
+#include "llvm/Analysis/InlineModelFeatureMaps.h"
+#include "llvm/Analysis/MLInlineEmitCModel.h"
+#include "llvm/Analysis/MLModelRunner.h"
+#include "llvm/Analysis/TensorSpec.h"
+
+namespace llvm {
+
+template <typename ModelT> class MLInlinerEmitCRunner : public MLModelRunner {
+public:
+  MLInlinerEmitCRunner(LLVMContext &Ctx, const std::vector<TensorSpec> &Inputs,
+                       const TensorSpec &Advice)
+      : MLModelRunner(Ctx, MLModelRunner::Kind::EmitC, Inputs.size()),
+        InputSpecs(Inputs), OutputSpec(Advice) {
+    int64_t BufferIdx = 0;
+#define REGISTER_BUFFER(cpp_name, py_name, _)                                  \
+  do {                                                                         \
+    setUpBufferForTensor(BufferIdx, Inputs[BufferIdx],                         \
+                         static_cast<void *>(Model.get_buffer_##py_name()));   \
+    ++BufferIdx;                                                               \
+  } while (false);
+
+    INLINE_FEATURE_ITERATOR(REGISTER_BUFFER);
+    INLINE_COST_FEATURE_ITERATOR(REGISTER_BUFFER);
+#undef REGISTER_BUFFER
+    assert(BufferIdx == static_cast<int64_t>(FeatureIndex::NumberOfFeatures) &&
+           "Did not register all inlining feature buffers!");
+  }
+
+  static bool classof(const MLModelRunner *R) {
+    return R->getKind() == MLModelRunner::Kind::EmitC;
+  }
+
+  virtual ~MLInlinerEmitCRunner() {}
+
+  void *evaluateUntyped() override {
+    auto *Result = Model.run();
+    return static_cast<void *>(Result);
+  }
+
+private:
+  const std::vector<TensorSpec> InputSpecs;
+  const TensorSpec OutputSpec;
+
+  ModelT Model;
+};
+
+} // namespace llvm
+
+#endif
diff --git a/llvm/lib/Analysis/models/CMakeLists.txt b/llvm/lib/Analysis/models/CMakeLists.txt
new file mode 100644
--- /dev/null
+++ b/llvm/lib/Analysis/models/CMakeLists.txt
@@ -0,0 +1 @@
+add_subdirectory(emitc)
diff --git a/llvm/lib/Analysis/models/emitc/CMakeLists.txt b/llvm/lib/Analysis/models/emitc/CMakeLists.txt
new file mode 100644
--- /dev/null
+++ b/llvm/lib/Analysis/models/emitc/CMakeLists.txt
@@ -0,0 +1,3 @@
+add_llvm_component_library(LLVMEmitCModels
+  InlineOzTestModel.emitc.cpp
+)
diff --git
a/llvm/lib/Analysis/models/emitc/InlineOzTestModel.emitc.h b/llvm/lib/Analysis/models/emitc/InlineOzTestModel.emitc.h new file mode 100644 --- /dev/null +++ b/llvm/lib/Analysis/models/emitc/InlineOzTestModel.emitc.h @@ -0,0 +1,71 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// SPDX-License-Identifier: Apache-2.0 + +// This code was originally sourced from github.com/iml130/mlir-emitc and has +// been modified to fit the needs of generated C++ models in LLVM. +#include +#include +namespace emitc_generated { +class _InlineOzTestModelImpl; +class InlineOzTestModel { +private: + std::unique_ptr<_InlineOzTestModelImpl> impl; + +public: + InlineOzTestModel(); + ~InlineOzTestModel(); + int64_t *get_buffer_callsite_cost(); + int64_t *get_buffer_is_multiple_blocks(); + int64_t *get_buffer_caller_conditionally_executed_blocks(); + int64_t *get_buffer_inlining_default(); + int64_t *get_buffer_cold_cc_penalty(); + int64_t *get_buffer_callee_conditionally_executed_blocks(); + int64_t *get_buffer_callee_users(); + int64_t *get_buffer_callee_basic_block_count(); + int64_t *get_buffer_nr_ctant_params(); + int64_t *get_buffer_load_relative_intrinsic(); + int64_t *get_buffer_jump_table_penalty(); + int64_t *get_buffer_unsimplified_common_instructions(); + int64_t *get_buffer_indirect_call_penalty(); + int64_t *get_buffer_load_elimination(); + int64_t *get_buffer_call_penalty(); + int64_t *get_buffer_cost_estimate(); + int64_t *get_buffer_case_cluster_penalty(); + int64_t *get_buffer_node_count(); + int64_t *get_buffer_call_argument_setup(); + int64_t *get_buffer_sroa_savings(); + int64_t *get_buffer_lowered_call_arg_setup(); + int64_t *get_buffer_threshold(); + int64_t *get_buffer_dead_blocks(); + int64_t *get_buffer_constant_args(); + int64_t *get_buffer_sroa_losses(); + int64_t *get_buffer_simplified_instructions(); + int64_t *get_buffer_num_loops(); + int32_t *get_buffer_step_type(); + int64_t *get_buffer_edge_count(); + int64_t *get_buffer_nested_inlines(); + int64_t *get_buffer_caller_basic_block_count(); + int64_t *get_buffer_last_call_to_static_bonus(); + int64_t *get_buffer_nested_inline_cost_estimate(); + int64_t *get_buffer_callsite_height(); + int64_t *get_buffer_constant_offset_ptr_args(); + int64_t *get_buffer_switch_penalty(); + float *get_buffer_discount(); + int64_t *get_buffer_caller_users(); + float *get_buffer_reward(); + static std::string name() { return "InlineOzTestModel"; } + int64_t *run(); +}; + +} // namespace emitc_generated diff --git a/llvm/lib/Analysis/models/emitc/InlineOzTestModel.emitc.cpp b/llvm/lib/Analysis/models/emitc/InlineOzTestModel.emitc.cpp new file mode 100644 --- /dev/null +++ b/llvm/lib/Analysis/models/emitc/InlineOzTestModel.emitc.cpp @@ -0,0 +1,1532 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// SPDX-License-Identifier: Apache-2.0 + +// This code was originally sourced from github.com/iml130/mlir-emitc and has +// been modified to fit the needs of generated C++ models in LLVM. +#include "InlineOzTestModel.emitc.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +namespace { +namespace emitc { +namespace utility { +template static constexpr size_t size() { + constexpr std::array s = {Shape...}; + size_t result = 1; + for (size_t i = 0; i < sizeof...(Shape); ++i) { + result *= s[i]; + } + return result; +} +template +static constexpr std::array strides() { + std::array result = {}; + constexpr std::array s = {Shape...}; + if (sizeof...(Shape) == 0) { + return result; + } + result[sizeof...(Shape) - 1] = 1; + for (size_t i = sizeof...(Shape) - 1; i > 0; i--) { + result[i - 1] = result[i] * s[i]; + } + return result; +} +template +constexpr size_t ravel_index(std::array indices) { + std::array shape = {Shape...}; + for (size_t i = 0; i < sizeof...(Shape); ++i) { + assert(indices[i] < shape[i]); + } + std::array s = strides(); + size_t result = 0; + for (size_t i = 0; i < indices.size(); ++i) { + result += indices[i] * s[i]; + } + return result; +} +template +constexpr size_t ravel_index(Indices... indices) { + static_assert(sizeof...(Indices) == sizeof...(Shape), + "Incorrect number of arguments"); + return ravel_index({static_cast(indices)...}); +} +template +constexpr std::array unravel_index(size_t index) { + assert(index < size()); + std::array s = strides(); + std::array result = {}; + for (size_t i = 0; i < sizeof...(Shape); ++i) { + result[i] = index / s[i]; + index = index % s[i]; + } + return result; +} +} // namespace utility +} // namespace emitc +namespace detail { +template constexpr size_t sum(const std::array arr) { + size_t result = 0; + for (size_t i = 0; i < arr.size(); ++i) { + result += arr[i]; + } + return result; +} +template constexpr size_t first(const std::array arr) { + static_assert(N > 0, "Cannot get the first element of an empty array"); + return arr[0]; +} +template constexpr bool all_same(const std::array arr) { + if (arr.size() == 0) { + return true; + } + size_t first = arr[0]; + for (size_t i = 1; i < arr.size(); ++i) { + if (arr[i] != first) { + return false; + } + } + return true; +} +template struct conjunction : std::true_type {}; +template struct conjunction : B1 {}; +template +struct conjunction + : std::conditional_t, B1> {}; +template constexpr bool conjunction_v = conjunction::value; +template struct case_t { + static constexpr bool value = B; + using type = T; +}; +template +struct switch_t : std::conditional_t> {}; +template struct switch_t { + using type = T; +}; +template struct switch_t> { + static_assert(B, "None of the supplied conditions evaluate to true."); + using type = T; +}; +} // namespace detail +template class Tensor { +public: + using value_type = T; + using reference = typename std::vector::reference; + using iterator = typename std::vector::iterator; + using const_iterator = typename std::vector::const_iterator; + 
Tensor() : data(size()) {} + Tensor(std::initializer_list data) : data(data) { + assert(data.size() == size()); + } + Tensor(std::vector data) : data(std::move(data)) {} + static constexpr size_t dim(size_t index) { + assert(0 <= index && index < rank()); + constexpr std::array s = {Shape...}; + return s[index]; + } + static constexpr size_t rank() { return sizeof...(Shape); } + static constexpr std::array shape() { return {Shape...}; } + static constexpr size_t size() { return emitc::utility::size(); } + static constexpr std::array strides() { + return emitc::utility::strides(); + } + T *get() { return data.data(); } + std::vector> + window(std::array index, std::array sizes) { + std::vector> iotas; + for (auto &size : sizes) { + std::vector range(size); + std::iota(range.begin(), range.end(), 0); + iotas.push_back(range); + } + std::vector> result; + int resultSize = + std::accumulate(sizes.begin(), sizes.end(), 1, std::multiplies{}); + for (int n = 0; n < resultSize; ++n) { + std::array u = {}; + div_t q{n, 0}; + for (int i = iotas.size() - 1; 0 <= i; --i) { + q = div(q.quot, iotas[i].size()); + u[i] = iotas[i][q.rem]; + } + for (size_t i = 0; i < index.size(); ++i) { + u[i] += index[i]; + } + result.push_back(u); + } + return result; + } + iterator begin() { return data.begin(); } + const_iterator begin() const { return data.begin(); } + iterator end() { return data.end(); } + const_iterator end() const { return data.end(); } + reference operator[](size_t index) { + assert(0 <= index && index < size()); + return data[index]; + } + template ...>>> + reference operator()(Indices... indices) { + static_assert(sizeof...(Indices) == rank(), + "Incorrect number of arguments"); + size_t index = ravel_index({static_cast(indices)...}); + assert(index < size()); + return data[index]; + } + constexpr size_t ravel_index(std::array indices) { + return emitc::utility::ravel_index(indices); + } + constexpr std::array unravel_index(size_t index) { + return emitc::utility::unravel_index(index); + } + +private: + std::vector data; +}; +template using Tensor0D = Tensor; +template using Tensor1D = Tensor; +template +using Tensor2D = Tensor; +template +using Tensor3D = Tensor; +template +using Tensor4D = Tensor; +template using is_scalar = std::is_arithmetic; +template +struct is_tensor : std::false_type {}; +template +struct is_tensor> : std::true_type {}; +template +struct is_tensor_of_dim : std::false_type {}; +template +struct is_tensor_of_dim> { + static constexpr bool value = Tensor::rank() == Dim; +}; +template +using IsScalar = typename std::enable_if_t::value, bool>; +template +using IsTensor = typename std::enable_if_t::value, bool>; +template +using IsTensorOfDim = + typename std::enable_if_t::value, bool>; +template struct get_element_type { + using type = T; +}; +template +struct get_element_type> { + using type = T; +}; +template +using IsTensorOfType = std::enable_if_t< + std::is_same::type, ET>::value, bool>; +template struct replace_element_type { + using type = Dest; +}; +template +struct replace_element_type> { + using type = Tensor; +}; +template using UnaryFuncType = Dest (*)(Src); +template +using BinaryFuncType = Dest (*)(SrcLeft, SrcRight); +template = true> +inline Dest unary(const Src &x, UnaryOp &&op) { + return op(x); +} +template = true> +inline Dest unary(const Src &x, UnaryOp &&op) { + Dest z; + std::transform(x.begin(), x.end(), z.begin(), op); + return z; +} +template = true, IsScalar = true> +inline Dest binary(const SrcLeft &x, const SrcRight &y, BinaryOp &&op) { + return 
op(x, y); +} +template = true, IsTensor = true> +inline Dest binary(const SrcLeft &x, const SrcRight &y, BinaryOp &&op) { + Dest z; + std::transform(x.begin(), x.end(), y.begin(), z.begin(), op); + return z; +} +template = true, IsScalar = true, + IsScalar = true> +inline Dest ternary(const SrcA &a, const SrcB &b, const SrcB &c, + TernaryOp &&op) { + return op(a, b, c); +} +template = true, IsTensor = true, + IsTensor = true> +inline Dest ternary(const SrcA &a, const SrcB &b, const SrcB &c, + TernaryOp &&op) { + Dest d; + auto first1 = a.begin(), last1 = a.end(); + auto first2 = b.begin(), first3 = c.begin(); + auto result = d.begin(); + while (first1 != last1) { + *result = op(*first1, *first2, *first3); + ++result; + ++first1; + ++first2; + ++first3; + } + return d; +} +template struct concat {}; +template +struct concat...> { + static_assert(0 <= Dim && Dim < 1, "Dimension index out of bounds"); + using type = Tensor1D({Xs...})>; +}; +template +struct concat...> { + static_assert(0 <= Dim && Dim < 2, "Dimension index out of bounds"); + static_assert((Dim == 0 && detail::all_same({Ys...})) || + (Dim == 1 && detail::all_same({Xs...})), + "All dimensions except for the dimension index must match"); + using type = typename std::conditional_t< + Dim == 0, + Tensor2D({Xs...}), + detail::first({Ys...})>, + Tensor2D({Xs...}), + detail::sum({Ys...})>>; +}; +template +struct concat...> { + static_assert(0 <= Dim && Dim < 3, "Dimension index out of bounds"); + using type = typename detail::switch_t< + detail::case_t({Xs...}), + detail::first({Ys...}), + detail::first({Zs...})>>, + detail::case_t({Xs...}), + detail::sum({Ys...}), + detail::first({Zs...})>>, + detail::case_t({Xs...}), + detail::first({Ys...}), + detail::sum({Zs...})>>>::type; +}; +template +struct concat...> { + static_assert(0 <= Dim && Dim < 4, "Dimension index out of bounds"); + using type = typename detail::switch_t< + detail::case_t({D0...}), + detail::first({D1...}), + detail::first({D2...}), + detail::first({D3...})>>, + detail::case_t({D0...}), + detail::sum({D1...}), + detail::first({D2...}), + detail::first({D3...})>>, + detail::case_t({D0...}), + detail::first({D1...}), + detail::sum({D2...}), + detail::first({D3...})>>, + detail::case_t({D0...}), + detail::first({D1...}), + detail::first({D2...}), + detail::sum({D3...})>>>::type; +}; +namespace emitc { +template inline Src abs(Src x) { + using ET_Src = typename get_element_type::type; + auto f = static_cast(std::abs); + return unary(x, f); +} +template inline Src ceil(Src x) { + using ET_Src = typename get_element_type::type; + auto f = static_cast(std::ceil); + return unary(x, f); +} +template inline Dest convert(Src x) { + using ET_Dest = typename get_element_type::type; + using ET_Src = typename get_element_type::type; + auto cast = [](ET_Src value) { return static_cast(value); }; + return unary>(x, cast); +} +template inline Src exp(Src x) { + using ET_Src = typename get_element_type::type; + auto f = static_cast(std::exp); + return unary(x, f); +} +template inline Src floor(Src x) { + using ET_Src = typename get_element_type::type; + auto f = static_cast(std::floor); + return unary(x, f); +} +template inline Src log(Src x) { + using ET_Src = typename get_element_type::type; + auto f = static_cast(std::log); + return unary(x, f); +} +template inline Src negate(Src x) { + using ET_Src = typename get_element_type::type; + auto f = std::negate{}; + return unary(x, f); +} +template +inline Src clamp(Min min, Src operand, Max max) { + static_assert( + std::is_same::value 
|| + (is_tensor_of_dim<0, Min>::value && + std::is_same::type, + typename get_element_type::type>::value), + "Expected the same type for min and operand or a 0-dim tensor of the " + "same element type for min"); + static_assert( + std::is_same::value || + (is_tensor_of_dim<0, Max>::value && + std::is_same::type, + typename get_element_type::type>::value), + "Expected the same type for min and operand or a 0-dim tensor of the " + "same element type for max"); + const bool broadcast_min = !std::is_same::value; + const bool broadcast_max = !std::is_same::value; + Src result; + for (size_t index = 0; index < Src::size(); index++) { + const auto value_min = broadcast_min ? min[0] : min[index]; + const auto value_max = broadcast_max ? max[0] : max[index]; + auto value = operand[index]; + value = value < value_min ? value_min : value; + value = value > value_max ? value_max : value; + result[index] = value; + } + return result; +} +template inline Src sqrt(Src x) { + using ET_Src = typename get_element_type::type; + auto f = static_cast(std::sqrt); + return unary(x, f); +} +template inline Src tanh(Src x) { + using ET_Src = typename get_element_type::type; + auto f = static_cast(std::tanh); + return unary(x, f); +} +template inline Src add(Src x, Src y) { + using ET_Src = typename get_element_type::type; + auto f = std::plus{}; + return binary(x, y, f); +} +template inline Src max(Src x, Src y) { + using ET_Src = typename get_element_type::type; + auto f = + static_cast(std::max); + return binary(x, y, f); +} +template inline Src min(Src x, Src y) { + using ET_Src = typename get_element_type::type; + auto f = + static_cast(std::min); + return binary(x, y, f); +} +template inline Src mul(Src x, Src y) { + using ET_Src = typename get_element_type::type; + auto f = std::multiplies{}; + return binary(x, y, f); +} +template inline Src pow(Src x, Src y) { + using ET_Src = typename get_element_type::type; + auto f = [](ET_Src a, ET_Src b) -> ET_Src { + if (std::is_integral::value) { + const bool negative = b < 0; + if (b < 0) { + b = -b; + } + ET_Src result = 1; + for (ET_Src i = 0; i < b; i++) { + result *= a; + } + if (negative) { + result = 1 / result; + } + return result; + } else { + return std::pow(a, b); + } + }; + return binary(x, y, f); +} +template inline Src sub(Src x, Src y) { + using ET_Src = typename get_element_type::type; + auto f = std::minus{}; + return binary(x, y, f); +} +template +inline Dest +broadcast_in_dim(Src operand, + Tensor broadcast_dimensions) { + static_assert(is_tensor::value, "Expected tensor argument"); + static_assert(is_tensor::value, "Expected tensor result"); + std::vector retainedDimensions(Dest::rank()); + std::iota(retainedDimensions.begin(), retainedDimensions.end(), 0); + retainedDimensions.erase( + std::remove_if(retainedDimensions.begin(), retainedDimensions.end(), + [&broadcast_dimensions](size_t i) { + return std::find(broadcast_dimensions.begin(), + broadcast_dimensions.end(), + i) == broadcast_dimensions.end(); + }), + retainedDimensions.end()); + assert(retainedDimensions.size() == Src::rank()); + Dest result; + for (size_t i = 0; i < result.size(); i++) { + auto dest_index = result.unravel_index(i); + std::array src_index; + for (size_t j = 0; j < src_index.size(); j++) { + src_index[j] = dest_index[broadcast_dimensions(j)]; + } + for (size_t i = 0; i < src_index.size(); ++i) { + if (Src::shape()[i] == 1) { + src_index[i] = 0; + } + } + result[i] = operand[operand.ravel_index(src_index)]; + } + return result; +} +template +Dest dot(Lhs lhs, Rhs rhs) { 
+ static_assert(is_tensor_of_dim<2, Lhs>::value, "Expected 2 dimensional lhs"); + static_assert(is_tensor_of_dim<2, Rhs>::value, "Expected 2 dimensional rhs"); + static_assert(Lhs::dim(1) == Rhs::dim(0), + "Expected contracting dimension to match"); + Dest output; + for (size_t m = 0; m < lhs.dim(0); m++) { + for (size_t n = 0; n < lhs.dim(1); n++) { + for (size_t k = 0; k < rhs.dim(1); k++) { + output(m, k) += lhs(m, n) * rhs(n, k); + } + } + } + return output; +} +template +Dest batch_matmul(Lhs lhs, Rhs rhs) { + static_assert(is_tensor_of_dim<3, Lhs>::value, "Expected 3 dimensional lhs"); + static_assert(is_tensor_of_dim<3, Rhs>::value, "Expected 3 dimensional rhs"); + static_assert(Lhs::dim(0) == Rhs::dim(0) && Lhs::dim(0) == Dest::dim(0), + "Expected batch dimension to match"); + static_assert(Lhs::dim(2) == Rhs::dim(1), + "Expected contracting dimension to match"); + static_assert(Dest::dim(1) == Lhs::dim(1), "Expected row dimension to match"); + static_assert(Dest::dim(2) == Rhs::dim(2), + "Expected column dimension to match"); + Dest output; + for (size_t b = 0; b < lhs.dim(0); b++) { + for (size_t m = 0; m < lhs.dim(1); m++) { + for (size_t n = 0; n < lhs.dim(2); n++) { + for (size_t k = 0; k < rhs.dim(2); k++) { + output(b, m, k) += lhs(b, m, n) * rhs(b, n, k); + } + } + } + } + return output; +} +template +inline Dest concatenate(Src input) { + Dest z = input; + return z; +} +template +inline Dest concatenate(Src1 input1, Src... inputs) { + static_assert(sizeof...(inputs) > 0, "Wrong template specialization chosen"); + using ET_Src = typename get_element_type::type; + using Rest = typename concat::type; + Rest rest = concatenate(inputs...); + Dest z; + auto calculate_shift = [](const auto &shape) { + size_t shift = 1; + for (size_t i = Dimension; i < shape.size(); i++) { + shift *= shape[i]; + } + return shift; + }; + auto a_shift = calculate_shift(Src1::shape()); + auto b_shift = calculate_shift(Rest::shape()); + for (auto a_ptr = input1.begin(), b_ptr = rest.begin(), c_ptr = z.begin(); + a_ptr != input1.end(); a_ptr += a_shift, b_ptr += b_shift) { + std::copy(a_ptr, a_ptr + a_shift, c_ptr); + c_ptr += a_shift; + std::copy(b_ptr, b_ptr + b_shift, c_ptr); + c_ptr += b_shift; + } + return z; +} +template inline Dest reshape(Src x) { + static_assert(is_tensor::value, "Expected tensor argument"); + static_assert(is_tensor::value, "Expected tensor result"); + using ET_Src = typename get_element_type::type; + using ET_Dest = typename get_element_type::type; + static_assert(std::is_same::value, "Element type mismatch"); + static_assert(Src::size() == Dest::size(), "Tensor size mismatch"); + Dest z; + std::copy(x.begin(), x.end(), z.begin()); + return z; +} +template = true> +Dest slice(Src x, Tensor start_indices, + Tensor limit_indices, Tensor strides) { + Dest z; + size_t index = 0; + for (int64_t i = start_indices[0]; i < limit_indices[0]; i += strides[0]) { + z[index++] = x(i); + } + return z; +} +template = true> +Dest slice(Src x, Tensor start_indices, + Tensor limit_indices, Tensor strides) { + Dest z; + size_t index = 0; + for (int64_t i = start_indices[0]; i < limit_indices[0]; i += strides[0]) { + for (int64_t j = start_indices[1]; j < limit_indices[1]; j += strides[1]) { + z[index++] = x(i, j); + } + } + return z; +} +template = true> +Dest slice(Src x, Tensor start_indices, + Tensor limit_indices, Tensor strides) { + Dest z; + size_t index = 0; + for (int64_t i = start_indices[0]; i < limit_indices[0]; i += strides[0]) { + for (int64_t j = start_indices[1]; j < 
limit_indices[1]; j += strides[1]) { + for (int64_t k = start_indices[2]; k < limit_indices[2]; + k += strides[2]) { + z[index++] = x(i, j, k); + } + } + } + return z; +} +template = true> +Dest slice(Src x, Tensor start_indices, + Tensor limit_indices, Tensor strides) { + Dest z; + size_t index = 0; + for (int64_t i = start_indices[0]; i < limit_indices[0]; i += strides[0]) { + for (int64_t j = start_indices[1]; j < limit_indices[1]; j += strides[1]) { + for (int64_t k = start_indices[2]; k < limit_indices[2]; + k += strides[2]) { + for (int64_t c = start_indices[3]; c < limit_indices[3]; + c += strides[3]) { + z[index++] = x(i, j, k, c); + } + } + } + } + return z; +} +template +inline Dest pad(Src operand, + Tensor::type> padding_value, + Tensor edge_padding_low, + Tensor edge_padding_high, + Tensor interior_padding) { + assert(std::all_of(interior_padding.begin(), interior_padding.end(), + [](int64_t i) { return i >= 0; })); + assert(std::all_of(edge_padding_low.begin(), edge_padding_low.end(), + [](int64_t i) { return i >= 0; })); + assert(std::all_of(edge_padding_high.begin(), edge_padding_high.end(), + [](int64_t i) { return i >= 0; })); + Dest result; + auto interior = [&interior_padding](std::array index) { + for (size_t i = 0; i < index.size(); i++) { + if (index[i] % (interior_padding[i] + 1) != 0) { + return true; + } + } + return false; + }; + auto out_of_bounds = [](std::array index) { + for (size_t i = 0; i < index.size(); i++) { + if (index[i] < 0 || index[i] >= Src::dim(i)) { + return true; + } + } + return false; + }; + for (size_t i = 0; i < result.size(); i++) { + auto index = result.unravel_index(i); + for (size_t j = 0; j < index.size(); j++) { + index[j] -= edge_padding_low[j]; + } + if (interior(index)) { + result[i] = padding_value(); + } else { + for (size_t j = 0; j < index.size(); j++) { + size_t pad = interior_padding[j]; + assert(index[j] % (pad + 1) == 0); + index[j] /= (pad + 1); + } + if (out_of_bounds(index)) { + result[i] = padding_value(); + } else { + result[i] = operand[operand.ravel_index(index)]; + } + } + } + return result; +} +} // namespace emitc +namespace emitc { +namespace tensor { +template +inline T extract(Tensor x, Indices... 
indices) { + return x(indices...); +} +template = true> +inline Dest splat(Src x) { + Dest z; + std::fill(z.begin(), z.end(), x); + return z; +} +} // namespace tensor +} // namespace emitc +namespace emitc { +namespace tosa { +template inline Src abs(Src x) { return emitc::abs(x); } +template inline Dest cast(Src x) { + return emitc::convert(x); +} +template inline Src ceil(Src x) { return emitc::ceil(x); } +template +inline Src clamp(Src operand, typename Src::value_type min_value, + typename Src::value_type max_value) { + Tensor min{min_value}; + Tensor max{max_value}; + return emitc::clamp(min, operand, max); +} +template inline Src clz(Src x) { + using ET_Src = typename get_element_type::type; + static_assert(std::is_same::value, + "Expected tensor of type int32_t"); + auto f = [](ET_Src element) { + ET_Src count = 32; + while (element != 0 && count > 0) { + count--; + element >>= 1; + } + return count; + }; + return unary(x, f); +} +template inline Src exp(Src x) { return emitc::exp(x); } +template inline Src floor(Src x) { return emitc::floor(x); } +template inline Src log(Src x) { return emitc::log(x); } +template inline Src negate(Src x) { return emitc::negate(x); } +template inline Src reciprocal(Src x) { + using ET_Src = typename get_element_type::type; + auto f = [](ET_Src element) { return (static_cast(1.0) / element); }; + return unary(x, f); +} +template +inline Dest rescale(Src x, typename get_element_type::type in_zp, + typename get_element_type::type out_zp, + Tensor1D mult, Tensor1D shift, + bool scale32, bool double_round, bool per_channel) { + using ET_Dest = typename get_element_type::type; + using Dest_I32 = typename replace_element_type::type; + assert(!(!scale32 && double_round) && + "Invalid combination of `scale32` and `double_round` arguments."); + auto apply_scale = [=](int64_t element, int64_t mult, int64_t shift) { + int64_t round = 1 << (shift - 1); + if (double_round && shift > 31) { + if (element >= 0) + round += 1 << 30; + else + round -= 1 << 30; + } + int64_t result = (element * mult + round) >> shift; + return static_cast(result); + }; + Dest_I32 result; + for (size_t i = 0; i < x.size(); ++i) { + size_t index = per_channel ? 
x.unravel_index(i)[x.rank() - 1] : 0; + int64_t element = x[i] - in_zp; + int32_t scaled_element = apply_scale(element, mult[index], shift[index]); + result[i] = scaled_element + out_zp; + } + Tensor0D min{ + static_cast(std::numeric_limits::min())}; + Tensor0D max{ + static_cast(std::numeric_limits::max())}; + return cast(emitc::clamp(min, result, max)); +} +template inline Src tanh(Src x) { return emitc::tanh(x); } +template inline Src add(Src x, Src y) { + return emitc::add(x, y); +} +template +inline Src arithmetic_right_shift(Src x, Src y, bool round) { + using ET_Src = typename get_element_type::type; + std::function f; + if (round) { + f = [](ET_Src left, ET_Src right) { + ET_Src result = left >> right; + if (right > 0 && ((left >> (right - 1)) & 1) != 0) { + result++; + } + return result; + }; + } else { + f = [](ET_Src left, ET_Src right) { return left >> right; }; + } + return binary(x, y, f); +} +template inline Dest equal(Src x, Src y) { + using ET_Src = typename get_element_type::type; + auto f = [](ET_Src left, ET_Src right) { return left == right; }; + return binary(x, y, f); +} +template inline Dest greater_equal(Src x, Src y) { + using ET_Src = typename get_element_type::type; + auto f = [](ET_Src left, ET_Src right) { return left >= right; }; + return binary(x, y, f); +} +template inline Src logical_left_shift(Src x, Src y) { + using ET_Src = typename get_element_type::type; + auto f = [](ET_Src left, ET_Src right) { return left << right; }; + return binary(x, y, f); +} +template inline Src mul(Src x, Src y) { + return emitc::mul(x, y); +} +template inline Src maximum(Src x, Src y) { + return emitc::max(x, y); +} +template inline Src minimum(Src x, Src y) { + return emitc::min(x, y); +} +template = true> +inline Src mul(Src x, Src y, const int32_t shift) { + if (shift > 0) { + auto f = [&shift](int32_t x, int32_t y) -> int32_t { + int64_t result; + int64_t round = 1L << (shift - 1); + result = x * y + round; + result = result >> shift; + return static_cast(result); + }; + return binary(x, y, f); + } else { + return emitc::mul(x, y); + } +} +template inline Src pow(Src x, Src y) { + return emitc::pow(x, y); +} +template inline Src sub(Src x, Src y) { + return emitc::sub(x, y); +} +template +inline Tensor table(Tensor x, + Tensor1D table) { + auto f = [&table](int8_t element) { + return table(static_cast(element) + 128); + }; + return unary>(x, f); +} +template +inline Tensor table(Tensor x, + Tensor1D table) { + auto f = [&table](int16_t element) { + int32_t integer = (element >> 7) + 0x100; + int32_t fractional = element & 0x7F; + int32_t result_integer = table(integer); + int32_t result_fractional = + (table(integer + 1) - table(integer)) * fractional; + return (result_integer << 7) + result_fractional; + }; + return unary>(x, f); +} +template +inline Dest select(SrcPred a, SrcOperand b, SrcOperand c) { + using ET_Src_Pred = typename get_element_type::type; + static_assert(std::is_same::value, + "Pred tensor type must be bool"); + using ET_Src_Operand = typename get_element_type::type; + auto f = [](ET_Src_Pred pred, ET_Src_Operand on_true, + ET_Src_Operand on_false) { return pred ? on_true : on_false; }; + return ternary(a, b, c, f); +} +template +inline Dest concat(Src... 
inputs) { + return emitc::concatenate(inputs...); +} +template +Dest conv2d(Src input, Weights weights, Tensor1D padding, + Tensor1D stride, Tensor1D dilation) { + static_assert(is_tensor_of_dim<4, Src>::value, + "Expected 4 dimensional input"); + static_assert(is_tensor_of_dim<4, Dest>::value, + "Expected 4 dimensional output"); + static_assert(is_tensor_of_dim<4, Weights>::value, + "Expected 4 dimensional weights"); + assert(stride[0] > 0); + assert(stride[1] > 0); + assert(dilation[0] == 1); + assert(dilation[1] == 1); + const int N = input.dim(0); + const int H_IN = input.dim(1); + const int W_IN = input.dim(2); + const int C_IN = input.dim(3); + Dest output; + const int C_OUT = output.dim(3); + const int K_H = weights.dim(1); + const int K_W = weights.dim(2); + const int S_H = stride[0]; + const int S_W = stride[1]; + const int pt = padding[0]; + const int pb = padding[1]; + const int pl = padding[2]; + const int pr = padding[3]; + const int H_PAD = pt + H_IN + pb; + const int W_PAD = pl + W_IN + pr; + for (int n = 0; n < N; n++) { + for (int h_pad = 0; h_pad < H_PAD - K_H + 1; h_pad += S_H) { + for (int w_pad = 0; w_pad < W_PAD - K_W + 1; w_pad += S_W) { + for (int kh = 0; kh < K_H; kh++) { + for (int kw = 0; kw < K_W; kw++) { + for (int c_in = 0; c_in < C_IN; c_in++) { + for (int c_out = 0; c_out < C_OUT; c_out++) { + const int h_out = h_pad / S_H; + const int w_out = w_pad / S_W; + const int h_in = h_pad - pt + kh; + const int w_in = w_pad - pl + kw; + if (h_in < 0 || h_in >= H_IN || w_in < 0 || w_in >= W_IN) + continue; + output(n, h_out, w_out, c_out) += + input(n, h_in, w_in, c_in) * weights(c_out, kh, kw, c_in); + } + } + } + } + } + } + } + return output; +} +template +Dest depthwise_conv2d(Src input, Weights weights, Tensor1D padding, + Tensor1D stride, + Tensor1D dilation) { + static_assert(is_tensor_of_dim<4, Src>::value, + "Expected 4 dimensional input"); + static_assert(is_tensor_of_dim<4, Dest>::value, + "Expected 4 dimensional output"); + static_assert(is_tensor_of_dim<4, Weights>::value, + "Expected 4 dimensional weights"); + static_assert(Src::dim(3) == Weights::dim(2), + "Input channels must equal weights channels"); + static_assert(Src::dim(0) == Dest::dim(0), "Batch sizes must be equal"); + static_assert(Dest::dim(3) % Src::dim(3) == 0, + "Output channels need to be a multiple of input channels"); + static_assert( + Dest::dim(3) == Src::dim(3) * Weights::dim(3), + "Output channels size must be input channels times channel multiplier"); + assert(stride[0] > 0); + assert(stride[1] > 0); + assert(dilation[0] == 1); + assert(dilation[1] == 1); + const int N = input.dim(0); + const int H_IN = input.dim(1); + const int W_IN = input.dim(2); + const int C_IN = input.dim(3); + Dest output; + const int K_H = weights.dim(0); + const int K_W = weights.dim(1); + const int M = weights.dim(3); + const int S_H = stride[0]; + const int S_W = stride[1]; + const int pt = padding[0]; + const int pb = padding[1]; + const int pl = padding[2]; + const int pr = padding[3]; + const int H_PAD = pt + H_IN + pb; + const int W_PAD = pl + W_IN + pr; + for (int n = 0; n < N; ++n) { + for (int h_pad = 0; h_pad < H_PAD - K_H + 1; h_pad += S_H) { + for (int w_pad = 0; w_pad < W_PAD - K_W + 1; w_pad += S_W) { + for (int kh = 0; kh < K_H; ++kh) { + for (int kw = 0; kw < K_W; ++kw) { + for (int c_in = 0; c_in < C_IN; ++c_in) { + for (int m = 0; m < M; ++m) { + const int h_out = h_pad / S_H; + const int w_out = w_pad / S_W; + const int c_out = c_in * M + m; + const int h_in = h_pad - pt + kh; + const 
int w_in = w_pad - pl + kw; + if (h_in < 0 || h_in >= H_IN || w_in < 0 || w_in >= W_IN) + continue; + const size_t weights_index = emitc::utility::ravel_index< + Weights::dim(0), Weights::dim(1), 1, + Weights::dim(2) * Weights::dim(3)>(kh, kw, 0, c_out); + output(n, h_out, w_out, c_out) += + input(n, h_in, w_in, c_in) * weights[weights_index]; + } + } + } + } + } + } + } + return output; +} +template +Dest fully_connected(Src input, Weights weights, Bias bias) { + static_assert(is_tensor_of_dim<2, Src>::value, + "Expected 2 dimensional input"); + static_assert(is_tensor_of_dim<2, Dest>::value, + "Expected 2 dimensional output"); + static_assert(is_tensor_of_dim<2, Weights>::value, + "Expected 2 dimensional weights"); + static_assert(is_tensor_of_dim<1, Bias>::value, + "Expected 1 dimensional bias"); + Dest output; + static_assert(input.dim(0) == output.dim(0), + "Output and input batch dimension do not match."); + static_assert(input.dim(1) == weights.dim(1), + "Input and weights dimensions do not match."); + static_assert(output.dim(1) == weights.dim(0), + "Output and weights dimensions do not match."); + static_assert(weights.dim(0) == bias.dim(0), + "Bias and weights dimensions do not match."); + const size_t N = input.dim(0); + const size_t C_IN = input.dim(1); + const size_t C_OUT = weights.dim(0); + for (size_t n = 0; n < N; ++n) { + for (size_t c_out = 0; c_out < C_OUT; ++c_out) { + for (size_t c_in = 0; c_in < C_IN; ++c_in) { + auto in = input(n, c_in); + auto weight = weights(c_out, c_in); + output(n, c_out) += in * weight; + } + output(n, c_out) += bias(c_out); + } + } + return output; +} +template = true, IsTensorOfDim<3, Src> = true, + IsTensorOfDim<2, Idx> = true, IsTensorOfType = true> +Dest gather(Src input, Idx indices) { + Dest result; + static_assert(input.dim(0) == result.dim(0), + "Input and output batch dimension do not match."); + static_assert(input.dim(0) == indices.dim(0), + "Input and weight batch dimension do not match."); + static_assert(input.dim(2) == result.dim(2), + "Input and output channel dimension do not match."); + static_assert(indices.dim(1) == result.dim(1), + "Weight and output index dimension do not match."); + auto it = result.begin(); + size_t d0offset = Src::dim(1) * Src::dim(2); + for (size_t i = 0, idx = Idx::size(); i < idx; i++) { + auto d0 = d0offset * (i / Idx::dim(1)); + auto d1 = Src::dim(2) * indices[i]; + auto start = input.begin() + d0 + d1; + auto end = start + Src::dim(2); + it = std::copy(start, end, it); + } + return result; +} +template +Tensor3D matmul(Tensor3D a, Tensor3D b) { + return emitc::batch_matmul>(a, b); +} +namespace { +template +inline Dest reduce(Src operand, typename get_element_type::type initValue, + int64_t dimension, Computation computation) { + static_assert(is_tensor::value, "Expected tensor argument"); + static_assert(is_tensor::value, "Expected tensor result"); + using ET_Src = typename get_element_type::type; + using ET_Dest = typename get_element_type::type; + static_assert(std::is_same::value, "Element type mismatch"); + static_assert(Src::rank() == Dest::rank() + 1, + "source rank must equal dest rank + 1"); + std::vector retainedDimensions(Src::rank()); + std::iota(retainedDimensions.begin(), retainedDimensions.end(), 0); + retainedDimensions.erase(retainedDimensions.begin() + dimension); + assert(retainedDimensions.size() == Dest::rank()); + Dest result; + std::fill(result.begin(), result.end(), initValue); + for (size_t i = 0; i < operand.size(); ++i) { + auto value = operand[i]; + auto index = 
+namespace {
+template <typename Dest, typename Src, typename Computation>
+inline Dest reduce(Src operand,
+                   typename get_element_type<Src>::type initValue,
+                   int64_t dimension, Computation computation) {
+  static_assert(is_tensor<Src>::value, "Expected tensor argument");
+  static_assert(is_tensor<Dest>::value, "Expected tensor result");
+  using ET_Src = typename get_element_type<Src>::type;
+  using ET_Dest = typename get_element_type<Dest>::type;
+  static_assert(std::is_same<ET_Src, ET_Dest>::value, "Element type mismatch");
+  static_assert(Src::rank() == Dest::rank() + 1,
+                "source rank must equal dest rank + 1");
+  std::vector<size_t> retainedDimensions(Src::rank());
+  std::iota(retainedDimensions.begin(), retainedDimensions.end(), 0);
+  retainedDimensions.erase(retainedDimensions.begin() + dimension);
+  assert(retainedDimensions.size() == Dest::rank());
+  Dest result;
+  std::fill(result.begin(), result.end(), initValue);
+  for (size_t i = 0; i < operand.size(); ++i) {
+    auto value = operand[i];
+    auto index = operand.unravel_index(i);
+    // Drop the reduced dimension from the source index to form the
+    // destination index.
+    std::array<size_t, Dest::rank()> reducedIndex;
+    size_t j = 0;
+    for (size_t dim : retainedDimensions) {
+      reducedIndex[j++] = index[dim];
+    }
+    auto reductionValue = result[result.ravel_index(reducedIndex)];
+    result[result.ravel_index(reducedIndex)] =
+        computation(reductionValue, value);
+  }
+  return result;
+}
+} // namespace
+template <typename Dest, typename Src>
+inline Dest argmax(Src operand, int64_t dimension) {
+  static_assert(is_tensor<Src>::value, "Expected tensor argument");
+  static_assert(is_tensor<Dest>::value, "Expected tensor result");
+  using ET_Src = typename get_element_type<Src>::type;
+  static_assert(Src::rank() == Dest::rank() + 1,
+                "source rank must equal dest rank + 1");
+  std::vector<size_t> retainedDimensions(Src::rank());
+  std::iota(retainedDimensions.begin(), retainedDimensions.end(), 0);
+  retainedDimensions.erase(retainedDimensions.begin() + dimension);
+  assert(retainedDimensions.size() == Dest::rank());
+  Dest result;
+  typename replace_element_type<ET_Src, Dest>::type maxValues;
+  std::fill(maxValues.begin(), maxValues.end(),
+            std::numeric_limits<ET_Src>::min());
+  for (size_t i = 0; i < operand.size(); ++i) {
+    auto value = operand[i];
+    auto index = operand.unravel_index(i);
+    std::array<size_t, Dest::rank()> reducedIndex;
+    size_t j = 0;
+    for (size_t dim : retainedDimensions) {
+      reducedIndex[j++] = index[dim];
+    }
+    auto destIndex = result.ravel_index(reducedIndex);
+    if (value > maxValues[destIndex]) {
+      maxValues[destIndex] = value;
+      result[destIndex] = index[dimension];
+    }
+  }
+  return result;
+}
+template <typename Dest, typename Src>
+inline Dest reduce_all(Src input, int64_t dimension) {
+  using ET_Src = typename get_element_type<Src>::type;
+  using ET_Dest = typename get_element_type<Dest>::type;
+  static_assert(std::is_same<ET_Src, bool>::value,
+                "Src tensor type must be bool");
+  static_assert(std::is_same<ET_Dest, bool>::value,
+                "Dest tensor type must be bool");
+  auto and_ = [](ET_Src a, ET_Src b) { return (a && b); };
+  return tosa::reduce<Dest>(input, true, dimension, and_);
+}
+template <typename Dest, typename Src>
+inline Dest reduce_any(Src input, int64_t dimension) {
+  using ET_Src = typename get_element_type<Src>::type;
+  using ET_Dest = typename get_element_type<Dest>::type;
+  static_assert(std::is_same<ET_Src, bool>::value,
+                "Src tensor type must be bool");
+  static_assert(std::is_same<ET_Dest, bool>::value,
+                "Dest tensor type must be bool");
+  auto or_ = [](ET_Src a, ET_Src b) { return a || b; };
+  return tosa::reduce<Dest>(input, false, dimension, or_);
+}
+template <typename Dest, typename Src>
+inline Dest reduce_max(Src input, int64_t dimension) {
+  using ET_Src = typename get_element_type<Src>::type;
+  auto f = static_cast<const ET_Src &(*)(const ET_Src &, const ET_Src &)>(
+      std::max);
+  return tosa::reduce<Dest>(input, std::numeric_limits<ET_Src>::min(),
+                            dimension, f);
+}
+template <typename Dest, typename Src>
+inline Dest reduce_min(Src input, int64_t dimension) {
+  using ET_Src = typename get_element_type<Src>::type;
+  auto f = static_cast<const ET_Src &(*)(const ET_Src &, const ET_Src &)>(
+      std::min);
+  return tosa::reduce<Dest>(input, std::numeric_limits<ET_Src>::max(),
+                            dimension, f);
+}
+template <typename Dest, typename Src>
+inline Dest reduce_prod(Src input, int64_t dimension) {
+  using ET_Src = typename get_element_type<Src>::type;
+  return tosa::reduce<Dest>(input, 1, dimension,
+                            std::multiplies<ET_Src>{});
+}
+template <typename Dest, typename Src>
+inline Dest reduce_sum(Src input, int64_t dimension) {
+  using ET_Src = typename get_element_type<Src>::type;
+  return tosa::reduce<Dest>(input, 0, dimension, std::plus<ET_Src>{});
+}
+template <typename Dest, typename Src> inline Dest reshape(Src x) {
+  return emitc::reshape<Dest>(x);
+}
+template <typename Dest, typename Src>
+Dest slice(Src x, Tensor<int64_t, Src::rank()> start_indices,
+           Tensor<int64_t, Src::rank()> slice_sizes) {
+  Tensor<int64_t, Src::rank()> limit_indices =
+      emitc::add(start_indices, slice_sizes);
+  Tensor<int64_t, Src::rank()> strides =
+      emitc::tensor::splat<Tensor<int64_t, Src::rank()>>(1);
+  return emitc::slice<Dest>(x, start_indices, limit_indices, strides);
+}
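+// The pad below follows the TOSA convention: `padding` is a rank-2 tensor of
+// shape [Src::rank(), 2] holding a (low, high) pair per dimension. For
+// example (made-up values), padding = {{1, 1}, {0, 2}} on a 2-D tensor adds
+// one row before and after dimension 0 and two trailing columns along
+// dimension 1; interior (dilation-style) padding is always zero here.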
+template <typename Dest, typename Src, typename Padding>
+inline Dest pad(Src operand, Padding padding,
+                Tensor0D<typename get_element_type<Src>::type> pad_const =
+                    Tensor0D<typename get_element_type<Src>::type>{0}) {
+  using ET_Padding = typename get_element_type<Padding>::type;
+  static_assert(is_tensor<Dest>::value, "Expected tensor result");
+  static_assert(is_tensor<Src>::value, "Expected tensor argument");
+  static_assert(is_tensor<Padding>::value, "Expected tensor argument");
+  static_assert(Padding::rank() == 2, "Padding must have rank 2");
+  static_assert(Padding::dim(0) == Src::rank(),
+                "Dimension 1 of padding must equal source rank");
+  static_assert(Padding::dim(1) == 2, "Dimension 2 of padding must be 2");
+  static_assert(std::is_same<ET_Padding, int32_t>::value ||
+                    std::is_same<ET_Padding, int64_t>::value,
+                "Padding element type must be i32 or i64");
+  Tensor<int64_t, Src::rank()> edge_padding_low;
+  Tensor<int64_t, Src::rank()> edge_padding_high;
+  for (unsigned int i = 0; i < padding.dim(0); ++i) {
+    edge_padding_low(i) = padding(i, 0);
+    edge_padding_high(i) = padding(i, 1);
+  }
+  Tensor<int64_t, Src::rank()> interior_padding;
+  std::fill(interior_padding.begin(), interior_padding.end(), 0);
+  return emitc::pad<Dest>(operand, pad_const, edge_padding_low,
+                          edge_padding_high, interior_padding);
+}
+template <typename Dest, typename Src, IsTensorOfDim<1, Src> = true>
+Dest tile(Src input, Tensor1D<int32_t, 1> multiples) {
+  Dest result;
+  auto it = result.begin();
+  for (int32_t i = 0, M0 = multiples[0]; i < M0; i++) {
+    it = std::copy(input.begin(), input.end(), it);
+  }
+  return result;
+}
+template <typename Dest, typename Src, IsTensorOfDim<2, Src> = true>
+Dest tile(Src input, Tensor1D<int32_t, 2> multiples) {
+  Dest result;
+  auto it = result.begin();
+  for (int32_t i = 0, M0 = multiples[0]; i < M0; i++) {
+    for (int32_t j = 0, D0 = Src::dim(0); j < D0; j++) {
+      for (int32_t k = 0, M1 = multiples[1]; k < M1; k++) {
+        auto start = input.begin() + j * Src::dim(1);
+        auto end = start + Src::dim(1);
+        it = std::copy(start, end, it);
+      }
+    }
+  }
+  return result;
+}
+template <typename Dest, typename Src, IsTensorOfDim<3, Src> = true>
+Dest tile(Src input, Tensor1D<int32_t, 3> multiples) {
+  Dest result;
+  auto it = result.begin();
+  for (int32_t m0 = 0, M0 = multiples[0]; m0 < M0; m0++) {
+    for (int32_t d0 = 0, D0 = Src::dim(0); d0 < D0; d0++) {
+      for (int32_t m1 = 0, M1 = multiples[1]; m1 < M1; m1++) {
+        for (int32_t d1 = 0, D1 = Src::dim(1); d1 < D1; d1++) {
+          for (int32_t m2 = 0, M2 = multiples[2]; m2 < M2; m2++) {
+            auto start = input.begin() + (d0 * Src::dim(1) + d1) * Src::dim(2);
+            auto end = start + Src::dim(2);
+            it = std::copy(start, end, it);
+          }
+        }
+      }
+    }
+  }
+  return result;
+}
+template <typename Dest, typename Src, IsTensorOfDim<4, Src> = true>
+Dest tile(Src input, Tensor1D<int32_t, 4> multiples) {
+  Dest result;
+  auto it = result.begin();
+  for (int32_t m0 = 0, M0 = multiples[0]; m0 < M0; m0++) {
+    for (int32_t d0 = 0, D0 = Src::dim(0); d0 < D0; d0++) {
+      for (int32_t m1 = 0, M1 = multiples[1]; m1 < M1; m1++) {
+        for (int32_t d1 = 0, D1 = Src::dim(1); d1 < D1; d1++) {
+          for (int32_t m2 = 0, M2 = multiples[2]; m2 < M2; m2++) {
+            for (int32_t d2 = 0, D2 = Src::dim(2); d2 < D2; d2++) {
+              for (int32_t m3 = 0, M3 = multiples[3]; m3 < M3; m3++) {
+                auto start =
+                    input.begin() +
+                    ((d0 * Src::dim(1) + d1) * Src::dim(2) + d2) * Src::dim(3);
+                auto end = start + Src::dim(3);
+                it = std::copy(start, end, it);
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+  return result;
+}
+template <typename Dest, typename Src>
+inline Dest transpose(Src operand, Tensor1D<int64_t, Src::rank()> perms) {
+  static_assert(is_tensor<Src>::value, "Expected tensor argument");
+  static_assert(is_tensor<Dest>::value, "Expected tensor result");
+  // Transpose is lowered to a broadcast_in_dim with the inverse permutation.
+  Tensor1D<int64_t, Src::rank()> broadcast_dimensions;
+  for (size_t i = 0; i < perms.size(); ++i) {
+    auto pos = std::find(perms.begin(), perms.end(), i);
+    assert(pos != std::end(perms));
+    int64_t index = std::distance(perms.begin(), pos);
+    broadcast_dimensions[i] = index;
+  }
+  return emitc::broadcast_in_dim<Dest>(operand, broadcast_dimensions);
+}
+template <typename Dest, typename Src>
+inline Dest transpose(Src input, Tensor1D<int32_t, Src::rank()> perms) {
+  Tensor1D<int64_t, Src::rank()> permsInt64;
+  for (size_t i = 0; i < perms.size(); ++i) {
+    permsInt64[i] = static_cast<int64_t>(perms[i]);
+  }
+  return tosa::transpose<Dest>(input, permsInt64);
+}
+} // namespace tosa
+} // namespace emitc
+} // namespace
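+// What follows is the mlir-translate-generated model for the inliner test
+// policy. A caller would use it roughly like this (editorial sketch; in-tree,
+// the EmitC MLModelRunner owns these calls):
+//   emitc_generated::InlineOzTestModel M;
+//   *M.get_buffer_callsite_cost() = 42; // ...populate remaining features...
+//   int64_t Advice = *M.run();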
+namespace emitc_generated {
+class _InlineOzTestModelImpl {
+private:
+  Tensor<int64_t> result;
+  Tensor<int64_t> v1;
+  Tensor<int64_t> v2;
+  Tensor<int64_t> v3;
+  Tensor<int64_t> v4;
+  Tensor<int64_t> v5;
+  Tensor<int64_t> v6;
+  Tensor<int64_t> v7;
+  Tensor<int64_t> v8;
+  Tensor<int64_t> v9;
+  Tensor<int64_t> v10;
+  Tensor<int64_t> v11;
+  Tensor<int64_t> v12;
+  Tensor<int64_t> v13;
+  Tensor<int64_t> v14;
+  Tensor<int64_t> v15;
+  Tensor<int64_t> v16;
+  Tensor<int64_t> v17;
+  Tensor<int64_t> v18;
+  Tensor<int64_t> v19;
+  Tensor<int64_t> v20;
+  Tensor<int64_t> v21;
+  Tensor<int64_t> v22;
+  Tensor<int64_t> v23;
+  Tensor<int64_t> v24;
+  Tensor<int64_t> v25;
+  Tensor<int64_t> v26;
+  Tensor<int64_t> v27;
+  Tensor<int32_t> v28;
+  Tensor<int64_t> v29;
+  Tensor<int64_t> v30;
+  Tensor<int64_t> v31;
+  Tensor<int64_t> v32;
+  Tensor<int64_t> v33;
+  Tensor<int64_t> v34;
+  Tensor<int64_t> v35;
+  Tensor<int64_t> v36;
+  Tensor<float> v37;
+  Tensor<int64_t> v38;
+  Tensor<float> v39;
+
+public:
+  int64_t *get_buffer_callsite_cost() { return v1.get(); }
+  int64_t *get_buffer_is_multiple_blocks() { return v2.get(); }
+  int64_t *get_buffer_caller_conditionally_executed_blocks() {
+    return v3.get();
+  }
+  int64_t *get_buffer_inlining_default() { return v4.get(); }
+  int64_t *get_buffer_cold_cc_penalty() { return v5.get(); }
+  int64_t *get_buffer_callee_conditionally_executed_blocks() {
+    return v6.get();
+  }
+  int64_t *get_buffer_callee_users() { return v7.get(); }
+  int64_t *get_buffer_callee_basic_block_count() { return v8.get(); }
+  int64_t *get_buffer_nr_ctant_params() { return v9.get(); }
+  int64_t *get_buffer_load_relative_intrinsic() { return v10.get(); }
+  int64_t *get_buffer_jump_table_penalty() { return v11.get(); }
+  int64_t *get_buffer_unsimplified_common_instructions() { return v12.get(); }
+  int64_t *get_buffer_indirect_call_penalty() { return v13.get(); }
+  int64_t *get_buffer_load_elimination() { return v14.get(); }
+  int64_t *get_buffer_call_penalty() { return v15.get(); }
+  int64_t *get_buffer_cost_estimate() { return v16.get(); }
+  int64_t *get_buffer_case_cluster_penalty() { return v17.get(); }
+  int64_t *get_buffer_node_count() { return v18.get(); }
+  int64_t *get_buffer_call_argument_setup() { return v19.get(); }
+  int64_t *get_buffer_sroa_savings() { return v20.get(); }
+  int64_t *get_buffer_lowered_call_arg_setup() { return v21.get(); }
+  int64_t *get_buffer_threshold() { return v22.get(); }
+  int64_t *get_buffer_dead_blocks() { return v23.get(); }
+  int64_t *get_buffer_constant_args() { return v24.get(); }
+  int64_t *get_buffer_sroa_losses() { return v25.get(); }
+  int64_t *get_buffer_simplified_instructions() { return v26.get(); }
+  int64_t *get_buffer_num_loops() { return v27.get(); }
+  int32_t *get_buffer_step_type() { return v28.get(); }
+  int64_t *get_buffer_edge_count() { return v29.get(); }
+  int64_t *get_buffer_nested_inlines() { return v30.get(); }
+  int64_t *get_buffer_caller_basic_block_count() { return v31.get(); }
+  int64_t *get_buffer_last_call_to_static_bonus() { return v32.get(); }
+  int64_t *get_buffer_nested_inline_cost_estimate() { return v33.get(); }
+  int64_t *get_buffer_callsite_height() { return v34.get(); }
+  int64_t *get_buffer_constant_offset_ptr_args() { return v35.get(); }
+  int64_t *get_buffer_switch_penalty() { return v36.get(); }
+  float *get_buffer_discount() { return v37.get(); }
+  int64_t *get_buffer_caller_users() { return v38.get(); }
+  float *get_buffer_reward() { return v39.get(); }
+  int64_t *run() {
+    result = runImpl();
+    return result.get();
+  }
+  Tensor<int64_t> runImpl() {
+    // The test model unconditionally returns 1, i.e. "inline".
+    Tensor<int64_t> v40 = {1};
+    return v40;
+  }
+};
+InlineOzTestModel::InlineOzTestModel()
+    : impl{std::make_unique<_InlineOzTestModelImpl>()} {}
+InlineOzTestModel::~InlineOzTestModel() {}
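+// Thin pimpl forwarders: the public InlineOzTestModel keeps the generated
+// tensor state behind a unique_ptr so the header needs only <memory> and
+// <string> (see embed_runtime in tflite_to_cpp_lib.py).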
+int64_t *InlineOzTestModel::get_buffer_callsite_cost() {
+  return impl->get_buffer_callsite_cost();
+}
+int64_t *InlineOzTestModel::get_buffer_is_multiple_blocks() {
+  return impl->get_buffer_is_multiple_blocks();
+}
+int64_t *InlineOzTestModel::get_buffer_caller_conditionally_executed_blocks() {
+  return impl->get_buffer_caller_conditionally_executed_blocks();
+}
+int64_t *InlineOzTestModel::get_buffer_inlining_default() {
+  return impl->get_buffer_inlining_default();
+}
+int64_t *InlineOzTestModel::get_buffer_cold_cc_penalty() {
+  return impl->get_buffer_cold_cc_penalty();
+}
+int64_t *InlineOzTestModel::get_buffer_callee_conditionally_executed_blocks() {
+  return impl->get_buffer_callee_conditionally_executed_blocks();
+}
+int64_t *InlineOzTestModel::get_buffer_callee_users() {
+  return impl->get_buffer_callee_users();
+}
+int64_t *InlineOzTestModel::get_buffer_callee_basic_block_count() {
+  return impl->get_buffer_callee_basic_block_count();
+}
+int64_t *InlineOzTestModel::get_buffer_nr_ctant_params() {
+  return impl->get_buffer_nr_ctant_params();
+}
+int64_t *InlineOzTestModel::get_buffer_load_relative_intrinsic() {
+  return impl->get_buffer_load_relative_intrinsic();
+}
+int64_t *InlineOzTestModel::get_buffer_jump_table_penalty() {
+  return impl->get_buffer_jump_table_penalty();
+}
+int64_t *InlineOzTestModel::get_buffer_unsimplified_common_instructions() {
+  return impl->get_buffer_unsimplified_common_instructions();
+}
+int64_t *InlineOzTestModel::get_buffer_indirect_call_penalty() {
+  return impl->get_buffer_indirect_call_penalty();
+}
+int64_t *InlineOzTestModel::get_buffer_load_elimination() {
+  return impl->get_buffer_load_elimination();
+}
+int64_t *InlineOzTestModel::get_buffer_call_penalty() {
+  return impl->get_buffer_call_penalty();
+}
+int64_t *InlineOzTestModel::get_buffer_cost_estimate() {
+  return impl->get_buffer_cost_estimate();
+}
+int64_t *InlineOzTestModel::get_buffer_case_cluster_penalty() {
+  return impl->get_buffer_case_cluster_penalty();
+}
+int64_t *InlineOzTestModel::get_buffer_node_count() {
+  return impl->get_buffer_node_count();
+}
+int64_t *InlineOzTestModel::get_buffer_call_argument_setup() {
+  return impl->get_buffer_call_argument_setup();
+}
+int64_t *InlineOzTestModel::get_buffer_sroa_savings() {
+  return impl->get_buffer_sroa_savings();
+}
+int64_t *InlineOzTestModel::get_buffer_lowered_call_arg_setup() {
+  return impl->get_buffer_lowered_call_arg_setup();
+}
+int64_t *InlineOzTestModel::get_buffer_threshold() {
+  return impl->get_buffer_threshold();
+}
+int64_t *InlineOzTestModel::get_buffer_dead_blocks() {
+  return impl->get_buffer_dead_blocks();
+}
+int64_t *InlineOzTestModel::get_buffer_constant_args() {
+  return impl->get_buffer_constant_args();
+}
+int64_t *InlineOzTestModel::get_buffer_sroa_losses() {
+  return impl->get_buffer_sroa_losses();
+}
+int64_t *InlineOzTestModel::get_buffer_simplified_instructions() {
+  return impl->get_buffer_simplified_instructions();
+}
+int64_t *InlineOzTestModel::get_buffer_num_loops() {
+  return impl->get_buffer_num_loops();
+}
+int32_t *InlineOzTestModel::get_buffer_step_type() {
+  return impl->get_buffer_step_type();
+}
+int64_t *InlineOzTestModel::get_buffer_edge_count() {
+  return impl->get_buffer_edge_count();
+}
+int64_t *InlineOzTestModel::get_buffer_nested_inlines() {
+  return impl->get_buffer_nested_inlines();
+}
+int64_t *InlineOzTestModel::get_buffer_caller_basic_block_count() {
+  return impl->get_buffer_caller_basic_block_count();
+}
+int64_t *InlineOzTestModel::get_buffer_last_call_to_static_bonus() {
+  return impl->get_buffer_last_call_to_static_bonus();
+}
+int64_t *InlineOzTestModel::get_buffer_nested_inline_cost_estimate() {
+  return impl->get_buffer_nested_inline_cost_estimate();
+}
+int64_t *InlineOzTestModel::get_buffer_callsite_height() {
+  return impl->get_buffer_callsite_height();
+}
+int64_t *InlineOzTestModel::get_buffer_constant_offset_ptr_args() {
+  return impl->get_buffer_constant_offset_ptr_args();
+}
+int64_t *InlineOzTestModel::get_buffer_switch_penalty() {
+  return impl->get_buffer_switch_penalty();
+}
+float *InlineOzTestModel::get_buffer_discount() {
+  return impl->get_buffer_discount();
+}
+int64_t *InlineOzTestModel::get_buffer_caller_users() {
+  return impl->get_buffer_caller_users();
+}
+float *InlineOzTestModel::get_buffer_reward() {
+  return impl->get_buffer_reward();
+}
+int64_t *InlineOzTestModel::run() { return impl->run(); }
+
+} // namespace emitc_generated
diff --git a/llvm/lib/Analysis/models/tflite_to_cpp.py b/llvm/lib/Analysis/models/tflite_to_cpp.py
new file mode 100644
--- /dev/null
+++ b/llvm/lib/Analysis/models/tflite_to_cpp.py
@@ -0,0 +1,117 @@
+"""Script for converting between TFLite and C++ using EmitC."""
+from absl import app
+from absl import flags
+from absl import logging
+
+import tflite_to_cpp_lib
+
+flags.DEFINE_string(
+    'input', None, 'Input, which should be a path to a tflite model'
+)
+flags.mark_flag_as_required('input')
+
+flags.DEFINE_string(
+    'output_dir', None, 'Output directory for the generated files'
+)
+flags.mark_flag_as_required('output_dir')
+
+flags.DEFINE_string(
+    'name',
+    None,
+    (
+        'Name to use for the model. This will be in the filenames and also'
+        ' will be used to identify the model within LLVM. This should be'
+        ' unique between models.'
+    ),
+)
+flags.mark_flag_as_required('name')
+
+flags.DEFINE_string(
+    'iree_import_tflite_path',
+    None,
+    'Path to the iree-import-tflite binary from the IREE repository',
+)
+flags.mark_flag_as_required('iree_import_tflite_path')
+
+flags.DEFINE_string(
+    'emitc_opt_path',
+    None,
+    'Path to the emitc-opt binary from the EmitC repository',
+)
+flags.mark_flag_as_required('emitc_opt_path')
+
+flags.DEFINE_string(
+    'mlir_translate_path',
+    None,
+    'Path to the mlir-translate binary from the LLVM repository',
+)
+flags.mark_flag_as_required('mlir_translate_path')
+
+flags.DEFINE_string(
+    'emitc_runtime_path',
+    None,
+    'Path to the EmitC runtime to embed in the generated C++ model',
+)
+flags.mark_flag_as_required('emitc_runtime_path')
+
+flags.DEFINE_string(
+    'clang_format_path',
+    None,
+    (
+        '(Optional) path to the clang-format binary to use to format the'
+        ' resulting files'
+    ),
+)
+flags.DEFINE_string(
+    'clang_format_style',
+    'llvm',
+    'Style argument to use for clang-format',
+)
+
+FLAGS = flags.FLAGS
+
+
+def main(argv):
+  del argv
+  logging.info('Beginning conversion pipeline.')
+  tosa = tflite_to_cpp_lib.tflite_to_tosa(
+      tflite_path=FLAGS.input,
+      iree_import_tflite_path=FLAGS.iree_import_tflite_path,
+  )
+  emitc_mlir = tflite_to_cpp_lib.tosa_to_emitc_mlir(
+      tosa=tosa, emitc_opt_path=FLAGS.emitc_opt_path
+  )
+  model = tflite_to_cpp_lib.emitc_mlir_to_cpp(
+      emitc_mlir=emitc_mlir,
+      mlir_translate_path=FLAGS.mlir_translate_path,
+      name=FLAGS.name,
+  )
+  model = tflite_to_cpp_lib.embed_runtime(
+      model=model,
+      runtime_path=FLAGS.emitc_runtime_path,
+  )
+
+  tflite_to_cpp_lib.print_llvm_registration_handle(model=model)
+
+  model = tflite_to_cpp_lib.add_license_and_notice(model=model)
+
+  if FLAGS.clang_format_path:
+    model = tflite_to_cpp_lib.format_model(
+        model=model,
+        clang_format_path=FLAGS.clang_format_path,
+        clang_format_style=FLAGS.clang_format_style,
+    )
+
+  cpp_path = tflite_to_cpp_lib.get_model_cpp_path(model, FLAGS.output_dir)
+  hdr_path = tflite_to_cpp_lib.get_model_hdr_path(model, FLAGS.output_dir)
+
+  logging.info('Writing generated files to [%s] and [%s].', cpp_path, hdr_path)
+  with open(cpp_path, 'wt', encoding='utf-8') as f:
+    f.write(model.cpp)
+  with open(hdr_path, 'wt', encoding='utf-8') as f:
+    f.write(model.hdr)
+  logging.info('Done.')
+
+
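+# Example invocation (all paths are illustrative, not shipped defaults):
+#   python3 tflite_to_cpp.py \
+#     --input=/tmp/model.tflite \
+#     --output_dir=/tmp/generated \
+#     --name=InlineOzTestModel \
+#     --iree_import_tflite_path=/path/to/iree-import-tflite \
+#     --emitc_opt_path=/path/to/emitc-opt \
+#     --mlir_translate_path=/path/to/mlir-translate \
+#     --emitc_runtime_path=/path/to/emitc/runtime \
+#     --clang_format_path="$(which clang-format)"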
+if __name__ == '__main__':
+  app.run(main)
diff --git a/llvm/lib/Analysis/models/tflite_to_cpp_lib.py b/llvm/lib/Analysis/models/tflite_to_cpp_lib.py
new file mode 100644
--- /dev/null
+++ b/llvm/lib/Analysis/models/tflite_to_cpp_lib.py
@@ -0,0 +1,347 @@
+"""Library for converting between TFLite and C++ using EmitC."""
+from __future__ import annotations

+import os
+import dataclasses
+import subprocess
+import pathlib
+import re
+
+from absl import flags
+from absl import logging
+
+flags.DEFINE_bool(
+    'rename_main_to_action',
+    True,
+    (
+        'Whether to rename the @main method to @action, if it exists. This'
+        ' option exists because the MLGO-generated policies use the name'
+        ' @action, but the LLVM test policies that are generated by scripts in'
+        ' lib/Analysis/models use the name @main.'
+    ),
+)
+
+FLAGS = flags.FLAGS
+
+_TFAGENTS_POLICY_NAME = 'action'
+_MODEL_NAMESPACE = 'emitc_generated'
+
+# pylint: disable=line-too-long
+_LICENSE_AND_NOTICE = """// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// SPDX-License-Identifier: Apache-2.0
+
+// This code was originally sourced from github.com/iml130/mlir-emitc and has
+// been modified to fit the needs of generated C++ models in LLVM.
+"""
+
+
+def _fmt_includes(includes, angles=False):
+  lhs = '<' if angles else '"'
+  rhs = '>' if angles else '"'
+  return '\n'.join([f'#include {lhs}{hdr}{rhs}' for hdr in includes]) + '\n'
+
+
+@dataclasses.dataclass
+class EmitCRuntime:
+  """Holds the runtime headers in memory."""
+
+  # Maps [header name] -> [header contents]
+  headers: dict[str, str]
+
+  # The primary header for the runtime, e.g., 'tosa.h'
+  primary: str
+
+
+def _load_emitc_runtime(path: str) -> EmitCRuntime:
+  """Loads the EmitC runtime from a given path."""
+  headers = {}
+  pathlist = pathlib.Path(path).glob('*.h')
+  for p in pathlist:
+    with open(p, 'rt', encoding='utf-8') as f:
+      headers[p.name] = f.read()
+  return EmitCRuntime(headers=headers, primary='tosa.h')
+
+
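+# Sketch of what _create_local_emitc_runtime does to the headers loaded above
+# (hypothetical header contents): if tensor.h contains the line
+#   #include "emitc/types.h"
+# the substitution below replaces that line with the full text of types.h the
+# first time it is seen, and with the empty string on every later occurrence,
+# so expanding the primary header in dependency order yields one flat buffer.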
+ """ + topsort_on_includes = [ + 'utility.h', + 'types.h', + 'core_ops.h', + 'tensor.h', + 'tosa.h', + ] + assert set(topsort_on_includes).issubset(set(runtime.headers.keys())) + # we don't currently support the eigen runtime, so set the file to zero + runtime.headers['tosa_eigen.h'] = '' + has_been_included = {key: False for key in topsort_on_includes} + for key in topsort_on_includes: + + def on_match(m): + group = m.group(1) + if group not in topsort_on_includes or has_been_included[group]: + return '' + has_been_included[group] = True + return runtime.headers[group] + + runtime.headers[key] = re.sub( + r'#include "emitc/(\w+\.h)"', + on_match, + runtime.headers[key], + ) + local_runtime = runtime.headers[runtime.primary] + # Remove all comments, they just take up space + local_runtime = re.sub(r'//.*', '', local_runtime) + + # Find any stdlib includes and store them + stdlib_includes = re.findall(r'#include <(\w+)>', local_runtime) + + # Remove all the remaining macros + local_runtime = re.sub(r'#.*', '', local_runtime) + + # Wrap the runtime in a local namespace to prevent ODR problems + local_runtime = 'namespace {\n' + local_runtime + '\n}' + + # Reinsert the stdlib includes + include_str = ( + '\n'.join([f'#include <{hdr}>' for hdr in stdlib_includes]) + '\n' + ) + + local_runtime = include_str + local_runtime + + # Remove all empty newlines and return + return '\n'.join( + [l for l in local_runtime.splitlines() if (l and not l.isspace())] + ) + + +@dataclasses.dataclass +class EmitCModel: + # TODO: document this + # TODO: get rid of cpp and hdr + name: str + cpp: str + hdr: str + + +def _run_clang_format( + buffer: str, clang_format_path: str, clang_format_style: str +) -> str: + """Formats the given buffer and returns the result""" + cmdline = [clang_format_path, f'--style={clang_format_style}'] + result = subprocess.run( + cmdline, stdout=subprocess.PIPE, text=True, input=buffer, check=True + ) + return result.stdout + + +def format_model( + model: EmitCModel, clang_format_path: str, clang_format_style: str +) -> EmitCModel: + """Formats the given model and returns the result""" + logging.info( + 'Formatting the resulting model with style [%s].', clang_format_style + ) + return dataclasses.replace( + model, + cpp=_run_clang_format( + model.cpp, + clang_format_path=clang_format_path, + clang_format_style=clang_format_style, + ), + hdr=_run_clang_format( + model.hdr, + clang_format_path=clang_format_path, + clang_format_style=clang_format_style, + ), + ) + + +def get_model_cpp_path(model: EmitCModel, root: str) -> str: + return os.path.join(root, model.name + '.emitc.cpp') + + +def get_model_hdr_path(model: EmitCModel, root: str) -> str: + return os.path.join(root, model.name + '.emitc.h') + + +def tflite_to_tosa( + tflite_path: str, iree_import_tflite_path: str, *, convert_i48=True +) -> str: + """Converts TFLite to TOSA MLIR.""" + logging.info('Converting the TFLite model to TOSA MLIR.') + cmdline = [ + iree_import_tflite_path, + '-o', + '-', + tflite_path, + '--output-format=mlir-ir', + ] + result = subprocess.run( + cmdline, stdout=subprocess.PIPE, text=True, check=True + ) + if convert_i48: + return re.sub(r'i48', 'i64', result.stdout) + return result.stdout + + +def tosa_to_emitc_mlir(tosa: str, emitc_opt_path: str) -> str: + """Converts TOSA MLIR to EmitC MLIR using emitc-opt.""" + if FLAGS.rename_main_to_action: + tosa = re.sub('@main', '@action', tosa) + logging.info('Converting the TOSA MLIR to EmitC MLIR.') + cmdline = [emitc_opt_path, '--convert-tosa-to-emitc', '-o', 
+def tosa_to_emitc_mlir(tosa: str, emitc_opt_path: str) -> str:
+  """Converts TOSA MLIR to EmitC MLIR using emitc-opt."""
+  if FLAGS.rename_main_to_action:
+    tosa = re.sub('@main', '@action', tosa)
+  logging.info('Converting the TOSA MLIR to EmitC MLIR.')
+  cmdline = [emitc_opt_path, '--convert-tosa-to-emitc', '-o', '-', '-']
+  result = subprocess.run(
+      cmdline, stdout=subprocess.PIPE, text=True, input=tosa, check=True
+  )
+  return result.stdout
+
+
+def emitc_mlir_to_cpp(
+    emitc_mlir: str,
+    mlir_translate_path: str,
+    name: str,
+) -> EmitCModel:
+  """Converts EmitC MLIR to C++ files using mlir-translate."""
+  logging.info('Converting the EmitC MLIR to C++.')
+
+  def _get_cmdline(kind: str):
+    return [
+        mlir_translate_path,
+        '-mlir-to-cpp',
+        '--emit-cpp-kind=stateful',
+        '--emit-cpp-arg-name-attr=tf_saved_model.index_path',
+        f'--emit-cpp-model-name={name}',
+        f'--emit-cpp-file-kind={kind}',
+        f'--emit-cpp-only-one-fn={_TFAGENTS_POLICY_NAME}',
+        '-o',
+        '-',
+        '-',
+    ]
+
+  result_cpp = subprocess.run(
+      _get_cmdline('cpp'),
+      stdout=subprocess.PIPE,
+      text=True,
+      input=emitc_mlir,
+      check=True,
+  ).stdout
+  result_hdr = subprocess.run(
+      _get_cmdline('header'),
+      stdout=subprocess.PIPE,
+      text=True,
+      input=emitc_mlir,
+      check=True,
+  ).stdout
+
+  # Wrap the results in namespaces.
+  result_cpp = f'namespace {_MODEL_NAMESPACE} {{' + '\n' + result_cpp + '}\n'
+  result_hdr = f'namespace {_MODEL_NAMESPACE} {{' + '\n' + result_hdr + '}\n'
+
+  return EmitCModel(cpp=result_cpp, hdr=result_hdr, name=name)
+
+
+def embed_runtime(
+    model: EmitCModel,
+    runtime_path: str,
+) -> EmitCModel:
+  """Embeds the EmitC runtime in the model.cpp file.
+
+  This also:
+    1) renames any types that are coming from LLVM instead of the embedded
+       runtime, and
+    2) includes all required headers.
+
+  Args:
+    model: the model which we are embedding the runtime into.
+    runtime_path: path to the EmitC runtime to embed.
+
+  Returns:
+    the new model.
+  """
+  logging.info('Embedding the EmitC runtime in the generated model.')
+
+  runtime = _load_emitc_runtime(runtime_path)
+  local_runtime = _create_local_emitc_runtime(runtime)
+
+  new_cpp = local_runtime + model.cpp
+
+  # Add the necessary includes to both files.
+  cpp_includes = [f'{model.name}.emitc.h']
+  hdr_includes = ['memory', 'string']
+
+  new_cpp = _fmt_includes(cpp_includes) + new_cpp
+  new_hdr = _fmt_includes(hdr_includes, angles=True) + model.hdr
+
+  return dataclasses.replace(model, cpp=new_cpp, hdr=new_hdr)
+
+
+def add_license_and_notice(model: EmitCModel) -> EmitCModel:
+  new_cpp = _LICENSE_AND_NOTICE + model.cpp
+  new_hdr = _LICENSE_AND_NOTICE + model.hdr
+  return dataclasses.replace(model, cpp=new_cpp, hdr=new_hdr)
+
+
+def print_llvm_registration_handle(model: EmitCModel):
+  """Prints LLVM model registration code.
+
+  The printed handle automatically adds the model to a global registry of
+  models that are available in LLVM, so all that needs to be done to integrate
+  the model into LLVM is to link the .cpp into the required binary.
+  """
+  registration_msg = f"""
+{'*'*60}
+To register the generated model in LLVM, please:
+  1) copy the generated .cpp/.h to llvm/lib/Analysis/models/emitc,
+  2) add the .cpp to llvm/lib/Analysis/models/emitc/CMakeLists.txt, and
+  3) include the following code somewhere in an LLVM .cpp file:
+
+#include "models/emitc/{model.name}.emitc.h"
+REGISTER_EMITC_MODEL(FULLY_QUALIFIED_NAME_OF_EMITC_MODEL_RUNNER, {model.name});
+
+Note that the .cpp file containing the above code must also include the
+following line at least once:
+
+#include "llvm/Analysis/EmitCModelRegistry.h"
+
+The token FULLY_QUALIFIED_NAME_OF_EMITC_MODEL_RUNNER needs to be replaced with
+the (template) class which implements the EmitC MLModelRunner for your
+specific problem.
+For example, if your model was named MyModel and it was for inlining, the
+macro invocation would look like:
+
+REGISTER_EMITC_MODEL(::llvm::MLInlinerEmitCRunner, MyModel);
+
+{'*'*60}
+"""
+  logging.info(registration_msg)
diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp
--- a/llvm/lib/Passes/PassBuilderPipelines.cpp
+++ b/llvm/lib/Passes/PassBuilderPipelines.cpp
@@ -141,7 +141,9 @@
         clEnumValN(InliningAdvisorMode::Development, "development",
                    "Use development mode (runtime-loadable model)"),
         clEnumValN(InliningAdvisorMode::Release, "release",
-                   "Use release mode (AOT-compiled model)")));
+                   "Use release mode (AOT-compiled model)"),
+        clEnumValN(InliningAdvisorMode::EmitC, "emitc",
+                   "Use EmitC-compiled model")));
 
 static cl::opt<bool> EnableSyntheticCounts(
     "enable-npm-synthetic-counts", cl::Hidden,
diff --git a/llvm/test/Transforms/Inline/ML/ml-test-emitc-mode.ll b/llvm/test/Transforms/Inline/ML/ml-test-emitc-mode.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/Inline/ML/ml-test-emitc-mode.ll
@@ -0,0 +1,6 @@
+; This test uses Inputs/test-module.ll, which it shares with the similar tests
+; for the 'development' and 'release' modes. The InlineOzTestModel inlines
+; everything.
+;
+; RUN: opt -passes=scc-oz-module-inliner -enable-ml-inliner=emitc -inliner-emitc-model-name=InlineOzTestModel -S < %S/Inputs/test-module.ll 2>&1 | FileCheck %S/Inputs/test-module.ll --check-prefix=CHECK
+; RUN: opt -passes=scc-oz-module-inliner -enable-ml-inliner=default -S < %S/Inputs/test-module.ll 2>&1 | FileCheck %S/Inputs/test-module.ll --check-prefix=DEFAULT